Data from: https://archive.ics.uci.edu/dataset/19/car+evaluation

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
from sklearn.preprocessing import OrdinalEncoder
import numpy as np




In [26]:
# car data:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
car_evaluation = fetch_ucirepo(id=19) 
  
# data (as pandas dataframes) 
car_evaluation_df = car_evaluation.data.features 

X = car_evaluation_df[['maint','doors','persons','lug_boot','safety']]
X['class'] = car_evaluation.data.targets['class']
y = car_evaluation_df[['buying']]




In [6]:
# metadata 
print(car_evaluation.metadata) 
  
# variable information 
print(car_evaluation.variables) 

{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'published_in': '8th Intl Workshop on Expert Systems and their Applications, Avignon, France', 'yea

In [173]:
# Define the custom order for the categorical column
x_categories_dict ={
                
                'maint': ['low', 'med', 'high', 'vhigh'],
                'doors': ['2','3','4','5more'],
                'persons': ['2','4','more'],
                'lug_boot': ['small','med','big'],
                'safety': ['low','med','high'],
                'class': ['unacc', 'acc', 'good', 'vgood']
}

y_categories_dict = {
    'buying': ['low', 'med', 'high', 'vhigh'],
}

encoder = OrdinalEncoder()
transformed_X = encoder.set_params(encoded_missing_value=-1).fit_transform(X)
transformed_Y = encoder.set_params(encoded_missing_value=-1).fit_transform(y)





In [174]:
# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(transformed_X, transformed_Y, test_size=0.2, random_state=42)



In [175]:

# Initialize the ____ model
model = LogisticRegression()

In [176]:
# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(transformed_X, transformed_Y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LogisticRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_report(y_test, y_pred))

# Optionally, print the model coefficients
print(f'Coefficients: {model.coef_}')
print(f'Intercept: {model.intercept_}')


Accuracy: 0.22832369942196531
Classification Report:
              precision    recall  f1-score   support

         0.0       0.19      0.12      0.15        92
         1.0       0.21      0.18      0.19        83
         2.0       0.19      0.31      0.24        77
         3.0       0.32      0.31      0.31        94

    accuracy                           0.23       346
   macro avg       0.23      0.23      0.22       346
weighted avg       0.23      0.23      0.22       346

Coefficients: [[-0.0056364  -0.01396603 -0.02885368  0.02504834  0.01564097 -0.12467016]
 [-0.01888805  0.00386267  0.01140262  0.04260909 -0.00813174  0.04435731]
 [ 0.01575472  0.03163789 -0.05648301 -0.04157133 -0.0038904  -0.08304333]
 [ 0.00876973 -0.02153453  0.07393408 -0.0260861  -0.00361883  0.16335618]]
Intercept: [ 0.19557549 -0.07904528  0.18962514 -0.30615535]


# Example from [sklearn](https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html) on testing on various models  

Wanted to iterate over multiple classification models to get best score


Other references:  
https://medium.com/@will_47810/how-to-train-a-linear-classification-model-using-scikit-learn-c8f166371b85

In [51]:
# Code source: Gaël Varoquaux
#              Andreas Müller
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier


In [52]:


classifiers = [
    KNeighborsClassifier(4),
    SVC(kernel="linear", C=0.025, random_state=42),
    SVC(gamma=2, C=1, random_state=42),
    GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42
    ),
    MLPClassifier(alpha=1, max_iter=1000, random_state=42),
    AdaBoostClassifier(algorithm="SAMME", random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    SGDClassifier(random_state=42)
]





In [164]:
import warnings
warnings.filterwarnings("ignore")

# iterate over classifiers
for name, clf in zip(names, classifiers):


    clf_pipeline = make_pipeline( clf)
    clf_pipeline.fit(X_train, y_train)
    score = clf_pipeline.score(X_test, y_test)

    print(f"classifier {type(clf)}: {score}")


classifier <class 'sklearn.neighbors._classification.KNeighborsClassifier'>: 0.1416184971098266
classifier <class 'sklearn.svm._classes.SVC'>: 0.22254335260115607
classifier <class 'sklearn.svm._classes.SVC'>: 0.06936416184971098
classifier <class 'sklearn.gaussian_process._gpc.GaussianProcessClassifier'>: 0.22254335260115607
classifier <class 'sklearn.tree._classes.DecisionTreeClassifier'>: 0.22832369942196531
classifier <class 'sklearn.ensemble._forest.RandomForestClassifier'>: 0.18497109826589594
classifier <class 'sklearn.neural_network._multilayer_perceptron.MLPClassifier'>: 0.2976878612716763
classifier <class 'sklearn.ensemble._weight_boosting.AdaBoostClassifier'>: 0.3439306358381503
classifier <class 'sklearn.naive_bayes.GaussianNB'>: 0.2861271676300578
classifier <class 'sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis'>: 0.24277456647398843


In [169]:
X.head(1)

Unnamed: 0,maint,doors,persons,lug_boot,safety,class
0,vhigh,2,2,small,low,unacc


In [192]:
## Best model

clf = AdaBoostClassifier(algorithm="SAMME", random_state=42)
clf.fit(transformed_X, transformed_Y)

# Define predictor parameters:
x_predict_dict = {'maint': ['vhigh'], 
        'doors': [2],
        'persons':[2],
        'lug_boot':['small'],
        'safety':['low'],
        'class':['unacc']
        }
x_predict_df = pd.DataFrame(data=x_predict_dict)

predict_transformed_X = encoder.set_params(encoded_missing_value=-1).fit_transform(x_predict_df)



In [191]:
# Prediction
y_categories_dict['buying'][int(clf.predict(predict_transformed_X)[0]) - 1]

'med'