In [105]:
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import pandas as pd

In [106]:
# Name the seven fields explicitly (no header row is read from the file);
# the last field is the one later used as the prediction target.
column_names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "target_class"]
data = pd.read_csv('car.data', names=column_names)

In [107]:
# Preview the first rows to confirm the columns were parsed as expected.
data.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,target_class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [108]:
# Summary of every column. All columns are still strings at this point, so
# describe() reports count / unique / top / freq rather than numeric moments.
data.describe()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,target_class
count,1728,1728,1728,1728,1728,1728,1728
unique,4,4,4,3,3,3,4
top,high,high,3,more,small,high,unacc
freq,432,432,432,576,576,576,1210


In [109]:
# Distinct raw labels in `buying`, checked before choosing an encoding.
data['buying'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [110]:
# Ordinal codes for `buying`: 1 (low) up to 4 (vhigh).
buying_codes = {'vhigh': 4, 'high': 3, 'med': 2, 'low': 1}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['buying']):
    data['buying'] = data['buying'].map(buying_codes)

# Fail loudly if any label was missing from the mapping.
assert data['buying'].notna().all(), "unmapped value in 'buying'"

In [111]:
# Distinct raw labels in `maint`, checked before choosing an encoding.
data['maint'].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [112]:
# Ordinal codes for `maint`: 1 (low) up to 4 (vhigh).
maint_codes = {'vhigh': 4, 'high': 3, 'med': 2, 'low': 1}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['maint']):
    data['maint'] = data['maint'].map(maint_codes)

# Fail loudly if any label was missing from the mapping.
assert data['maint'].notna().all(), "unmapped value in 'maint'"

In [113]:
# Distinct raw labels in `doors`, checked before choosing an encoding.
data['doors'].unique()

array(['2', '3', '4', '5more'], dtype=object)

In [114]:
# Numeric codes for `doors`; the open-ended label '5more' is coded as 5.
doors_codes = {'2': 2, '3': 3, '4': 4, '5more': 5}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['doors']):
    data['doors'] = data['doors'].map(doors_codes)

# Fail loudly if any label was missing from the mapping.
assert data['doors'].notna().all(), "unmapped value in 'doors'"

In [115]:
# Distinct raw labels in `persons`, checked before choosing an encoding.
data['persons'].unique()

array(['2', '4', 'more'], dtype=object)

In [116]:
# Numeric codes for `persons`; the open-ended label 'more' is coded as 5.
persons_codes = {'2': 2, '4': 4, 'more': 5}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['persons']):
    data['persons'] = data['persons'].map(persons_codes)

# Fail loudly if any label was missing from the mapping.
assert data['persons'].notna().all(), "unmapped value in 'persons'"

In [117]:
# Distinct raw labels in `lug_boot`, checked before choosing an encoding.
data['lug_boot'].unique()

array(['small', 'med', 'big'], dtype=object)

In [118]:
# Ordinal codes for `lug_boot`: 1 (small), 2 (med), 3 (big).
lug_boot_codes = {'small': 1, 'med': 2, 'big': 3}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['lug_boot']):
    data['lug_boot'] = data['lug_boot'].map(lug_boot_codes)

# Fail loudly if any label was missing from the mapping.
assert data['lug_boot'].notna().all(), "unmapped value in 'lug_boot'"

In [119]:
# Distinct raw labels in `safety`, checked before choosing an encoding.
data['safety'].unique()

array(['low', 'med', 'high'], dtype=object)

In [120]:
# Ordinal codes for `safety`: 1 (low), 2 (med), 3 (high).
safety_codes = {'low': 1, 'med': 2, 'high': 3}

# Encode only while the column still holds the raw labels. Series.map() turns
# every value that is not a dictionary key into NaN, so re-running this cell on
# the already-encoded column would otherwise wipe it out.
if not pd.api.types.is_numeric_dtype(data['safety']):
    data['safety'] = data['safety'].map(safety_codes)

# Fail loudly if any label was missing from the mapping.
assert data['safety'].notna().all(), "unmapped value in 'safety'"

In [121]:
# Distinct class labels in the target column (these are left as strings).
data['target_class'].unique()

array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [122]:
# Split the frame into the feature matrix and the label vector, selecting the
# columns by name (the six encoded attributes, then the target).
feature_columns = ["buying", "maint", "doors", "persons", "lug_boot", "safety"]
x = data[feature_columns].values
y = data["target_class"].values

In [123]:
# Inspect the encoded feature matrix (one row per car, six integer columns).
x

array([[4, 4, 2, 2, 1, 1],
       [4, 4, 2, 2, 1, 2],
       [4, 4, 2, 2, 1, 3],
       ...,
       [1, 1, 5, 5, 3, 1],
       [1, 1, 5, 5, 3, 2],
       [1, 1, 5, 5, 3, 3]], dtype=int64)

In [124]:
# Inspect the label vector; the class names are kept as strings.
y

array(['unacc', 'unacc', 'unacc', ..., 'unacc', 'good', 'vgood'],
      dtype=object)

In [125]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import f1_score
import numpy as np


# Candidate inverse-regularisation strengths: 9 values log-spaced from 0.01 to 100.
# With the three penalty/solver combinations below this gives 27 candidates.
# A dense linear sweep (e.g. every integer 1..99) multiplies the number of fits
# by more than ten without exploring any other order of magnitude.
c_values = np.logspace(-2, 2, 9).tolist()


# Two sub-grids, because the l1 penalty is paired with liblinear (one-vs-rest)
# while the l2 penalty is paired with the multinomial solvers.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (1.5+) — confirm against the installed version before upgrading.
param_grid = [
    {
        'C': c_values,
        'penalty': ['l1'],
        'solver': ['liblinear'],
        'multi_class': ['ovr'],
        'random_state': [42],
        'max_iter': [5000],
    },
    {
        'C': c_values,
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs'],
        'multi_class': ['multinomial'],
        'random_state': [42],
        'max_iter': [5000],
    },
]

In [126]:
# Wrap f1_score (weighted average across classes) as a scorer object.
# NOTE(review): nothing in the visible cells passes `scorer` anywhere — the
# GridSearchCV call uses scoring='accuracy' — confirm whether it was meant to be used.
scorer = make_scorer(f1_score, average = 'weighted')

In [127]:
# 10-fold stratified cross-validation with shuffling. The rows of `data` are
# sorted by feature value (the first rows are all vhigh/vhigh, the last all
# low/low), and an integer `cv` builds unshuffled folds, so each fold would be a
# contiguous, unrepresentative slice of every class. The seed keeps the folds
# reproducible between runs.
cv_folds = model_selection.StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Exhaustive search over param_grid, scored by accuracy; n_jobs=-1 runs the
# independent fits on all available cores.
grid = GridSearchCV(
    LogisticRegression(),
    param_grid,
    cv=cv_folds,
    scoring='accuracy',
    n_jobs=-1,
)

In [128]:
# Run the cross-validated search over every candidate in param_grid, using all
# rows of x / y.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# search was aborted; the cells below that read `grid` need it to finish first.
grid.fit(x,y)

KeyboardInterrupt: 

In [None]:
# Report the winning hyperparameter combination and its mean cross-validated
# score, expressed as a percentage.
best_score_pct = grid.best_score_ * 100
print(grid.best_params_)
print('Best Score: {:.2f}%'.format(best_score_pct))

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for testing. `stratify=y` keeps the class proportions
# equal in both parts; the classes are heavily imbalanced ('unacc' is 1210 of the
# 1728 rows), so an unstratified draw can leave the rare classes under-represented.
# NOTE(review): the grid search above was fitted on all of x / y, so these test
# rows already took part in hyperparameter selection — consider splitting first.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42, stratify=y
)

In [None]:
# Fit a multinomial, L2-penalised logistic regression with fixed hyperparameters
# (C=7, newton-cg solver) on the training split, then predict the held-out rows.
logreg = LogisticRegression(
    penalty='l2',
    C=7,
    solver='newton-cg',
    multi_class='multinomial',
    random_state=42,
)
logreg.fit(x_train, y_train)
y_pred = logreg.predict(x_test)

# score() on a classifier returns the mean accuracy on the given data.
test_accuracy = logreg.score(x_test, y_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(test_accuracy))

In [None]:
from sklearn.metrics import confusion_matrix
# Build the confusion matrix with an explicit label order and attach that order
# to the rows (true class) and columns (predicted class), so each count can be
# read without guessing which index belongs to which class. The sorted union of
# the labels is the same order confusion_matrix uses when `labels` is omitted.
class_labels = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
cm_table = pd.DataFrame(
    cm,
    index=['true ' + label for label in class_labels],
    columns=['pred ' + label for label in class_labels],
)
print(cm_table)

In [None]:
# Report the held-out accuracy again (the same figure is printed right after
# the model is fitted).
holdout_accuracy = logreg.score(x_test, y_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(holdout_accuracy))

In [None]:
# Per-class precision, recall, F1 and support for the test-set predictions.
report_text = classification_report(y_test, y_pred)
print(report_text)