# Parameter Play
Instructions

There are a lot of parameters that are set by default when working with these classifiers. Adopt one of the ML Classification Techniques in this lesson and retrain models tweaking various parameter values. 

In [10]:
# Import the reqiured libraries:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve

In [11]:
import pandas as pd
import numpy as np

cuisines_df = pd.read_csv('cleaned_cuisines.csv')
cuisines_df.head()

Unnamed: 0.1,Unnamed: 0,cuisine,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,indian,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,indian,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [12]:
cuisines_label_df = cuisines_df['cuisine']
cuisines_label_df.head()

0    indian
1    indian
2    indian
3    indian
4    indian
Name: cuisine, dtype: object

In [13]:
cuisines_feature_df = cuisines_df.drop(['Unnamed: 0', 'cuisine'], axis=1)
cuisines_feature_df.head()

Unnamed: 0,almond,angelica,anise,anise_seed,apple,apple_brandy,apricot,armagnac,artemisia,artichoke,...,whiskey,white_bread,white_wine,whole_grain_wheat_flour,wine,wood,yam,yeast,yogurt,zucchini
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [14]:
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df,cuisines_label_df,test_size=0.3)

In [15]:
C = 10
# Create different classifiers
classifiers = {
    'Linear SVC': SVC(kernel='linear',C=C,probability=True,random_state=0),
    'KNN classifier': KNeighborsClassifier(C),
    'SVC': SVC(),
    'RFST': RandomForestClassifier(n_estimators=100),
    'ADA': AdaBoostClassifier(n_estimators=100)
}

# Train your model using the Linear SVC and print out a report:
n_classifiers = len(classifiers)
for index, (name,classifier) in enumerate(classifiers.items()):
    classifier.fit(X_train,np.ravel(y_train))
    
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test,y_pred)
    print('Accuracy (train) for %s:%0.1f%%' % (name,accuracy * 100))
    print(classification_report(y_test,y_pred))

Accuracy (train) for Linear SVC:76.2%
              precision    recall  f1-score   support

     chinese       0.61      0.68      0.65       221
      indian       0.85      0.87      0.86       229
    japanese       0.72      0.76      0.74       237
      korean       0.87      0.73      0.79       253
        thai       0.78      0.78      0.78       259

    accuracy                           0.76      1199
   macro avg       0.77      0.76      0.76      1199
weighted avg       0.77      0.76      0.76      1199

Accuracy (train) for KNN classifier:72.4%
              precision    recall  f1-score   support

     chinese       0.66      0.67      0.66       221
      indian       0.82      0.82      0.82       229
    japanese       0.60      0.84      0.70       237
      korean       0.90      0.53      0.66       253
        thai       0.75      0.78      0.76       259

    accuracy                           0.72      1199
   macro avg       0.75      0.73      0.72      11

Using the random forest classifier

In [16]:
classifier = RandomForestClassifier(n_estimators=100)
classifier.fit(X_train,np.ravel(y_train))
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print(f'Random Forest Accuracy (train): {accuracy * 100}')
print(classification_report(y_test,y_pred))

Random Forest Accuracy (train): 80.40033361134279
              precision    recall  f1-score   support

     chinese       0.73      0.72      0.73       221
      indian       0.84      0.90      0.87       229
    japanese       0.77      0.79      0.78       237
      korean       0.87      0.79      0.83       253
        thai       0.80      0.82      0.81       259

    accuracy                           0.80      1199
   macro avg       0.80      0.80      0.80      1199
weighted avg       0.80      0.80      0.80      1199



lets now increase the estimators to 200

In [17]:
classifier = RandomForestClassifier(n_estimators=200)
classifier.fit(X_train,np.ravel(y_train))
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test,y_pred)
print(f'Random Forest Accuracy (train): {accuracy * 100}')
print(classification_report(y_test,y_pred))

Random Forest Accuracy (train): 81.31776480400333
              precision    recall  f1-score   support

     chinese       0.75      0.75      0.75       221
      indian       0.85      0.89      0.87       229
    japanese       0.78      0.81      0.80       237
      korean       0.89      0.78      0.83       253
        thai       0.80      0.83      0.82       259

    accuracy                           0.81      1199
   macro avg       0.81      0.81      0.81      1199
weighted avg       0.82      0.81      0.81      1199

