In [34]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix,classification_report, precision_recall_curve
import numpy as np
X_train, X_test, y_train, y_test = train_test_split(cuisines_feature_df, cuisines_label_df, test_size=0.3)


In [48]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Create classifiers and train them
classifiers = {
    'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=0)
}

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(X_train_scaled, np.ravel(y_train))

    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy (train) for %s: %0.1f%%" % (name, accuracy * 100))
    print(classification_report(y_test, y_pred))


Accuracy (train) for Linear SVC: 70.4%
              precision    recall  f1-score   support

     chinese       0.69      0.70      0.70       108
      indian       0.79      0.87      0.83        83
    japanese       0.60      0.57      0.58        60
      korean       0.75      0.71      0.73       113
        thai       0.59      0.57      0.58        42

    accuracy                           0.70       406
   macro avg       0.68      0.68      0.68       406
weighted avg       0.70      0.70      0.70       406



In [42]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Handle missing values
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Create classifiers
classifiers = {
    'Linear SVC': SVC(kernel='linear', C=C, probability=True, random_state=0),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=0)
}

# Train and evaluate classifiers
for name, classifier in classifiers.items():
    classifier.fit(X_train_scaled, np.ravel(y_train))
    y_pred = classifier.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy (test) for %s: %0.1f%%" % (name, accuracy * 100))
    print(classification_report(y_test, y_pred))


Accuracy (test) for Linear SVC: 69.0%
              precision    recall  f1-score   support

     chinese       0.65      0.69      0.67       108
      indian       0.80      0.86      0.83        83
    japanese       0.56      0.52      0.54        60
      korean       0.74      0.69      0.71       113
        thai       0.61      0.60      0.60        42

    accuracy                           0.69       406
   macro avg       0.67      0.67      0.67       406
weighted avg       0.69      0.69      0.69       406

Accuracy (test) for Random Forest: 75.1%
              precision    recall  f1-score   support

     chinese       0.77      0.71      0.74       108
      indian       0.85      0.92      0.88        83
    japanese       0.71      0.60      0.65        60
      korean       0.71      0.77      0.74       113
        thai       0.67      0.69      0.68        42

    accuracy                           0.75       406
   macro avg       0.74      0.74      0.74       40

In [53]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fit the classifiers

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(X_train, np.ravel(y_train))

    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy for %s: %0.1f%% " % (name, accuracy * 100))

    # Print confusion matrix
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    # Print classification report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

Accuracy for Linear SVC: 70.4% 
Confusion Matrix:
[[76  4 12 10  6]
 [ 3 72  0  4  4]
 [11  1 34 12  2]
 [15  3 10 80  5]
 [ 5 11  1  1 24]]
Classification Report:
              precision    recall  f1-score   support

     chinese       0.69      0.70      0.70       108
      indian       0.79      0.87      0.83        83
    japanese       0.60      0.57      0.58        60
      korean       0.75      0.71      0.73       113
        thai       0.59      0.57      0.58        42

    accuracy                           0.70       406
   macro avg       0.68      0.68      0.68       406
weighted avg       0.70      0.70      0.70       406



## Interpretation of the result
Below is the result of performance metrics for two classifiers: Linear SVC and Random Forest. 

# For Linear SVC:

Accuracy (test): The accuracy of the Linear SVC classifier on the test set is 69.0%. This means that the classifier correctly predicted the class label for 69.0% of the instances in the test set.

Classification Report: The classification report provides precision, recall, and F1-score metrics for each class, along with the support (number of instances) for each class. Precision measures the proportion of correctly predicted instances of a particular class out of all instances predicted as that class. Recall measures the proportion of instances of a particular class that were correctly predicted out of all instances that belong to that class. The F1-score is the harmonic mean of precision and recall, providing a balanced measure of the classifier's performance. The weighted average of precision, recall, and F1-score is also provided, taking into account the support for each class.

# For Random Forest:

Accuracy (test): The accuracy of the Random Forest classifier on the test set is 75.1%. This means that the classifier correctly predicted the class label for 75.1% of the instances in the test set.

Classification Report: Similar to the Linear SVC classifier, the classification report provides precision, recall, and F1-score metrics for each class, along with the support (number of instances) for each class. The weighted average of precision, recall, and F1-score is also provided.

In both cases, the classification reports provide a breakdown of the performance of the classifiers for each class. The precision, recall, and F1-score values indicate the classifier's ability to correctly predict instances for each class. The accuracy metric gives an overall measure of the classifier's performance on the entire test set.

Comparing the results, we can see that the Random Forest classifier achieved a higher accuracy (75.1%) compared to the Linear SVC classifier (69.0%). Additionally, the precision, recall, and F1-score values for Random Forest tend to be higher for most classes, indicating a better overall performance compared to Linear SVC.
