# **Classifier Accuracy Comparison plot**

### Train various classifiers and measure their accuracy
### Save the trained models so they can be reused without retraining
### Plot a comparison of the classifiers' accuracy

***Created by Rahul Maheshwari***

In [None]:
# all necessary imports
import pickle
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from joblib import dump
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

**The dataset is loaded as a DataFrame and pre-processing is applied**

In [None]:
# Show every column whenever a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Lemmatizer used by the ingredient pre-processing cell.
lm = WordNetLemmatizer()

# Load the recipe dataset and discard the columns listed below; the remaining
# cells only read 'Lookup Ingredients' and 'Meal'.
dropped_columns = [
    'Recipe ID',
    'Title',
    'URL',
    'Rating',
    'Serves',
    'Ingredients',
    'Cooking instructions',
    'Rating Score',
]
df = pd.read_csv("recipes.csv")
df = df.drop(dropped_columns, axis=1)

In [None]:
# Clean every raw ingredient string: drop its first and last character
# (presumably the brackets of a list literal -- confirm against the CSV) and
# remove every character that is neither a word character nor whitespace.
df['new_ingredients'] = [
    re.sub(r'[^\w\s]', '', raw_text[1:-1])
    for raw_text in df['Lookup Ingredients']
]

# Tokenize each cleaned string and lemmatize it token by token, then glue the
# lemmas back together with single spaces.
df['modified_ingredients'] = [
    ' '.join(lm.lemmatize(token) for token in word_tokenize(cleaned_text))
    for cleaned_text in df['new_ingredients']
]

# TF-IDF feature matrix built from the lemmatized ingredient text.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['modified_ingredients'])

# Replace the 'Meal' labels with the integer codes assigned by LabelEncoder
# and use the encoded column as the target.
le = preprocessing.LabelEncoder()
df['Meal'] = le.fit_transform(df['Meal'])
Y = df['Meal']

**The vectorized features and the label-encoded labels are saved to disk**

In [None]:
# Persist the feature matrix. np.save appends ".npy" to the file name.
# NOTE(review): X comes from TfidfVectorizer.fit_transform, which returns a
# SciPy sparse matrix; np.save stores such an object as a pickled object
# array, so reading it back presumably needs
# np.load("X_array.npy", allow_pickle=True).item() -- confirm with consumers.
np.save("X_array", X)

# Persist the encoded labels. The context manager closes the file even if
# pickling fails part-way through.
with open('Y_array', 'wb') as pickle_file:
    pickle.dump(Y, pickle_file)

**Train test split of dataset**

In [None]:
# Map every encoded label back to its meal name. LabelEncoder numbers the
# classes in sorted order (e.g. Breakfast, Dinner, Lunch), so the mapping is
# read from the fitted encoder instead of being hard-coded in a fixed order.
meal_map = {code: str(label) for code, label in enumerate(le.classes_)}

# Accuracy in percent (rounded to two decimals) of each classifier, keyed by
# the classifier's display name; filled in by the training cells below.
classifier_accuracy = {}

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

**Training the SVM model with preset hyperparameters**

In [None]:
svm = SVC(C=20, kernel='rbf', gamma=1)

# Evaluate a model that has never seen the held-out rows. Fitting on all of
# X before predicting on X_test would score the model on its own training
# data and overstate the accuracy.
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred) * 100
classifier_accuracy["SVM"] = round(svm_accuracy, 2)
print("SVC Accuracy:", svm_accuracy)

# The persisted model is refitted on the complete dataset, so the saved
# artefact is still trained on every available row.
svm.fit(X, Y)
dump(svm, 'svm_model.joblib')

**Training the Random Forest model with preset hyperparameters**

In [None]:
rf = RandomForestClassifier(bootstrap=True, max_depth=None, max_features=30, n_estimators=100, random_state=42)

# Evaluate a model that has never seen the held-out rows. Fitting on all of
# X before predicting on X_test would score the model on its own training
# data and overstate the accuracy.
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred) * 100
classifier_accuracy["Random Forest"] = round(rf_accuracy, 2)
print("Random Forest Classifier Accuracy:", rf_accuracy)

# The persisted model is refitted on the complete dataset, so the saved
# artefact is still trained on every available row.
rf.fit(X, Y)
dump(rf, 'rf_model.joblib')

**Training the Decision Tree model with preset hyperparameters**

In [None]:
dt_classifier = DecisionTreeClassifier(max_depth=155, max_features=40, random_state=42)

# Evaluate a model that has never seen the held-out rows. Fitting on all of
# X before predicting on X_test would score the model on its own training
# data and overstate the accuracy.
dt_classifier.fit(X_train, y_train)
y_pred = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred) * 100
classifier_accuracy["Decision Tree"] = round(dt_accuracy, 2)
print("Decision Tree Classifier Accuracy:", dt_accuracy)

# The persisted model is refitted on the complete dataset, so the saved
# artefact is still trained on every available row.
dt_classifier.fit(X, Y)
dump(dt_classifier, 'dt_model.joblib')

**Training the KNN model with preset hyperparameters**

In [None]:
knn_classifier = KNeighborsClassifier(n_neighbors=5)

# Evaluate a model that has never seen the held-out rows. Fitting on all of
# X would put every test row into the neighbour index, so each test point
# could be matched against itself and the accuracy would be overstated.
knn_classifier.fit(X_train, y_train)
y_pred = knn_classifier.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred) * 100
classifier_accuracy["KNN"] = round(knn_accuracy, 2)
print("KNN Classifier Accuracy:", knn_accuracy)

# The persisted model is refitted on the complete dataset, so the saved
# artefact is still trained on every available row.
knn_classifier.fit(X, Y)
dump(knn_classifier, 'knn_model.joblib')

**Plot bar graph of accuracy vs classifier**

In [None]:
# Classifier names and their accuracies, in insertion order.
classifier_names = list(classifier_accuracy.keys())
accuracy_values = list(classifier_accuracy.values())

# One square figure with a single set of axes.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot()
ax.set_title("Classifier Vs Accuracy", size=15)
ax.set_xlabel("Classifier", size=15)
ax.set_ylabel("Accuracy (%)", size=15)

# One bar per classifier, each annotated with its accuracy just above the bar.
ax.bar(classifier_names, accuracy_values)
for bar_index, accuracy in enumerate(accuracy_values):
    ax.text(bar_index - 0.15, accuracy + 0.5, f"{accuracy}%", color='blue', size=15)
plt.show()