# **Auxiliary Python code for classification task**

### Includes preprocessing of the lookup ingredients
### Generates TF-IDF vectorized ingredients
### Saves the X features and Y corresponding labels
### Trains an SVM on the whole dataset and saves it so it can be used to classify a new recipe

***Created by Rahul Maheshwari***

In [1]:
# all imports
import pickle
import re

import numpy as np
import pandas as pd
from joblib import dump
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [None]:
# Shared WordNet lemmatizer; the preprocessing cell calls lm.lemmatize() on
# every ingredient token before the text is vectorized.
# NOTE(review): NLTK's WordNet corpus presumably has to be downloaded first
# (nltk.download('wordnet')) — confirm in the target environment.
lm = WordNetLemmatizer()

In [None]:
# Columns dropped right after loading because nothing below reads them.
unused_columns = [
    'Recipe ID',
    'Title',
    'URL',
    'Rating',
    'Serves',
    'Ingredients',
    'Cooking instructions',
    'Rating Score',
]

# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Read the recipe dataset and discard the non-contributing columns.
df = pd.read_csv("recipes.csv").drop(columns=unused_columns)

**Pre-process the features (ingredients) and vectorize them**

In [None]:
# Step 1: drop the first and last character of each raw entry (presumably the
# enclosing brackets of a stringified list — confirm against the CSV) and
# remove every character that is neither a word character nor whitespace.
punctuation = re.compile(r'[^\w\s]')
df['new_ingredients'] = [
    punctuation.sub('', raw[1:-1]) for raw in df['Lookup Ingredients']
]

# Step 2: tokenize the cleaned text and lemmatize each token, then glue the
# tokens back into one space-separated document per recipe.
df['modified_ingredients'] = [
    ' '.join(lm.lemmatize(token) for token in word_tokenize(text))
    for text in df['new_ingredients']
]

# Step 3: turn the documents into TF-IDF features. The fitted vectorizer is
# kept so new input can be transformed with the same vocabulary.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['modified_ingredients'])

# Step 4: encode the 'Meal' labels as integers, overwriting the column in place.
le = preprocessing.LabelEncoder()
df['Meal'] = le.fit_transform(df['Meal'])
Y = df['Meal']

**Save X and Y arrays to be used in the GUI for training and evaluation of various models**

In [None]:
# Persist the TF-IDF feature matrix. np.save appends ".npy" to the name, so
# the file written is "X_array.npy".
# NOTE(review): X comes from TfidfVectorizer.fit_transform and is presumably a
# SciPy sparse matrix, in which case NumPy stores it as a pickled object array
# and the reader needs np.load(..., allow_pickle=True) — confirm against the
# GUI loader before changing the format.
np.save("X_array", X)

# Persist the encoded labels. The context manager guarantees the file handle
# is closed even if pickling raises.
with open('Y_array', 'wb') as pickle_file:
    pickle.dump(Y, pickle_file)

**Train the model and save it for use in the GUI**

In [None]:
# Map each encoded class index back to its meal name. LabelEncoder assigns
# indices in sorted label order, so the mapping is derived from the fitted
# encoder instead of being hard-coded (a hand-written table can silently swap
# classes, e.g. 'Dinner' sorts before 'Lunch').
meal_map = dict(enumerate(le.classes_))

# Hold-out split; it is not used for fitting below because the saved model is
# trained on the whole dataset.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the SVM on all available data and persist it for the GUI.
svm = SVC(C=20, kernel='rbf', gamma=1)
svm.fit(X, Y)
dump(svm, 'svm_model.joblib')

# Read a comma-separated ingredient list from the user.
ingredients = input("Enter ing").split(',')
ingredients = [i.strip() for i in ingredients]

# Build ONE document for the whole recipe and normalize it exactly like the
# training text (punctuation stripped, tokens lemmatized). Transforming the
# list directly would treat every ingredient as its own recipe, and only the
# first ingredient's prediction would be reported.
query = ' '.join(
    lm.lemmatize(token)
    for token in word_tokenize(re.sub(r'[^\w\s]', '', ' '.join(ingredients)))
)
test_input = vectorizer.transform([query])

# Predict the meal class for the single query document and print its name.
y_pred = svm.predict(test_input)
print(meal_map[y_pred[0]])