In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv("Cleaned_Indian_Food_Dataset.csv")
df = df[["TranslatedRecipeName", "TranslatedIngredients"]]
df.head()

Unnamed: 0,TranslatedRecipeName,TranslatedIngredients
0,Masala Karela Recipe,"1 tablespoon Red Chilli powder,3 tablespoon Gr..."
1,Spicy Tomato Rice (Recipe),"2 teaspoon cashew - or peanuts, 1/2 Teaspoon ..."
2,Ragi Semiya Upma Recipe - Ragi Millet Vermicel...,"1 Onion - sliced,1 teaspoon White Urad Dal (Sp..."
3,Gongura Chicken Curry Recipe - Andhra Style Go...,"1/2 teaspoon Turmeric powder (Haldi),1 tablesp..."
4,Andhra Style Alam Pachadi Recipe - Adrak Chutn...,"oil - as per use, 1 tablespoon coriander seed..."


In [3]:
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('tablespoons', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('teaspoons', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('tablespoon', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('teaspoon', '')

df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- to taste', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- as per taste', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- as per use', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- as required', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- to make the dough', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- sliced', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('- a generous pinch', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('cup', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('cups', '')
df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('/', '')


df['TranslatedIngredients'] = df['TranslatedIngredients'].str.replace('\d+', '', regex = True)
df['TranslatedIngredients']

0         Red Chilli powder,  Gram flour (besan),  Cum...
1          cashew - or peanuts,  Teaspoon mustard,  dr...
2        Onion ,  White Urad Dal (Split), Green Chilli...
3         Turmeric powder (Haldi),  Coriander (Dhania)...
4        oil ,   coriander seeds,  Teaspoon mustard,  ...
                              ...                        
5933      Rose water,  Ghee, s Paneer (Homemade Cottag...
5934     sprig Basil leaves - finely chopped, s Whole ...
5935      Sugar,  Milk - vegans can substitute this in...
5936     pinch Turmeric powder (Haldi),  Mustard seeds...
5937    Salt , to  Cloves (Laung),Ghee , to  Black car...
Name: TranslatedIngredients, Length: 5938, dtype: object

In [4]:
X = df['TranslatedIngredients']
y = df['TranslatedRecipeName']

In [5]:
vectorizer = CountVectorizer()

# Convert text to numerical features
X_vectorized = vectorizer.fit_transform(X)

In [6]:
import pickle

In [7]:
pickle.dump(X_vectorized,open('vectoriser.pkl','wb'))

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

In [9]:
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

In [10]:
y_pred = classifier.predict(X_test)

In [11]:
new_ingredients = ["Cheese", "1/2 teaspoon Turmeric powder (Haldi)", "Paneer"]
new_ingredients_vectorized = vectorizer.transform(new_ingredients)
predicted_recipe_names = classifier.predict(new_ingredients_vectorized)
print("Predicted Recipe Names:", predicted_recipe_names)

Predicted Recipe Names: ['Roasted Vegetable Pasta Primavera Recipe' 'Kadai Mushroom Recipe'
 'Vegetarian Malai Kebab Recipe']


In [12]:
import pickle

In [13]:
pickle.dump(classifier,open('model.pkl','wb'))

In [14]:
# model=pickle.load(open('model.pkl','rb'))
# vectorizer = CountVectorizer()
# new_ingredients = ["Cheese", "1/2 teaspoon Turmeric powder (Haldi)", "Paneer"]
# new_ingredients_vectorized = vectorizer.transform(new_ingredients)
# predicted_recipe_names = model.predict(new_ingredients_vectorized)
# print("Predicted Recipe Names:", predicted_recipe_names)

In [15]:
import joblib
joblib.dump(classifier,'model.joblib')
joblib.dump(vectorizer, 'vectorizer.joblib')

['vectorizer.joblib']