Machine learning similarity matrix

In [12]:
import pandas as pd

# Load the dataset
df = pd.read_csv('dishes_and_ingredients_updated.csv')

# Print the column names to check for discrepancies
print(df.columns)

Index(['Dish Name', 'Ingredients'], dtype='object')


In [13]:
df.columns = df.columns.str.strip()

In [14]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
df = pd.read_csv('dishes_and_ingredients_updated.csv')
df.columns = df.columns.str.strip()
print(df.columns)

# the ingredient list
all_ingredients = set()
for ingredients in df['Ingredients']:
    all_ingredients.update(ingredients.split(','))

ingredient_list = list(all_ingredients)

# Create the binary matrix for dishes
def create_binary_matrix(dishes_df, ingredient_list):
    dish_matrix = []
    for ingredients in dishes_df['Ingredients']:
        # Create a binary vector for the dish based on its ingredients
        dish_vector = [1 if ingredient in ingredients.split(',') else 0 for ingredient in ingredient_list]
        dish_matrix.append(dish_vector)
    return dish_matrix

# Create the binary feature matrix
dish_matrix = create_binary_matrix(df, ingredient_list)

# Convert it to a DataFrame for easier handling
dish_df = pd.DataFrame(dish_matrix, columns=ingredient_list, index=df['Dish_Name'])

# Function to recommend a dish based on liked ingredients
def recommend_dish(liked_ingredients):
    # Convert liked ingredients into a binary vector
    liked_vector = [1 if ingredient in liked_ingredients else 0 for ingredient in ingredient_list]

    # Calculate the cosine similarity between the liked ingredients and all dishes
    similarities = cosine_similarity([liked_vector], dish_df)

    # Get the dish with the highest similarity
    recommended_dish_index = similarities.argmax()
    recommended_dish = dish_df.index[recommended_dish_index]

    return recommended_dish

# Example: A student likes these ingredients
liked_ingredients = ['garlic', 'onion', 'cumin', 'yogurt']

# Recommend a dish
recommended_dish = recommend_dish(liked_ingredients)
print(f"Recommended dish for the student: {recommended_dish}")


Index(['Dish_Name', 'Ingredients'], dtype='object')
Recommended dish for the student: Couscous


# Deep learning

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

df = pd.read_csv('dishes_and_ingredients_updated.csv')

 # ingredients are seperated with coma
def parse_ingredients(ingredients_str):
    # Split the string by commas and strip extra spaces
    return [ingredient.strip() for ingredient in ingredients_str.split(',')]

# Apply the parsing function to each row in the ingredients column
df['ingredients'] = df['Ingredients'].apply(parse_ingredients)

# Flatten all unique ingredients
all_ingredients = set()
for ingredients in df['Ingredients']:
    all_ingredients.update(ingredients)

all_ingredients = list(all_ingredients)


mlb = MultiLabelBinarizer()
ingredient_matrix = mlb.fit_transform(df['Ingredients'])


model = Sequential([
    Dense(128, input_dim=ingredient_matrix.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output a score for each dish
])


model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])


labels = np.random.randint(2, size=(df.shape[0], 1))


model.fit(ingredient_matrix, labels, epochs=50, batch_size=32)

# EXAMPLE OF TESTING
student_likes = ['olive oil', 'garlic', 'tomato']
student_vector = mlb.transform([student_likes])

# Predict the dish score for each dish
dish_scores = model.predict(ingredient_matrix)

# Add predicted scores to the dataframe
df['predicted_score'] = dish_scores

# Recommend the dish with the highest score
best_dish_index = np.argmax(dish_scores)
recommended_dish = df.iloc[best_dish_index]

print("Recommended Dish: ", recommended_dish['Dish_Name'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - accuracy: 0.5190 - loss: 0.7021
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.6119 - loss: 0.6839
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6186 - loss: 0.6712 
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7078 - loss: 0.6607
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6632 - loss: 0.6538 
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5911 - loss: 0.6496 
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5465 - loss: 0.6416 
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5911 - loss: 0.6319 
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 192ms/step
Recommended Dish:  Chtitha Batata


In [4]:
import joblib
joblib.dump(model, 'dish_predictor_model.pkl')

['dish_predictor_model.pkl']

In [5]:
model.save('dish_predictor_model.h5')



In [6]:
import tensorflow as tf

# Load your trained model
model = tf.keras.models.load_model('dish_predictor_model.h5')

# Convert to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TFLite model
with open('dish_predictor_model.tflite', 'wb') as f:
    f.write(tflite_model)



Saved artifact at '/tmp/tmpi_zfvsq2'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 33), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  137805637594528: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137805637596992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137805638179072: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137805638183824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137805638188224: TensorSpec(shape=(), dtype=tf.resource, name=None)
  137805638180480: TensorSpec(shape=(), dtype=tf.resource, name=None)


In [7]:
from google.colab import files
files.download('dish_predictor_model.tflite')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.8.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.1 (from gradio)
  Downloading gradio_client-1.5.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [11]:
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

df = pd.read_csv('dishes_and_ingredients_updated.csv')

# ingredients are separated with coma
def parse_ingredients(ingredients_str):
    # Split the string by commas and strip extra spaces
    return [ingredient.strip() for ingredient in ingredients_str.split(',')]

# Apply the parsing function to each row in the ingredients column
df['ingredients'] = df['Ingredients'].apply(parse_ingredients)

# Flatten all unique ingredients
all_ingredients = set()
for ingredients in df['Ingredients']:
    all_ingredients.update(ingredients)

all_ingredients = list(all_ingredients)


mlb = MultiLabelBinarizer()
ingredient_matrix = mlb.fit_transform(df['ingredients'])


model = Sequential([
    Dense(128, input_dim=ingredient_matrix.shape[1], activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Output a score for each dish
])


model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])


labels = np.random.randint(2, size=(df.shape[0], 1))


model.fit(ingredient_matrix, labels, epochs=50, batch_size=32, verbose=0) # Set verbose=0 to suppress training output in Gradio

def recommend_dish(ingredients):
    student_likes = [ingredient.strip() for ingredient in ingredients.split(',')]
    student_vector = mlb.transform([student_likes])
    dish_scores = model.predict(ingredient_matrix)
    best_dish_index = np.argmax(dish_scores)
    recommended_dish = df.iloc[best_dish_index]
    return recommended_dish['Dish_Name']

iface = gr.Interface(
    fn=recommend_dish,
    inputs=gr.Textbox(lines=2, placeholder="Enter ingredients separated by commas"),
    outputs="text",
    title="Dish Recommender",
    description="Enter your favorite ingredients and get a dish recommendation!"
)

iface.launch()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5817a8c8a8745a7527.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


