# Model Evaluation

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import re

from gensim.models import Word2Vec
from sklearn.metrics.pairwise import cosine_similarity
import mlflow

import json
import requests
import boto3

import streamlit as st

import warnings
warnings.filterwarnings("ignore")

### Logging

In [None]:
model_path = "word2vec.model"
recipe_path = "processed_cookbook.pkl"

mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("word2vec_experiment")

with mlflow.start_run():
    # mlflow.log_params(params)

    model_info = mlflow.pyfunc.log_model(
        name="word2vec_model",
        python_model="./models/word2vec.py",
        artifacts={"model_path": model_path, "recipe_path": recipe_path},
        pip_requirements=["gensim==4.3.3"]
    )
    
    # Validation 
    

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 2900.63it/s] 
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 1110.19it/s]


🏃 View run masked-goose-28 at: http://127.0.0.1:5000/#/experiments/505961469723674826/runs/d64ee3d5309240a4a2b7c2261d6137ca
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/505961469723674826


### Validation

In [4]:
model = mlflow.pyfunc.load_model(model_info._model_uri)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 48.22it/s]


In [None]:
def connect_database():
    """Connect to the DynamoDB database."""
    dynamodb = boto3.resource("dynamodb",
                            aws_access_key_id=st.secrets.s3.AWS_ACCESS_KEY_ID,
                            aws_secret_access_key=st.secrets.s3.AWS_SECRET_ACCESS_KEY,
                            region_name=st.secrets.s3.AWS_DEFAULT_REGION)
    table = dynamodb.Table(st.secrets.s3.DB_NAME)
    return table

table = connect_database()

user_id = 110833230122006731136 # alfredmastann@gmail.com test user
user_item = table.get_item(Key={"user_id": int(user_id)})["Item"]

liked_idx = list(map(int, user_item["liked_idx"].keys()))
disliked_idx = list(map(int, user_item["disliked_idx"].keys()))

loaded_model = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)
predicted = loaded_model.predict([liked_idx, disliked_idx])

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 88.29it/s] 


In [6]:
np.asarray(json.loads(predicted))

array([5499,  583, 4381, 5703,  756, 5575, 4930,  735, 3179, 3934])

In [25]:
user_item["liked_idx"]

{'6153': '2025-07-22 22:24:01',
 '4195': '2025-07-22 22:23:28',
 '5485': '2025-07-22 22:24:00',
 '6212': '2025-07-22 22:24:17',
 '6079': '2025-07-22 22:24:44',
 '6092': '2025-07-22 22:23:59',
 '3220': '2025-07-22 22:25:03',
 '6173': '2025-07-22 22:24:33',
 '5816': '2025-07-22 22:24:49',
 '499': '2025-07-22 22:23:33',
 '995': '2025-07-22 22:25:09',
 '4005': '2025-07-22 22:24:59',
 '4226': '2025-07-22 22:23:45',
 '93': '2025-07-22 22:24:03',
 '5437': '2025-07-22 22:23:48',
 '5778': '2025-07-22 22:24:06',
 '6129': '2025-07-22 22:24:29',
 '617': '2025-07-22 22:24:43',
 '4701': '2025-07-22 22:24:47',
 '6163': '2025-07-22 22:24:07',
 '5791': '2025-07-22 22:23:57',
 '5593': '2025-07-22 22:24:22',
 '5794': '2025-07-22 22:23:26',
 '4485': '2025-07-22 22:24:41',
 '5993': '2025-07-22 22:24:31',
 '4102': '2025-07-22 22:25:17',
 '1792': '2025-07-22 22:23:37',
 '1076': '2025-07-22 22:24:55',
 '6081': '2025-07-22 22:25:05',
 '3194': '2025-07-22 22:24:42',
 '1008': '2025-07-22 22:23:35',
 '5508': '202

In [8]:
liked_df = pd.DataFrame().from_dict(user_item["liked_idx"], orient="index").reset_index().rename(columns={"index": "recipe_id", 0: "date"})
liked_df["date"] = pd.to_datetime(liked_df["date"])
liked_df["recipe_id"] = liked_df["recipe_id"].astype(int)
liked_df.sort_values(by="date", ascending=False, inplace=True)
liked_df.reset_index(drop=True, inplace=True)
like_test = liked_df.iloc[:-5, :]
like_test

Unnamed: 0,recipe_id,date
0,4102,2025-07-22 22:25:17
1,5601,2025-07-22 22:25:12
2,995,2025-07-22 22:25:09
3,6081,2025-07-22 22:25:05
4,3220,2025-07-22 22:25:03
5,4005,2025-07-22 22:24:59
6,1076,2025-07-22 22:24:55
7,149,2025-07-22 22:24:50
8,5816,2025-07-22 22:24:49
9,4701,2025-07-22 22:24:47


In [9]:
disliked_df = pd.DataFrame().from_dict(user_item["disliked_idx"], orient="index").reset_index().rename(columns={"index": "recipe_id", 0: "date"})
disliked_df["date"] = pd.to_datetime(disliked_df["date"])
disliked_df["recipe_id"] = disliked_df["recipe_id"].astype(int)
disliked_df.sort_values(by="date", ascending=False, inplace=True)
disliked_df.reset_index(drop=True, inplace=True)
dislike_test = disliked_df.iloc[:-5, :]
dislike_test

Unnamed: 0,recipe_id,date
0,4006,2025-07-22 22:25:16
1,6228,2025-07-22 22:25:15
2,219,2025-07-22 22:25:14
3,6025,2025-07-22 22:25:13
4,1053,2025-07-22 22:25:08
5,5978,2025-07-22 22:25:07
6,930,2025-07-22 22:25:06
7,3193,2025-07-22 22:25:02
8,762,2025-07-22 22:25:01
9,821,2025-07-22 22:25:00


In [10]:
prediction = loaded_model.predict([like_test["recipe_id"].values.tolist(), dislike_test["recipe_id"].values.tolist()])
prediction = np.asarray(json.loads(prediction))
prediction

array([3934, 5892, 3179, 5794, 5301, 5585, 3214, 5575, 5499, 5703])

Recall@K (K = 10)

In [27]:
np.isin(prediction, liked_df["recipe_id"].values.tolist()).sum()/len(liked_df)

0.025

Precision@K (K = 10)

In [28]:
np.isin(prediction, liked_df["recipe_id"].values.tolist()).sum()/10

0.1

### Registering

In [None]:
# Register model if validation is successful (RUN THIS TO UPDATE THE STREAMLIT MODEL)
with mlflow.start_run():
    mlflow.register_model(
        model_uri=model_info._model_uri,
        name="word2vec_model"
    )

Successfully registered model 'word2vec_model'.
2025/08/11 17:40:28 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: word2vec_model, version 1


🏃 View run amusing-stoat-209 at: http://127.0.0.1:5000/#/experiments/505961469723674826/runs/26a9c02ffea8453cb99e5b595e5f5cfd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/505961469723674826


Created version '1' of model 'word2vec_model'.


In [None]:
# Testing the API call
params = {"liked_idx": liked_idx, "disliked_idx": disliked_idx}
response = requests.get(f"http://localhost:8000/recommend/", json=params)
response.text

'"[3410, 3325, 2352, 5618, 2516, 6250, 1552, 4605, 4987, 6009]"'