In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.model_selection import train_test_split
import random
from tqdm import tqdm

# Load dataset
df = pd.read_csv("/content/drive/MyDrive/recommendation_system/food_order.csv")

# Filter out "Not given" ratings
df = df[df["rating"] != "Not given"]
df["rating"] = df["rating"].astype(int)

# Encode users and items
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
df["user"] = user_encoder.fit_transform(df["customer_id"])
df["item"] = item_encoder.fit_transform(df["restaurant_name"])

num_users = df["user"].nunique()
num_items = df["item"].nunique()

# Sort and split (leave-one-out)
df = df.sort_values(["user", "order_id"])
test = df.groupby("user").tail(1)
train = df.drop(test.index)

# Build negative samples for training
def generate_training_data(train_df, num_items, num_neg=4):
    user_input, item_input, labels = [], [], []

    user_item_set = set(zip(train_df.user, train_df.item))

    for (u, i) in tqdm(user_item_set):
        user_input.append(u)
        item_input.append(i)
        labels.append(1)

        for _ in range(num_neg):
            j = random.randint(0, num_items - 1)
            while (u, j) in user_item_set:
                j = random.randint(0, num_items - 1)
            user_input.append(u)
            item_input.append(j)
            labels.append(0)

    return np.array(user_input), np.array(item_input), np.array(labels)

train_users, train_items, train_labels = generate_training_data(train, num_items)

# Define Keras model
def build_model(num_users, num_items, embed_dim=32):
    user_input = Input(shape=(1,))
    item_input = Input(shape=(1,))

    user_embed = Embedding(num_users, embed_dim)(user_input)
    item_embed = Embedding(num_items, embed_dim)(item_input)

    user_vec = Flatten()(user_embed)
    item_vec = Flatten()(item_embed)

    x = Concatenate()([user_vec, item_vec])
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[user_input, item_input], outputs=output)
    model.compile(optimizer=Adam(0.001), loss=BinaryCrossentropy(), metrics=['accuracy'])
    return model

model = build_model(num_users, num_items)

# Train model
model.fit([train_users, train_items], train_labels, batch_size=128, epochs=5, verbose=1)

# Evaluate using Hit Ratio@10
def hit_ratio_at_k(model, test_df, num_items, k=10):
    hits = 0
    for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
        u = row["user"]
        true_item = row["item"]

        # Generate 99 negative samples
        negative_items = set()
        while len(negative_items) < 99:
            j = random.randint(0, num_items - 1)
            if j != true_item:
                negative_items.add(j)
        test_items = list(negative_items) + [true_item]

        users = np.full(len(test_items), u)
        predictions = model.predict([users, np.array(test_items)], verbose=0).flatten()

        top_k_items = np.argsort(predictions)[-k:]
        recommended_items = np.array(test_items)[top_k_items]

        if true_item in recommended_items:
            hits += 1

    return hits / len(test_df)

hr10 = hit_ratio_at_k(model, test, num_items, k=10)
print(f"\n🎯 Hit Ratio@10: {hr10:.4f}")


100%|██████████| 261/261 [00:00<00:00, 130369.58it/s]


Epoch 1/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7559 - loss: 0.6814
Epoch 2/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8052 - loss: 0.6387 
Epoch 3/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8078 - loss: 0.5797 
Epoch 4/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8051 - loss: 0.5027 
Epoch 5/5
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8137 - loss: 0.4184 


100%|██████████| 859/859 [01:46<00:00,  8.10it/s]


🎯 Hit Ratio@10: 0.4761



