## Collaborative Filtering Recommender System with Python
### TensorFlow for Collaborative Filtering

In [3]:
# Show the kernel's current working directory; the relative dataset paths used
# when loading the MovieLens files are resolved against it.
pwd

'/mnt/e/tutor/NLP/recomendationSystem'

In [4]:
# Step 1: Preparing the Data
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import gradio as gr
import numpy as np

# Fetch the MovieLens 100K archive first if it is not on disk yet:
# !wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
# !unzip ml-100k.zip

# Title lookup table: only the first two '|'-separated fields of u.item are read.
# NOTE: item_id in this frame is left exactly as stored in the file (no 0-index shift).
movie_titles = pd.read_csv(
    'MovieLens-100k/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    usecols=[0, 1],
    names=['item_id', 'title'],
)

# Ratings table: tab-separated, no header row, so the column names are supplied here.
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv(
    'MovieLens-100k/ml-100k/u.data',
    sep='\t',
    names=column_names,
).assign(
    # Shift both id columns down by one so they can be used as 0-based indices.
    user_id=lambda frame: frame['user_id'] - 1,
    item_id=lambda frame: frame['item_id'] - 1,
)

# Hold out 20% of the ratings for validation; fixed seed keeps the split reproducible.
train_ratings, val_ratings = train_test_split(
    ratings,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Display the column-name list together with the ratings frame
# (user_id / item_id have already been shifted to 0-based at this point).
column_names,ratings

(['user_id', 'item_id', 'rating', 'timestamp'],
        user_id  item_id  rating  timestamp
 0          195      241       3  881250949
 1          185      301       3  891717742
 2           21      376       1  878887116
 3          243       50       2  880606923
 4          165      345       1  886397596
 ...        ...      ...     ...        ...
 99995      879      475       3  880175444
 99996      715      203       5  879795543
 99997      275     1089       1  874795795
 99998       12      224       2  882399156
 99999       11      202       3  879959583
 
 [100000 rows x 4 columns])

In [6]:
# Step 2: Define the Neural Network Model
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout

class CollaborativeFilteringModel(Model):
    """Embedding + MLP model that predicts a rating for (user, item) id pairs.

    Each user id and item id is mapped to a learned ``embedding_dim``-sized
    vector (he_normal initialised, L2-regularised with 1e-6). The two vectors
    are concatenated and passed through Dense(128, relu) -> Dense(64, relu) ->
    Dense(1, linear), with Dropout(0.2) applied after each hidden layer.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Size of each user/item embedding vector.
    """

    def __init__(self, num_users, num_items, embedding_dim=50):
        super().__init__()

        # User and item embedding layers
        self.user_embedding = Embedding(
            input_dim=num_users,
            output_dim=embedding_dim,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )
        self.item_embedding = Embedding(
            input_dim=num_items,
            output_dim=embedding_dim,
            embeddings_initializer="he_normal",
            embeddings_regularizer=tf.keras.regularizers.l2(1e-6),
        )

        # Weight-free helper layers are created once here rather than being
        # re-instantiated on every forward pass inside call().
        self.flatten_layer = Flatten()
        self.concat_layer = Concatenate()

        # Dense layers
        self.fc1 = Dense(128, activation='relu')
        self.fc2 = Dense(64, activation='relu')
        self.fc3 = Dense(1, activation='linear')
        self.dropout = Dropout(0.2)

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: A pair ``(user_id, item_id)`` of integer id tensors/arrays.
            training: Passed through to the dropout layer so that dropout is
                only active in training mode. ``None`` leaves the decision to
                Keras.

        Returns:
            The output of the final linear Dense(1) layer (the predicted rating).
        """
        user_id, item_id = inputs

        # Look up embeddings and collapse any extra axis down to (batch, features)
        user_embedded = self.flatten_layer(self.user_embedding(user_id))
        item_embedded = self.flatten_layer(self.item_embedding(item_id))

        # Concatenate user and item embeddings
        x = self.concat_layer([user_embedded, item_embedded])

        # Pass through dense layers, forwarding the training flag explicitly
        x = self.dropout(self.fc1(x), training=training)
        x = self.dropout(self.fc2(x), training=training)
        rating = self.fc3(x)

        return rating

In [7]:
# Step 3: Compile and Train the Model
# The embedding tables are indexed directly by the 0-based ids, so each table
# needs (largest id + 1) rows. Counting distinct ids is only equivalent when
# the ids have no gaps; max + 1 is correct in both cases.
num_users = int(ratings['user_id'].max()) + 1
num_items = int(ratings['item_id'].max()) + 1
embedding_dim = 50

# Instantiate and compile the model
model = CollaborativeFilteringModel(num_users, num_items, embedding_dim)
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

# Prepare inputs for training
train_user_ids = train_ratings['user_id'].values
train_item_ids = train_ratings['item_id'].values
train_ratings_values = train_ratings['rating'].values

val_user_ids = val_ratings['user_id'].values
val_item_ids = val_ratings['item_id'].values
val_ratings_values = val_ratings['rating'].values

# Train the model
history = model.fit(
    [train_user_ids, train_item_ids], train_ratings_values,
    validation_data=([val_user_ids, val_item_ids], val_ratings_values),
    batch_size=64,
    epochs=10
)

Epoch 1/10


2024-12-19 06:30:01.512648: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-19 06:30:01.599479: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-19 06:30:01.599563: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-19 06:30:01.605247: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-12-19 06:30:01.605346: I external/local_xla/xla/stream_executor

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Step 4: Define the Gradio Interface for Recommendations
def gradio_recommend_tf(user_id, num_recommendations=5):
    """Return the top predicted movies for a user as newline-separated text.

    Args:
        user_id: 1-based user id as entered in the UI (int or float; the
            Gradio number widget may deliver either).
        num_recommendations: How many movies to list. ``None`` falls back to 5.

    Returns:
        A string with one "Movie: ..." line per recommendation, or a short
        explanatory message when the input is missing or out of range.
    """
    if user_id is None:
        return "Please enter a user ID."

    # Coerce widget values to plain ints: floats cannot be used as slice bounds.
    user_idx = int(user_id) - 1  # Adjust for 0-indexing
    top_n = 5 if num_recommendations is None else int(num_recommendations)
    if top_n <= 0:
        return "Number of recommendations must be at least 1."

    # Reject ids outside the range present in the ratings data.
    max_user_idx = int(ratings['user_id'].max())
    if not 0 <= user_idx <= max_user_idx:
        return f"User ID must be between 1 and {max_user_idx + 1}."

    # Candidate movies: every movie in the ratings data this user has not rated.
    rated_item_ids = ratings.loc[ratings['user_id'] == user_idx, 'item_id']
    candidate_ids = ratings.loc[~ratings['item_id'].isin(rated_item_ids), 'item_id'].unique()
    if len(candidate_ids) == 0:
        return "This user has already rated every movie."

    # Score all candidates in ONE predict call instead of one call per movie.
    user_batch = np.array([user_idx] * len(candidate_ids))
    scores = model.predict([user_batch, np.array(candidate_ids)], verbose=0).reshape(-1)

    # Sort by predicted rating (highest first) and keep the requested number
    top_movies = sorted(zip(candidate_ids, scores), key=lambda pair: pair[1], reverse=True)[:top_n]

    # Get movie titles
    recommendations = []
    for movie_id, score in top_movies:
        # movie_titles keeps the dataset's original 1-based ids, while movie_id
        # here is 0-based, so convert back before looking the title up.
        original_id = int(movie_id) + 1
        matches = movie_titles.loc[movie_titles['item_id'] == original_id, 'title'].values
        movie_name = matches[0] if len(matches) else "Unknown title"
        recommendations.append(f"Movie: {movie_name} (Movie ID: {original_id}, Predicted Score: {score:.2f})")

    return "\n".join(recommendations)

# Gradio interface
# precision=0 makes both widgets deliver integers, and the default values mean
# the callback never receives an empty (None) field on first use.
iface = gr.Interface(
    fn=gradio_recommend_tf,
    inputs=[
        gr.Number(label="User ID", value=1, precision=0),
        gr.Number(label="Number of Recommendations", value=5, precision=0)
    ],
    outputs="text",
    title="Neural Network Movie Recommender System with TensorFlow",
    description="Enter a user ID to get movie recommendations based on similar users using a neural network."
)

iface.launch()

* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




Created dataset file at: .gradio/flagged/dataset1.csv
