In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Load the data
column_names = ['user_id', 'movie_id', 'rating', 'timestamp']
data = pd.read_csv('u.data', sep='\t', names=column_names)

# Load movie titles
movie_titles = pd.read_csv('u.item', sep='|', header=None, encoding='latin-1')
movie_titles = movie_titles[[0, 1]]
movie_titles.columns = ['movie_id', 'title']

# Merge the dataframes
data = pd.merge(data, movie_titles, on='movie_id')

# Normalize user and movie IDs
data['user_id'] = data['user_id'].astype('category').cat.codes.values
data['movie_id'] = data['movie_id'].astype('category').cat.codes.values

# Split the data into training and testing sets
train, test = train_test_split(data, test_size=0.2, random_state=42)


In [2]:
class RecommenderNet(tf.keras.Model):
    def __init__(self, num_users, num_movies, embedding_size, **kwargs):
        super(RecommenderNet, self).__init__(**kwargs)
        self.num_users = num_users
        self.num_movies = num_movies
        self.user_embedding = tf.keras.layers.Embedding(num_users + 1, embedding_size, embeddings_initializer='he_normal')
        self.user_bias = tf.keras.layers.Embedding(num_users + 1, 1)
        self.movie_embedding = tf.keras.layers.Embedding(num_movies + 1, embedding_size, embeddings_initializer='he_normal')
        self.movie_bias = tf.keras.layers.Embedding(num_movies + 1, 1)

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        user_bias = self.user_bias(inputs[:, 0])
        movie_vector = self.movie_embedding(inputs[:, 1])
        movie_bias = self.movie_bias(inputs[:, 1])

        dot_user_movie = tf.tensordot(user_vector, movie_vector, 2)

        x = dot_user_movie + user_bias + movie_bias

        return tf.nn.sigmoid(x)

num_users = len(data['user_id'].unique())
num_movies = len(data['movie_id'].unique())
embedding_size = 50

model = RecommenderNet(num_users, num_movies, embedding_size)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))


In [3]:
train_data = tf.data.Dataset.from_tensor_slices((train[['user_id', 'movie_id']].values, train['rating'].values))
test_data = tf.data.Dataset.from_tensor_slices((test[['user_id', 'movie_id']].values, test['rating'].values))

batch_size = 64
train_data = train_data.shuffle(len(train)).batch(batch_size)
test_data = test_data.batch(batch_size)

history = model.fit(train_data, epochs=3, validation_data=test_data)


Epoch 1/3
Epoch 2/3
Epoch 3/3


In [22]:
def add_new_user_preferences(data, new_user_preferences, movie_titles):
    # Create a DataFrame for new user preferences
    new_user_ratings = pd.DataFrame(new_user_preferences)

    # Encode the movie titles
    movie_to_id = {v: k for k, v in movie_titles[['movie_id', 'title']].to_dict('split')['data']}
    new_user_ratings['movie_id'] = new_user_ratings['movie_title'].map(movie_to_id)

    # Normalize ratings to match the training data
    new_user_ratings['user_id'] = data['user_id'].max() + 1  # Assign a new unique user ID
    new_user_ratings['rating'] = new_user_ratings['user_rating'] / 5.0  # Scale ratings to [0, 1]

    # Drop unnecessary columns
    new_user_ratings = new_user_ratings[['user_id', 'movie_id', 'rating']]

    # Append to the existing data using concat
    augmented_data = pd.concat([data, new_user_ratings], ignore_index=True)
    return augmented_data, new_user_ratings['user_id'].iloc[0]

# Function to recommend movies
def recommend_movies(model, data, new_user_preferences, movie_titles, top_k=10, retrain_epochs=1):
    # Add new user preferences to the dataset
    augmented_data, new_user_id = add_new_user_preferences(data, new_user_preferences, movie_titles)

    # Create datasets
    train_data = tf.data.Dataset.from_tensor_slices((augmented_data[['user_id', 'movie_id']].values, augmented_data['rating'].values))
    batch_size = 64
    train_data = train_data.shuffle(len(augmented_data)).batch(batch_size)

    # Retrain the model briefly with the new user preferences
    model.fit(train_data, epochs=retrain_epochs, verbose=1)

    # Generate predictions for all movies for the new user
    all_movies = movie_titles['movie_id'].values
    user_movie_array = np.hstack((np.array([new_user_id] * len(all_movies)).reshape(-1, 1), all_movies.reshape(-1, 1)))

    predictions = model.predict(user_movie_array).flatten()
    top_indices = predictions.argsort()[-top_k:][::-1]

    recommended_movie_ids = all_movies[top_indices]
    recommended_movies = movie_titles[movie_titles['movie_id'].isin(recommended_movie_ids)]

    return recommended_movies['title'].values

# Reload the original dataset
data = pd.read_csv('u.data', sep='\t', names=column_names)
data = pd.merge(data, movie_titles, on='movie_id')
data['user_id'] = data['user_id'].astype('category').cat.codes.values
data['movie_id'] = data['movie_id'].astype('category').cat.codes.values
data['rating'] = data['rating'] / 5.0  # Normalize ratings

# Build and compile the model again to reset it
num_users = len(data['user_id'].unique())
num_movies = len(data['movie_id'].unique())
embedding_size = 50

model = RecommenderNet(num_users, num_movies, embedding_size)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))

# Train the model on the original data
train_data = tf.data.Dataset.from_tensor_slices((data[['user_id', 'movie_id']].values, data['rating'].values))
train_data = train_data.shuffle(len(data)).batch(batch_size)
model.fit(train_data, epochs=5, validation_data=test_data)

# Test the recommendations
new_user_preferences_1 = [
    {"movie_title": "Lion King, The (1994)", "user_rating": 5.0},
    {"movie_title": "Akira (1988)", "user_rating": 5.0},
    {"movie_title": "Cinderella (1950)", "user_rating": 4.0},
    {"movie_title": "Aladdin and the King of Thieves (1996)", "user_rating": 4.0},
    {"movie_title": "Dumbo (1941)", "user_rating": 4.0}
]

new_user_preferences_2 = [
    {"movie_title": "Star Wars (1977)", "user_rating": 5.0},
    {"movie_title": "Stargate (1994)", "user_rating": 5.0},
    {"movie_title": "Robert A. Heinlein's The Puppet Masters (1994)", "user_rating": 4.0},
    {"movie_title": "Jurassic Park (1993)", "user_rating": 4.0},
    {"movie_title": "Twelve Monkeys (1995)", "user_rating": 4.0},
    {"movie_title": "Terminator 2: Judgment Day (1991)", "user_rating": 4.0}
]

recommendations_1 = recommend_movies(model, data, new_user_preferences_1, movie_titles)
print(recommendations_1)
recommendations_2 = recommend_movies(model, data, new_user_preferences_2, movie_titles)
print(recommendations_2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
['Twelve Monkeys (1995)' 'Seven (Se7en) (1995)'
 'Muppet Treasure Island (1996)' 'Braveheart (1995)' 'I.Q. (1994)'
 'Santa Clause, The (1994)' 'James and the Giant Peach (1996)'
 'Philadelphia Story, The (1940)' 'Vertigo (1958)' 'M (1931)']
['Seven (Se7en) (1995)' 'Ed Wood (1994)' 'I.Q. (1994)'
 'Professional, The (1994)' 'Santa Clause, The (1994)'
 'Monty Python and the Holy Grail (1974)' 'Client, The (1994)'
 'Spy Hard (1996)' 'Vertigo (1958)' 'Some Like It Hot (1959)']


In [5]:
model.save('recommendation_model', save_format='tf')

In [6]:
!zip -r file.zip recommendation_model

from google.colab import files
files.download("file.zip")

  adding: recommendation_model/ (stored 0%)
  adding: recommendation_model/assets/ (stored 0%)
  adding: recommendation_model/saved_model.pb (deflated 87%)
  adding: recommendation_model/variables/ (stored 0%)
  adding: recommendation_model/variables/variables.index (deflated 56%)
  adding: recommendation_model/variables/variables.data-00000-of-00001 (deflated 8%)
  adding: recommendation_model/fingerprint.pb (stored 0%)
  adding: recommendation_model/keras_metadata.pb (deflated 81%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>