In [18]:
import pandas as pd
import torch
import gradio as gr
from fastai.collab import *
from fastai.tabular.all import *
from pathlib import Path

In [19]:
# Read the ratings table from the local 'ml-1m' folder.
# The file has no header row and uses '::' as separator, which requires the
# (slower) python parsing engine.
base_path = Path.cwd() / 'ml-1m'

ratings = pd.read_csv(
    base_path / 'ratings.dat',
    sep='::',
    engine='python',
    names=['user', 'movie', 'rating', 'timestamp'],
)

# IDs are labels, not quantities: store them as strings so they are treated
# as categories downstream.
ratings = ratings.astype({'user': str, 'movie': str})

In [20]:
# Read the movie catalogue (latin-1 encoded, '::'-separated, no header row)
# and keep the IDs as strings so they line up with the string IDs in `ratings`.
movies = pd.read_csv(
    base_path / 'movies.dat',
    sep='::',
    engine='python',
    encoding='latin-1',
    names=['movie_id', 'title', 'genres'],
).astype({'movie_id': str})

# Lookup table movie_id -> title, used to show titles instead of raw IDs
movie_id_to_title = movies.set_index('movie_id')['title'].to_dict()

# To inspect the catalogue, display: movies[['movie_id', 'title']].head()

In [21]:
# Wrap the ratings frame in collaborative-filtering dataloaders
# (user column, item column, target rating column; batches of 64 rows).
dls = CollabDataLoaders.from_df(
    ratings,
    user_name='user',
    item_name='movie',
    rating_name='rating',
    bs=64,
)

In [22]:
# Build the collaborative-filtering learner: 50 latent factors,
# predictions limited to y_range=(0, 5.5), RMSE reported as the metric.
learn = collab_learner(
    dls,
    n_factors=50,
    y_range=(0, 5.5),
    metrics=rmse,
)

# Train for 5 epochs with a maximum learning rate of 5e-3 (0.005)
learn.fit_one_cycle(5, lr_max=5e-3)

epoch,train_loss,valid_loss,_rmse,time
0,0.792953,0.870507,0.93301,01:21
1,0.686489,0.920276,0.95931,01:19
2,0.57186,0.877945,0.936987,01:40
3,0.467346,0.841483,0.917324,01:46
4,0.43338,0.83083,0.911499,01:31


In [23]:
# Predict rating for a user/movie
def predict_rating(user, movie_title):
    """Predict the rating a user would give to a movie looked up by title.

    The title is matched case-insensitively against ``movie_id_to_title``.
    An exact title match wins; otherwise the first title (in catalogue order)
    that contains the query as a substring is used.

    Args:
        user: User ID (int or str). It is converted to a stripped string and
            checked against ``dls.classes['user']``.
        movie_title: Full or partial movie title.

    Returns:
        str: A message containing the predicted rating, or an explanation of
        why no prediction could be made (unknown title, unknown user, or a
        movie that is missing from ``dls.classes['movie']``).
    """
    user = str(user).strip()
    query = movie_title.lower().strip()

    # An empty query is a substring of every title; reject it instead of
    # silently predicting for the first movie in the catalogue.
    if not query:
        return f"Movie title '{movie_title}' not found."

    # Single pass over the catalogue: stop at an exact match, otherwise
    # remember the first title that merely contains the query.
    movie = None
    for mid, title in movie_id_to_title.items():
        title_lower = title.lower()
        if title_lower == query:
            movie = mid
            break
        if movie is None and query in title_lower:
            movie = mid

    if movie is None:
        return f"Movie title '{movie_title}' not found."

    if user not in dls.classes['user']:
        return f"User {user} not found."
    if movie not in dls.classes['movie']:
        return f"Movie '{movie_title}' not found in training data."

    # Encode the single (user, movie) pair with the same preprocessing as training
    test_df = pd.DataFrame([[user, movie]], columns=['user', 'movie'])
    test_dl = dls.test_dl(test_df)

    # Run on whatever device the model already lives on. Moving the batch
    # (rather than calling learn.model.cpu()) avoids changing the device of
    # the shared model as a hidden side effect of a prediction.
    model = learn.model
    device = next(model.parameters()).device
    batch = tuple(b.to(device) for b in first(test_dl))

    model.eval()  # inference mode; matters for models with dropout/batch-norm layers
    with torch.no_grad():
        pred = model(*batch)

    pred_rating = pred.item()

    return f"Predicted rating by user {user} for '{movie_id_to_title[movie]}': {pred_rating:.2f}"

# Interactive demo: ask for a user ID and a (partial) movie title,
# then display the message returned by predict_rating.
user_input = input("Enter user ID: ")
movie_title_input = input("Enter movie title: ")
result = predict_rating(user=user_input, movie_title=movie_title_input)
print(result)

Predicted rating by user 24 for 'X-Men (2000)': 3.39


In [24]:
# Find movies similar to target movie
def movies_similar_to(movie_title, n=5):
    """Return up to ``n`` titles whose item embeddings are closest to a movie.

    The title is matched case-insensitively against ``movie_id_to_title``.
    An exact title match wins; otherwise the first title (in catalogue order)
    that contains the query as a substring is used. Similarity is the cosine
    similarity between rows of ``learn.model.i_weight``.

    Args:
        movie_title: Full or partial movie title.
        n: Maximum number of similar titles to return (default 5).

    Returns:
        list[str]: Titles ordered from most to least similar, excluding the
        target movie itself. The list is empty when the title is blank or
        unmatched, when the matched movie is not in ``dls.classes['movie']``,
        or when ``n`` is not positive.
    """
    query = movie_title.lower().strip()

    # An empty query is a substring of every title, so it would otherwise
    # silently resolve to the first movie in the catalogue.
    if not query or n <= 0:
        return []

    # Single pass over the catalogue: stop at an exact match, otherwise
    # remember the first title that merely contains the query.
    movie_id = None
    for mid, title in movie_id_to_title.items():
        title_lower = title.lower()
        if title_lower == query:
            movie_id = mid
            break
        if movie_id is None and query in title_lower:
            movie_id = mid

    if movie_id is None:
        return []

    movie_classes = list(dls.classes['movie'])  # convert CategoryMap to list
    try:
        movie_idx = movie_classes.index(movie_id)
    except ValueError:
        # The movie is in the catalogue but not in the model's vocabulary,
        # so it has no embedding to compare against.
        return []

    movie_embs = learn.model.i_weight.weight.data
    target_emb = movie_embs[movie_idx]

    # (1, n_factors) vs (n_movies, n_factors) -> one similarity score per movie
    sims = torch.nn.functional.cosine_similarity(target_emb.unsqueeze(0), movie_embs)

    # Walk candidates from most to least similar. Ranking everything (instead
    # of topk(n + 1)) cannot fail when n exceeds the vocabulary size and lets
    # us skip entries without having to guess how many extras to fetch.
    similar_movies = []
    for idx in sims.argsort(descending=True).tolist():
        if idx == movie_idx:
            continue  # exclude the target itself
        title = movie_id_to_title.get(movie_classes[idx])
        if title is None:
            # Vocabulary entry with no catalogue title (e.g. fastai's '#na#'
            # placeholder category); skip instead of raising KeyError.
            continue
        similar_movies.append(title)
        if len(similar_movies) == n:
            break

    return similar_movies

# Interactive demo: ask for a (partial) title and list its nearest neighbours.
movie_input = input("Enter a movie title: ")
results = movies_similar_to(movie_input)
if results:
    print(f"Movies similar to '{movie_input}':")
    # One "- title" line per similar movie
    print("\n".join(f"- {title}" for title in results))
else:
    print("Movie not found.")

Movies similar to 'Star Wars: Episode IV - A New Hope':
- Star Wars: Episode V - The Empire Strikes Back (1980)
- Raiders of the Lost Ark (1981)
- Star Wars: Episode VI - Return of the Jedi (1983)
- Star Wars: Episode I - The Phantom Menace (1999)
- Young Sherlock Holmes (1985)
