<a href="https://colab.research.google.com/github/jeffdavidson343/movie_recomendation_project/blob/main/movie_recommendation_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from fastai.collab import *
from fastai.tabular.all import *
# Fetch the MovieLens 100k dataset through fastai's untar_data; `path` is the local
# folder that the data files (u.data, u.item) are read from in the cells below.
path = untar_data(URLs.ML_100k)

In user-based collaborative filtering, the system identifies similar users based on their past behaviors and preferences. If user A and user B have similar patterns of interactions and preferences for items, the system might recommend items that user B has liked to user A. This method relies on finding neighbors (similar users) and then suggesting items that the neighbors have shown interest in.

In [None]:
# Raw ratings file: tab-separated with no header row, so the column names are supplied here.
ratings = pd.read_csv(
    path/'u.data',
    sep = '\t',
    header = None,
    names = ['user','movie','rating','timestamp'],
)
ratings.head()

Unnamed: 0,user,movie,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [None]:
# Movie lookup table: only the first two pipe-separated columns of u.item are read
# and named 'movie' (the id used in the ratings table) and 'title'.
movies = pd.read_csv(
    path/'u.item',
    sep = '|',
    encoding = 'latin-1',
    header = None,
    usecols = [0, 1],
    names = ['movie','title'],
)
movies.head()

Unnamed: 0,movie,title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [None]:
# Attach a title to every rating by joining the two tables on the one column they share ('movie').
# fastai's CollabDataLoaders then builds the data loaders from the merged frame,
# using 'title' as the item column and a batch size of 64.
ratings = pd.merge(ratings, movies)
dls = CollabDataLoaders.from_df(ratings, bs = 64, item_name = 'title')
dls.show_batch()

Unnamed: 0,user,title,rating
0,130,Chasing Amy (1997),4
1,788,"Cowboy Way, The (1994)",2
2,892,Carrie (1976),3
3,833,"Truth About Cats & Dogs, The (1996)",2
4,741,My Best Friend's Wedding (1997),3
5,177,"Princess Bride, The (1987)",4
6,804,"Omen, The (1976)",3
7,297,Face/Off (1997),5
8,521,Snow White and the Seven Dwarfs (1937),3
9,771,"Silence of the Lambs, The (1991)",1


In [None]:
# The user and movie counts are the lengths of the DataLoaders' class lists for 'user' and 'title'.
n_users, n_movies = len(dls.classes['user']), len(dls.classes['title'])
# Five latent factors per user and per movie, drawn at random with torch.randn.
n_factors = 5
user_factors, movie_factors = torch.randn(n_users, n_factors), torch.randn(n_movies, n_factors)

In [None]:
#Defines a PyTorch model for a recommendation system. DotProductBias takes the dot product of the learned user and movie factors, adds the corresponding user and movie biases, and passes the result through sigmoid_range so it falls within y_range. Once trained, the model predicts the rating a given user would give a given movie.
def create_params(size):
  """Return a trainable nn.Parameter of shape `size`, initialised from a normal distribution (mean 0, std 0.1)."""
  init = torch.zeros(*size)
  init.normal_(mean=0, std=0.1)
  return nn.Parameter(init)
class DotProductBias(Module):
  """Collaborative-filtering model: dot product of user and movie factors plus a bias per user and per movie.

  Each row of the input batch `x` holds a user index in column 0 and a movie
  index in column 1; the output is one predicted rating per row, passed through
  sigmoid_range with the bounds in `y_range`.
  """
  def __init__(self, n_users, n_movies, n_factors, y_range = (0, 5.5)):
    # Learned embeddings: one factor vector and one scalar bias per user / per movie.
    self.user_factors = create_params([n_users, n_factors])
    self.user_bias = create_params([n_users])
    self.movie_factors = create_params([n_movies, n_factors])
    self.movie_bias = create_params([n_movies])
    self.y_range = y_range
  def forward(self, x):
    user_idx, movie_idx = x[:,0], x[:,1]
    # Row-wise dot product of the selected user and movie factor vectors.
    dot = (self.user_factors[user_idx] * self.movie_factors[movie_idx]).sum(dim=1)
    bias = self.user_bias[user_idx] + self.movie_bias[movie_idx]
    return sigmoid_range(dot + bias, *self.y_range)

In [None]:
# Train a DotProductBias model with 50 latent factors using mean-squared-error loss:
# 5 epochs of the one-cycle schedule, maximum learning rate 5e-3, weight decay 0.1.
model = DotProductBias(n_users = n_users, n_movies = n_movies, n_factors = 50)
learn = Learner(dls, model, loss_func = MSELossFlat())
learn.fit_one_cycle(n_epoch = 5, lr_max = 5e-3, wd = 0.1)

epoch,train_loss,valid_loss,time
0,1.019551,0.974941,00:09
1,0.849969,0.85382,00:08
2,0.648706,0.836834,00:09
3,0.496403,0.838177,00:09
4,0.369411,0.841324,00:08


In [None]:
#argsort() sorts in ascending order, so the first 5 indices are the movies with the LOWEST learned bias, not the highest rated ones. The result is the list of those 5 titles.
movie_bias = learn.model.movie_bias.squeeze()
idxs = movie_bias.argsort()[:5]
[dls.classes['title'][i] for i in idxs]

['Children of the Corn: The Gathering (1996)',
 'Grease 2 (1982)',
 'Lawnmower Man 2: Beyond Cyberspace (1996)',
 "McHale's Navy (1997)",
 'Showgirls (1995)']

In [None]:
#Sorts the movie biases in descending order and keeps the first 5 indices, i.e. the 5 movies with the highest learned bias, then looks up their titles.
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]

['Titanic (1997)',
 'Shawshank Redemption, The (1994)',
 "Schindler's List (1993)",
 'Star Wars (1977)',
 'Good Will Hunting (1997)']

In [None]:
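#So, the two code snippets use the same approach (argsort on the learned movie biases) but return opposite ends of the ranking: the default ascending sort gives the 5 movies with the lowest bias, while descending=True gives the 5 movies with the highest bias.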