<a href="https://colab.research.google.com/github/champsleague/Machine-Learning-Projects/blob/main/ML_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install surprise

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 2.5 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1633994 sha256=fde33ff5442380a6042b9a3f4d7df8b79a0878cb16810328f618dc168a54bbd6
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [12]:
# Matrix Factorization

import pandas as pd
import numpy as np

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from surprise import Dataset,Reader
from surprise.prediction_algorithms.matrix_factorization import SVD
from surprise import accuracy


# A movie is kept only if it has strictly more than this many votes.
MIN_VOTE_COUNT = 55

# Load the raw inputs. They keep their own names so the filtering below can be
# re-run without re-reading the CSV files and without clobbering the originals.
ratings_raw = pd.read_csv("ratings_small.csv")
movie_md_raw = pd.read_csv("movies_metadata.csv")

# display() is the notebook built-in; a bare expression in the middle of a
# cell would be silently discarded.
display(ratings_raw.head())

# Keep only id/title of movies with more than MIN_VOTE_COUNT votes.
movie_md = movie_md_raw.loc[movie_md_raw['vote_count'] > MIN_VOTE_COUNT, ['id', 'title']]

# Integer ids of the retained movies. Ids that cannot be parsed as numbers are
# dropped instead of aborting the whole cell; `movie_md['id']` itself is left
# untouched because the title lookup later compares it as text.
movie_ids = (
    pd.to_numeric(movie_md['id'], errors='coerce')
    .dropna()
    .astype(int)
    .tolist()
)

# Keep only ratings of the retained movies, with a fresh 0..n-1 index.
# NOTE(review): this assumes ratings['movieId'] and movie_md['id'] refer to the
# same id space - confirm against the dataset documentation.
ratings = ratings_raw[ratings_raw['movieId'].isin(movie_ids)].reset_index(drop=True)

# Show the first rows and the size of the filtered ratings.
display(ratings.head())
print(ratings.shape)


# Seed for the SVD model so repeated runs produce the same fitted factors.
SEED = 42

# When loading from a DataFrame the reader is only consulted for the rating
# scale, so the file-parsing options (line format, separator, skipped lines)
# are not needed.
# NOTE(review): the (0, 5) scale is kept from the original - confirm the lower
# bound matches the smallest rating actually present in the data.
reader = Reader(rating_scale=(0, 5))

# Load the (user, item, rating) triples; the column order matters.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader=reader)

# Build a trainset from every rating (no hold-out split is made).
trainset = data.build_full_trainset()

# Initialize the matrix-factorization model.
svd = SVD(random_state=SEED)

# Fit on the full trainset. This is a plain fit, not a cross-validation.
svd.fit(trainset)

# Spot-check two predictions; `r_ui` is passed alongside so it appears next to
# the estimate. Printed explicitly because bare expressions in the middle of a
# cell are discarded.
print(svd.predict(uid=3, iid=2959, r_ui=5.0))
print(svd.predict(uid=15, iid=2678, r_ui=1.0))



def get_recommendations(data, movie_md, user_id, top_n, algo):
    """Return the `top_n` not-yet-rated movies with the highest predicted rating.

    Parameters
    ----------
    data : pandas.DataFrame
        Ratings with 'userId', 'movieId' and 'rating' columns.
    movie_md : pandas.DataFrame
        Movie metadata with 'id' and 'title' columns. Ids are compared as
        text, so both string and integer ids are accepted.
    user_id
        Value from ``data['userId']`` identifying the user to recommend for.
    top_n : int
        Maximum number of recommendations to return.
    algo
        Object exposing ``predict(user_id, item_id)`` whose result has an
        ``est`` attribute (the predicted rating).

    Returns
    -------
    list of (title, predicted_rating) tuples
        Sorted by predicted rating, highest first, at most `top_n` entries.
        Movies without a row in `movie_md` are skipped.

    Raises
    ------
    KeyError
        If `user_id` has no rows in `data`.
    """
    user_rows = data[data['userId'] == user_id]
    if user_rows.empty:
        raise KeyError(user_id)

    # Movies this user has an actual (non-null) rating for.
    rated_ids = set(user_rows.loc[user_rows['rating'].notna(), 'movieId'].tolist())

    # Every other movie, in ascending id order; the final sort is stable, so
    # this is also the order in which equal predictions are returned.
    candidate_ids = [
        movie_id
        for movie_id in sorted(data['movieId'].dropna().unique().tolist())
        if movie_id not in rated_ids
    ]

    # id -> title lookup built once instead of scanning movie_md per movie.
    # setdefault keeps the first title when an id occurs more than once.
    titles = {}
    for movie_id, title in zip(movie_md['id'].astype(str), movie_md['title']):
        titles.setdefault(movie_id, title)

    recommendations = []
    for item_id in candidate_ids:
        key = str(item_id)
        if key not in titles:
            # No metadata for this movie: nothing to show, so skip it.
            continue
        est = algo.predict(user_id, item_id).est
        recommendations.append((titles[key], est))

    # Highest predicted rating first, then keep the top n.
    recommendations.sort(key=lambda rec: rec[1], reverse=True)
    return recommendations[:top_n]


# Ten highest-predicted movies for user 654 from the fitted SVD model.
get_recommendations(
    data=ratings,
    movie_md=movie_md,
    user_id=654,
    top_n=10,
    algo=svd,
)



[('Galaxy Quest', 4.888718539522184),
 ('Birdman of Alcatraz', 4.869300511961511),
 ('Flags of Our Fathers', 4.85261815327589),
 ('The Thomas Crown Affair', 4.843495790986965),
 ('Scarface', 4.810964301229698),
 ('While You Were Sleeping', 4.809595301079908),
 ('Ghost Rider', 4.8055622157473215),
 ('Nell', 4.783567713137495),
 ('Straw Dogs', 4.778832854310195),
 ('Dead Man', 4.771431939423963)]