In [8]:
import pandas as pd
import numpy as np 
import seaborn as sns 
import matplotlib.pyplot as plt 
import re

In [9]:
movies = pd.read_csv("Datasets/movies.csv")
ratings = pd.read_csv("Datasets/ratings.csv")

In [10]:
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
62418,209157,We (2018),Drama
62419,209159,Window of the Soul (2001),Documentary
62420,209163,Bad Poems (2018),Comedy|Drama
62421,209169,A Girl Thing (2001),(no genres listed)


In [11]:
def Extract_Year(String:str):
    try:
        return int(re.findall(pattern=r"(\d+)",string=String)[0])
    except IndexError: 
        return 0

In [12]:
def clean_title(String:str):
        return re.sub(pattern=r"[(\d+)]",repl="",string=String)

In [13]:
movies["Year"] = movies.apply(lambda x:Extract_Year(x["title"]),axis=1)

In [14]:
movies["title"] = movies.apply(lambda x:clean_title(x["title"]),axis=1)

In [15]:
movies

Unnamed: 0,movieId,title,genres,Year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995
...,...,...,...,...
62418,209157,We,Drama,2018
62419,209159,Window of the Soul,Documentary,2001
62420,209163,Bad Poems,Comedy|Drama,2018
62421,209169,A Girl Thing,(no genres listed),2001


In [16]:
ratings

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510
...,...,...,...,...
25000090,162541,50872,4.5,1240953372
25000091,162541,55768,2.5,1240951998
25000092,162541,56176,2.0,1240950697
25000093,162541,58559,4.0,1240953434


In [17]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
tfidf = TfidfVectorizer(stop_words="english")
movies_vectors= tfidf.fit_transform(movies["genres"])

In [38]:
movies_vectors.shape

(62423, 23)

In [23]:
Movies_similarities = cosine_similarity(movies_vectors,movies_vectors)

In [84]:
def Recommend_Movie(user_id:int,Movies_similarities:np.ndarray,ratings:pd.DataFrame,movies:pd.DataFrame):

    #! Get watched Movies by user ID and rating larger >= 3  
    favorite_Movie = ratings[(ratings["userId"] == user_id) & (ratings["rating"]>=3)]["movieId"].values

    #! Sum similarity Matrix column to get similarity score for movies to the watched movies
    similar_scores = np.sum(Movies_similarities[favorite_Movie - 1],axis=0)

    #! Sorting similarity array to get most relevant content for watched Movies
    recommended_destinations_idx = np.argsort(similar_scores)[::-1]

    recommends = []
    for idx in recommended_destinations_idx:
        if movies.iloc[idx]["movieId"] not in favorite_Movie:
            recommends.append(movies.iloc[idx])
        if len(recommends) >= 10:
            break
    
    return pd.DataFrame(recommends)

In [89]:
def get_Most_favorite_types(user_id:int):

    most = []
    favorite_Movie = ratings[(ratings["userId"] == user_id) & (ratings["rating"] >= 3)]["movieId"]

    for movie_id in favorite_Movie:
        most.extend(movies.iloc[movie_id]["genres"].split("|"))

    genres, counts = np.unique(np.array(most), return_counts=True)

    sorted_indices = np.argsort(counts)[::-1]
    for idx in sorted_indices:
        print(f"{genres[idx]}: {counts[idx]}")

In [87]:
user_id = 1
Recommend_Movie(user_id=user_id,Movies_similarities=Movies_similarities,ratings=ratings,movies=movies)

Unnamed: 0,movieId,title,genres,Year
491,496,What Happened Was...,Comedy|Drama|Romance|Thriller,1994
33298,142839,Vaaranam Aayiram,Comedy|Drama|Romance|Thriller,2008
5555,5666,"Rules of Attraction, The",Comedy|Drama|Romance|Thriller,2002
37414,152593,Bhagam Bhag,Comedy|Drama|Romance|Thriller,2006
50012,180089,Showgirls : Penny's from Heaven,Comedy|Drama|Romance|Thriller,2
22219,113949,Aaron Loves Angela,Comedy|Drama|Romance|Thriller,1975
15531,81804,Wild Target,Comedy|Drama|Romance|Thriller,2010
15598,82119,The Reverse,Comedy|Drama|Romance|Thriller,2009
15949,84176,Waxworks Das Wachsfigurenkabinett,Comedy|Drama|Romance|Thriller,1924
39,40,"Cry, the Beloved Country",Drama,1995


In [None]:
get_Most_favorite_types(user_id=user_id)

Drama: 34
Comedy: 25
Romance: 15
Horror: 6
Sci-Fi: 6
Adventure: 4
Crime: 4
Thriller: 4
War: 3
Western: 2
Fantasy: 2
Musical: 2
Children: 2
Action: 2
Mystery: 1
Film-Noir: 1
Animation: 1
