# WBSflix recommenders

In [1]:
import pandas as pd
import random

In [2]:
links = pd.read_csv('wbsflix-dataset/ml-latest-small/links.csv')
movies = pd.read_csv('wbsflix-dataset/ml-latest-small/movies.csv')
ratings = pd.read_csv('wbsflix-dataset/ml-latest-small/ratings.csv')
tags = pd.read_csv('wbsflix-dataset/ml-latest-small/tags.csv')

### Function that takes as an input a user id and outputs the top n movies of the user:

In [3]:
def top_movies(df_rate, df_movies, name, range_of_days:float, n):
    top_movies_for_id = []
    df_merge = (df_rate
                .merge(df_movies, on='movieId')
                .assign(time_count = round(abs(ratings['timestamp']-max(ratings['timestamp']))/60/60/24))
                .sort_values('time_count')
                .filter(['userId', 'rating', 'title', 'time_count'])
                .query('time_count<=@range_of_days')
               )
    df_pivot = pd.pivot_table(df_merge, values='rating', columns='title', index='userId')
    df_movie_list = df_pivot.columns.values.tolist()
    random.shuffle(df_movie_list)
    for value in df_movie_list:
        if df_pivot.loc[name][value] >= 4:
            top_movies_for_id.append(value)
            if len(top_movies_for_id) == n:
                break
    return top_movies_for_id

In [4]:
top_movies(ratings, movies, 1, 150, 5)

['Indiana Jones and the Last Crusade (1989)',
 '20 Dates (1998)',
 'Easy Rider (1969)',
 'Office Space (1999)',
 'Beetlejuice (1988)']

### Function to get the most popular movies

In [5]:
def pop_movies(df_rate, df_movies, rate_tresh, range_of_days:float):
    df_merge = (df_rate
                .merge(df_movies, on='movieId')
                .assign(time_count = round(abs(ratings['timestamp']-max(ratings['timestamp']))/60/60/24))
                .sort_values('time_count')
                .filter(['userId', 'rating', 'title', 'time_count'])
                .query('time_count<=@range_of_days')
               )
    df = df_merge.groupby('title').agg(rate_count=('rating','count'), rate_mean=('rating','mean')).query('rate_count >= @rate_tresh').sort_values('rate_mean', ascending=False)
    return df.index.to_list()

In [6]:
pop_movies(ratings, movies, 20, 90)

['Toy Story 2 (1999)',
 'V for Vendetta (2006)',
 'Social Network, The (2010)',
 'Moneyball (2011)',
 'Star Trek (2009)',
 'Last Samurai, The (2003)',
 'Star Trek II: The Wrath of Khan (1982)',
 'Wrestler, The (2008)',
 'Sin City (2005)',
 'Dances with Wolves (1990)',
 'Easy Rider (1969)',
 'Pretty Woman (1990)',
 'Jumanji (1995)',
 'Snowpiercer (2013)',
 'Spider-Man (2002)',
 'Talented Mr. Ripley, The (1999)',
 'Man of Steel (2013)',
 'Ice Age 2: The Meltdown (2006)',
 'Poseidon Adventure, The (1972)']

### Item-based Collaborative Filtering: Function which outputs the top n most similar movies to top rated movies of a user

In [7]:
def item_based_recommender(df_rate, df_movies, top_movies, range_of_days:float, tresh_n):
    recommend_movies = []
    df_merge = (df_rate
                .merge(df_movies, on='movieId')
                .assign(time_count = round(abs(ratings['timestamp']-max(ratings['timestamp']))/60/60/24))
                .sort_values('time_count')
                .filter(['userId', 'rating', 'title', 'time_count'])
                .query('time_count<=@range_of_days')
               )
    corr = pd.pivot_table(df_merge, values='rating', columns='title', index='userId').dropna(axis='index', thresh=tresh_n).corr()
    top_corr = corr.filter(top_movies).round(1)
    num_list = [0.9, 1.0]
    for i in num_list: 
        for index, value in top_corr.iterrows():
            if i in value.values:
                recommend_movies.append(index)
        return recommend_movies

In [8]:
my_list = top_movies(ratings, movies, 1, 150, 5)
item_based_recommender(ratings, movies, my_list, 200, 20)

['24 Hour Party People (2002)',
 'Double Indemnity (1944)',
 'Eragon (2006)',
 'Exorcist, The (1973)',
 'Gods Must Be Crazy, The (1980)',
 'Importance of Being Earnest, The (2002)',
 'Last Samurai, The (2003)',
 'Lucy (2014)',
 'Max (2002)',
 'Mission to Mars (2000)',
 'Peter Pan (2003)',
 'Poseidon Adventure, The (1972)',
 'Waking Life (2001)']

### Function for Recom-Chat-Bot

In [9]:
def item_based_chat_bot(recom_list, user_id):
    print(f'''Hi {user_id}! I am your personal recommender.
    Would you like me to recommend you some popular movies based on your previous ratings?''')
    print(f'''Type [y]/[n]''')
    x=input()
    if(x == 'y'):
        for movie in recom_list:
            print(f'''{movie}''')
    else:
        print(f'''Goodbye!''')

In [10]:
item_based_chat_bot(item_based_recommender(ratings, movies, top_movies(ratings, movies, 1, 90, 5), 200, 20), 1)

Hi 1! I am your personal recommender.
    Would you like me to recommend you some popular movies based on your previous ratings?
Type [y]/[n]


 y


Exorcist, The (1973)
Last Samurai, The (2003)
Mission to Mars (2000)


In [11]:
item_based_chat_bot(item_based_recommender(ratings, movies, top_movies(ratings, movies, 1, 90, 5), 200, 20), 1)

Hi 1! I am your personal recommender.
    Would you like me to recommend you some popular movies based on your previous ratings?
Type [y]/[n]


 n


Goodbye!


### Function to find movies

In [12]:
def movie_id_finder():
    print("What movie are you looking for?")
    title = input().lower()
    df = (movies
          .filter(['movieId', 'title'])
          .assign(searching = lambda x: x['title'].str.lower().str.contains(title))
          )
    df = df.drop(df[df['searching']==False].index)
    if df.shape[0] > 1:
        print("Which one of the following movies do you mean? ")
        movie_titles = df['title'].tolist()
        n_movie = 1
        for movie in movie_titles:
            print("\t" + movie + " [type " + str(n_movie) + "]")
            n_movie += 1
        n_movie = input()
        selected_movie = movie_titles[int(n_movie)-1]
        df = df.query('title==@selected_movie')
        print(f"The selected movie has the id: {df['movieId'].tolist()[0]}")
    else: print("Sorry, no movie has been found!")

In [13]:
movie_id_finder()

What movie are you looking for?


 nemo


Which one of the following movies do you mean? 
	Johnny Mnemonic (1995) [type 1]
	Little Nemo: Adventures in Slumberland (1992) [type 2]
	Finding Nemo (2003) [type 3]


 3


The selected movie has the id: 6377
