In [2]:
#Import all necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

sns.set_style('white')
%matplotlib inline

#Get the data into Pandas Dataframe object
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('dataset.csv', sep = '\t', names = column_names)

#Get the Movie Titles
movie_titles = pd.read_csv('movieIdTitles.csv')

#Merge the dataset with movie titles
df = pd.merge(df, movie_titles, on = 'item_id')

### Exploratory data analysis

# One row per title: the mean rating ('rating') and how many ratings the
# title received ('numOfRatings').
ratings = (
    df.groupby('title')['rating']
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'rating', 'count': 'numOfRatings'})
)

# Histogram of each summary column, each on its own 10x4 figure.
for column in ('numOfRatings', 'rating'):
    plt.figure(figsize=(10, 4))
    ratings[column].hist(bins=70)

# Joint view of mean rating against number of ratings.
sns.jointplot(data=ratings, x='rating', y='numOfRatings', alpha=0.5)

### Create the Recommendation System

# user_id x title matrix of ratings (NaN where a user has not rated a title).
moviemat = df.pivot_table(index='user_id', columns='title', values='rating')

# Most rated movies with their average ratings. A bare expression in the
# middle of a cell is not rendered, so it is displayed explicitly.
display(ratings.sort_values('numOfRatings', ascending=False).head(10))

recommendation_columns = [
    'FirstMovieRecommendation',
    'SecondMovieRecommendation',
    'ThirdMovieRecommendation',
    'FourthMovieRecommendation',
]

# Only titles with more than 100 ratings may be recommended. This filter does
# not depend on the movie being processed, so it is applied once up front
# instead of after every correlation pass.
popular_titles = ratings.index[ratings['numOfRatings'] > 100]
popular_ratings = moviemat[popular_titles]

# For every title, pick the most correlated popular titles.
recommendations = {}
for title in ratings.index:
    correlations = popular_ratings.corrwith(moviemat[title]).dropna()
    # Remove the movie itself by label. Taking rows 1..4 of the sorted result
    # assumed the movie was always the first row, which is not the case when
    # it has 100 or fewer ratings (it is not among the popular titles), when
    # its self-correlation is NaN, or when another title ties at 1.0; in those
    # cases the best real match was being thrown away.
    top_matches = (
        correlations.drop(labels=title, errors='ignore')
        .sort_values(ascending=False)
        .head(len(recommendation_columns))
    )
    # Keep only complete sets so every stored row has all four slots filled.
    if len(top_matches) == len(recommendation_columns):
        recommendations[title] = top_matches.index.tolist()

# Build the recommendation columns in one step. They are always created (NaN
# for titles without a complete set), even when no title qualified, so later
# lookups of these columns cannot fail with a KeyError.
recommendation_table = pd.DataFrame.from_dict(
    recommendations, orient='index', columns=recommendation_columns
).reindex(ratings.index)
for column in recommendation_columns:
    ratings[column] = recommendation_table[column]

# Check the result
display(ratings.head())

# Replace every missing value in the table with '-'.
ratings = ratings.fillna(value='-')

# Persist the table to disk so it can be reused later.
ratings.to_csv(path_or_buf='MovieRecommendations.csv', encoding='utf-8')

# Read the saved table back; the lookups below run against this copy.
df_result = pd.read_csv(filepath_or_buffer='MovieRecommendations.csv')

# Print every movie title, one per line.
for movie_title in df_result['title'].tolist():
    print(movie_title)

# Text box used to enter the movie to look up.
inputMovieName = widgets.Text()

def getRecommendations(sender):
    """Print the four stored recommendations for the title typed in the text box.

    The title is read from the module-level ``inputMovieName`` widget and
    looked up by exact match in the ``title`` column of the module-level
    ``df_result`` table. Leading and trailing whitespace is removed first so
    that a pasted title with a stray space or newline still matches.

    Parameters
    ----------
    sender
        Value passed by the widget callback machinery; it is not used.

    Returns
    -------
    None
        The result is printed, either the four recommendations of the first
        matching row or ``Movie not found.``.
    """
    recommendation_columns = (
        'FirstMovieRecommendation',
        'SecondMovieRecommendation',
        'ThirdMovieRecommendation',
        'FourthMovieRecommendation',
    )

    searchMovie = inputMovieName.value.strip()
    list_result = df_result[df_result['title'] == searchMovie]
    if list_result.empty:
        print("Movie not found.")
        return

    # Only the first matching row is reported.
    first_match = list_result.iloc[0]
    print(f'Your Recommendations for the Movie {searchMovie} are:\n')
    for rank, column in enumerate(recommendation_columns, start=1):
        print(f'{rank}: {first_match[column]}')

# `Text.on_submit` is deprecated in ipywidgets. The documented replacement is
# to turn off continuous updates and observe the `value` trait, so the callback
# runs when the edit is committed (e.g. Enter) instead of on every keystroke.
# The callback receives the change notification as its single argument.
inputMovieName.continuous_update = False
inputMovieName.observe(getRecommendations, names='value')

# Render the text box in the cell output.
display(inputMovieName)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 76620: invalid continuation byte

In [None]:
#Import all necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display

sns.set_style('white')
%matplotlib inline

#Get the data into Pandas Dataframe object
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('dataset.csv', sep='\t', names=column_names)

#Get the Movie Titles (with corrections for encoding and separator)
movie_titles = pd.read_csv('movieIdTitles.csv', sep='|', encoding='latin-1', header=None, names=['item_id', 'title'], usecols=[0,1])

#Merge the dataset with movie titles
df = pd.merge(df, movie_titles, on='item_id')

### Per-title summary
# One row per title: the mean rating ('rating') and how many ratings the
# title received ('numOfRatings').
ratings = (
    df.groupby('title')['rating']
    .agg(['mean', 'count'])
    .rename(columns={'mean': 'rating', 'count': 'numOfRatings'})
)

### Create the Recommendation System
# user_id x title matrix of ratings (NaN where a user has not rated a title).
moviemat = pd.pivot_table(df, index='user_id', columns='title', values='rating')

# Build up to four recommendations per movie from rating correlations.
recommendation_columns = [
    'FirstMovieRecommendation',
    'SecondMovieRecommendation',
    'ThirdMovieRecommendation',
    'FourthMovieRecommendation',
]
MIN_RATINGS_TO_GET_RECOMMENDATIONS = 10   # movies with fewer ratings are skipped
MIN_RATINGS_TO_BE_RECOMMENDED = 100       # candidates need more ratings than this

# The candidate filter does not depend on the movie being processed, so it is
# applied once up front instead of after every correlation pass.
popular_titles = ratings.index[ratings['numOfRatings'] > MIN_RATINGS_TO_BE_RECOMMENDED]
popular_ratings = moviemat[popular_titles]

recommendations = {}
for title, num_ratings in ratings['numOfRatings'].items():
    # Explicit guards replace the former broad try/except, which would also
    # have hidden unrelated KeyError/IndexError failures inside the loop.
    if num_ratings < MIN_RATINGS_TO_GET_RECOMMENDATIONS or title not in moviemat.columns:
        continue

    correlations = popular_ratings.corrwith(moviemat[title]).dropna()
    # Remove the movie itself by label. Taking rows 1..4 of the sorted result
    # assumed the movie was always the first row, which is not the case when
    # it has 100 or fewer ratings (it is not among the popular titles), when
    # its self-correlation is NaN, or when another title ties at 1.0; in those
    # cases the best real match was being thrown away.
    top_matches = (
        correlations.drop(labels=title, errors='ignore')
        .sort_values(ascending=False)
        .head(len(recommendation_columns))
    )
    # Keep only complete sets so every stored row has all four slots filled.
    if len(top_matches) == len(recommendation_columns):
        recommendations[title] = top_matches.index.tolist()

# Create the recommendation columns in one step, with '-' for every title that
# has no complete set. The columns exist even when no title qualified, so the
# lookup code can always read them. Only these columns are filled; the numeric
# summary columns are left untouched.
recommendation_table = (
    pd.DataFrame.from_dict(recommendations, orient='index', columns=recommendation_columns)
    .reindex(ratings.index)
    .fillna('-')
)
for column in recommendation_columns:
    ratings[column] = recommendation_table[column]

# Create the final result dataframe for searching ('title' becomes a column).
df_result = ratings.reset_index()


### How to get Recommendations?
# Print every known title, one per line, between two banner lines so a title
# can be copied into the text box.
print("--- MOVIE LIST ---")
for movie_title in df_result['title'].tolist():
    print(movie_title)
print("\n--- COPY A MOVIE TITLE FROM ABOVE AND PASTE IT INTO THE BOX BELOW ---")


# Text box for the movie title. continuous_update=False keeps the value from
# being pushed on every keystroke.
inputMovieName = widgets.Text(
    placeholder='Copy and paste a movie title here, then press Enter',
    description='Movie Title:',
    continuous_update=False,
)

# Callback invoked with a change notification for the text box value.
def getRecommendations(change):
    """Print the four stored recommendations for the title in ``change.new``.

    The title is looked up by exact match in the ``title`` column of the
    module-level ``df_result`` table. Leading and trailing whitespace is
    removed first so that a pasted title with a stray space or newline still
    matches.

    Parameters
    ----------
    change
        Change notification whose ``new`` attribute holds the text to search.

    Returns
    -------
    None
        The result is printed. Nothing is printed for an empty or
        whitespace-only value, which also happens when the box is cleared.
    """
    recommendation_columns = (
        'FirstMovieRecommendation',
        'SecondMovieRecommendation',
        'ThirdMovieRecommendation',
        'FourthMovieRecommendation',
    )

    searchMovie = change.new.strip()
    if not searchMovie:
        # Clearing the box is not a search; stay silent instead of reporting
        # that a movie named "" was not found.
        return

    list_result = df_result[df_result['title'] == searchMovie]
    if list_result.empty:
        print(f'\nMovie "{searchMovie}" not found in the recommendation list.')
        return

    # Only the first matching row is reported.
    first_match = list_result.iloc[0]
    print(f'\nYour Recommendations for the Movie "{searchMovie}" are:\n')
    for rank, column in enumerate(recommendation_columns, start=1):
        print(f'{rank}: {first_match[column]}')

# Register getRecommendations as an observer of the widget's 'value' trait, so
# it is called with a change notification whenever the value changes. The
# widget is created with continuous_update=False, so this is not triggered on
# every keystroke.
inputMovieName.observe(getRecommendations, names='value')

# Render the text box in the cell output.
display(inputMovieName)

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[None, :]
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  c /= stddev[:, None]
  c /= s