In [3]:
import numpy as np 
import pandas as pd
from math import sqrt,trunc

# Loading User Ratings and Movies 

In [23]:
# Read the tab-separated ratings file, discard column 3 (not needed here),
# and give the remaining three columns descriptive names.
df_rating = (
    pd.read_csv('u.data', delimiter='\t', header=None)
    .drop(columns=[3])
    .set_axis(['UserID', 'MovieID', 'Rating'], axis=1)
)
df_rating.head()

Unnamed: 0,UserID,MovieID,Rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [24]:
# Read the pipe-separated movie catalogue. Only the first two columns
# (MovieID and title) are needed for our purposes.
# NOTE: 'ANSI' is only a valid codec name on Windows; 'latin-1' decodes the
# file on every platform (the MovieLens 100k u.item file is presumably
# ISO-8859-1 encoded -- confirm against the dataset README).
df_movie = (
    pd.read_csv('u.item', delimiter='|', encoding='latin-1', header=None)
    .iloc[:, :2]
    .set_axis(['MovieID', 'Movie'], axis=1)
)
df_movie.head()

Unnamed: 0,MovieID,Movie
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


# Similarity Measures (Euclidean Distance and Pearson Correlation)

In [25]:
def sim_distance(prefs,p1,p2,measure='PC'):
    """Return a similarity score between two users based on co-rated items.

    Parameters
    ----------
    prefs : dict
        Mapping ``user -> {item: rating}``.
    p1, p2 : hashable
        Keys of ``prefs`` identifying the two users to compare.
    measure : {'PC', 'ED'}, default 'PC'
        ``'PC'`` - Pearson correlation over the shared items (range -1..1).
        ``'ED'`` - Euclidean-distance-based score ``1 / (1 + sum of squared
        rating differences)`` (range 0..1].

    Returns
    -------
    number
        The similarity score; ``0`` when the users share no items, or when
        the Pearson denominator is zero (at least one user rated all shared
        items identically).

    Raises
    ------
    ValueError
        If ``measure`` is not ``'PC'`` or ``'ED'``.
    KeyError
        If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Fail loudly on an unsupported measure instead of silently returning None.
    if measure not in ('PC', 'ED'):
        raise ValueError(
            "measure must be 'PC' or 'ED', got {!r}".format(measure)
        )

    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Items rated by both users (computed once for either measure).
    shared = [item for item in ratings1 if item in ratings2]
    n = len(shared)
    # If they have no ratings in common, return 0.
    if n == 0:
        return 0

    if measure == 'ED':
        # Add up the squares of all the differences.
        sum_of_squares = sum(
            (ratings1[item] - ratings2[item]) ** 2 for item in shared
        )
        return 1 / (1 + sum_of_squares)

    # Pearson correlation.
    # Add up all the preferences.
    sum1 = sum(ratings1[item] for item in shared)
    sum2 = sum(ratings2[item] for item in shared)
    # Sum up the squares.
    sum1Sq = sum(ratings1[item] ** 2 for item in shared)
    sum2Sq = sum(ratings2[item] ** 2 for item in shared)
    # Sum up the products.
    pSum = sum(ratings1[item] * ratings2[item] for item in shared)

    num = pSum - (sum1 * sum2 / n)
    variance_product = (sum1Sq - sum1 ** 2 / n) * (sum2Sq - sum2 ** 2 / n)
    # A zero product means at least one user has no rating variance; a tiny
    # negative product can only come from floating-point round-off and would
    # make sqrt() raise. Either way the correlation is undefined -> 0.
    if variance_product <= 0:
        return 0
    return num / sqrt(variance_product)

# Recommendation Function: Output in Raw form 

In [128]:
def getRecommendations_raw(prefs,person,similarity='PC',n=10):
    """Return the top ``n`` predicted ratings for ``person`` in raw form.

    Every other user in ``prefs`` whose (3-decimal-rounded) similarity to
    ``person`` is positive contributes their ratings, weighted by that
    similarity, to each item ``person`` has not rated (or has rated 0).

    Returns a list of ``(predicted_rating, item)`` tuples, sorted in
    descending order and truncated to ``n`` entries.
    """
    weighted_totals = {}
    weight_sums = {}
    for neighbour, neighbour_ratings in prefs.items():
        # Never compare a person with themselves.
        if neighbour == person:
            continue
        weight = round(sim_distance(prefs, person, neighbour, similarity), 3)
        # Non-positive similarities carry no useful signal.
        if weight <= 0:
            continue
        already_rated = prefs[person]
        for movie, score in neighbour_ratings.items():
            # Skip anything the person has already given a non-zero rating.
            if movie in already_rated and already_rated[movie] != 0:
                continue
            # Accumulate similarity * score and the similarity itself.
            weighted_totals[movie] = weighted_totals.get(movie, 0) + score * weight
            weight_sums[movie] = weight_sums.get(movie, 0) + weight

    # Normalise each total by the sum of the similarities that produced it.
    ranked = []
    for movie, total in weighted_totals.items():
        ranked.append((round(total / weight_sums[movie], 6), movie))
    # Ascending sort, then flip, so the best predictions come first.
    ranked.sort()
    return ranked[::-1][:n]
    # Output is a list of tuples (Predicted Rating, MovieID)

# Creating a dictionary for the preferences of all users 

In [134]:
# pref is a dictionary of dictionaries: the key is a UserID and the value is a
# {MovieID: Rating} dictionary holding every movie rated by that user.
# A single groupby pass replaces re-indexing the whole frame once per user,
# and also works for a user who has rated only one movie.
pref = {
    user_id: user_ratings.set_index('MovieID')['Rating'].to_dict()
    for user_id, user_ratings in df_rating.groupby('UserID')
}

# Final Recommendation Function


In [114]:
def getRecommendations(prefs,person,similarity='PC',n=10):
    """Return the top ``n`` recommendations for ``person`` as a DataFrame.

    Parameters
    ----------
    prefs : dict
        Mapping ``user -> {MovieID: rating}``.
    person : hashable
        Key of ``prefs`` to recommend for.
    similarity : {'PC', 'ED'}, default 'PC'
        Similarity measure passed through to ``getRecommendations_raw``.
    n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Predicted Rating'`` and ``'Movie'`` (titles looked up in
        the module-level ``df_movie``), best prediction first.

    Also prints a one-line header naming the user.
    """
    rows = []
    # Use the ``prefs`` argument (not the global ``pref``) so the function
    # honours whatever preference dictionary the caller passes in.
    for rating, movieID in getRecommendations_raw(prefs, person, similarity, n):
        # .item() requires exactly one matching title per MovieID.
        title = df_movie['Movie'].loc[df_movie['MovieID'] == movieID].item()
        rows.append((rating, title))
    print("Top {} movies for User {}:".format(n,person))
    return pd.DataFrame(rows, columns=['Predicted Rating','Movie'])

# A few demonstrations 

In [121]:

# Top-10 recommendations for user 139 with the default Pearson similarity.
getRecommendations(pref, person=139)

Top 10 movies for User 139:


Unnamed: 0,Predicted Rating,Movie
0,5.0,Little City (1998)
1,5.0,Entertaining Angels: The Dorothy Day Story (1996)
2,5.0,"Magic Hour, The (1998)"
3,5.0,Aiqing wansui (1994)
4,5.0,Santa with Muscles (1996)
5,5.0,"Saint of Fort Washington, The (1993)"
6,5.0,"Boys, Les (1997)"
7,5.0,Sliding Doors (1998)
8,5.0,Anna (1996)
9,5.0,The Deadly Cure (1996)


In [122]:
# Top-10 recommendations for user 299 with the default Pearson similarity.
getRecommendations(pref, person=299)

Top 10 movies for User 299:


Unnamed: 0,Predicted Rating,Movie
0,5.0,Entertaining Angels: The Dorothy Day Story (1996)
1,5.0,Hugo Pool (1997)
2,5.0,Someone Else's America (1995)
3,5.0,Aiqing wansui (1994)
4,5.0,Santa with Muscles (1996)
5,5.0,Tough and Deadly (1995)
6,5.0,"Saint of Fort Washington, The (1993)"
7,5.0,Delta of Venus (1994)
8,5.0,Star Kid (1997)
9,5.0,Marlene Dietrich: Shadow and Light (1996)


In [127]:
# Top-10 recommendations for user 740 with the default Pearson similarity.
getRecommendations(pref, person=740)

Top 10 movies for User 740:


Unnamed: 0,Predicted Rating,Movie
0,5.0,Entertaining Angels: The Dorothy Day Story (1996)
1,5.0,Hugo Pool (1997)
2,5.0,Santa with Muscles (1996)
3,5.0,"Saint of Fort Washington, The (1993)"
4,5.0,Star Kid (1997)
5,5.0,Marlene Dietrich: Shadow and Light (1996)
6,5.0,Prefontaine (1997)
7,5.0,"Great Day in Harlem, A (1994)"
8,4.767582,Pather Panchali (1955)
9,4.733138,Anna (1996)
