# Importing required modules and data

In [1]:
import pandas as pd
import numpy as np
import requests
import math
import sys
import io

In [2]:
# Download the MovieLens 100k ratings file: tab-separated, no header row.
data = pd.read_csv(
    "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    sep="\t",
    header=None,
)

In [3]:
# `data` is already a DataFrame (pd.read_csv returns one); wrap it under the working name `df`.
df=pd.DataFrame(data)

In [4]:
# Preview the first five rows; the columns are still unnamed (0-3) at this point.
df.head()

Unnamed: 0,0,1,2,3
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


# Preprocessing of the data

In [5]:
# Give the four positional columns descriptive names.
df.columns=["UserID","MovieID","rating","timestamp"]

In [6]:
# Order the rows by user so that each user's ratings sit together.
df = df.sort_values(by="UserID")

In [7]:
# Confirm the new column names and the per-user ordering.
df.head()

Unnamed: 0,UserID,MovieID,rating,timestamp
66567,1,55,5,875072688
62820,1,203,4,878542231
10207,1,183,5,875072262
9971,1,150,5,876892196
22496,1,68,4,875072688


In [8]:
# The timestamp is not used by the recommender below, so drop that column.
df = df.drop(columns=["timestamp"])

In [9]:
# Check that only UserID, MovieID and rating remain.
df.head()

Unnamed: 0,UserID,MovieID,rating
66567,1,55,5
62820,1,203,4
10207,1,183,5
9971,1,150,5
22496,1,68,4


#### Creating a dictionary that groups the ratings by user.
#### That is, a dictionary whose keys are the user IDs.
#### For each user, the value is another dictionary whose keys are the movie IDs and whose values are the ratings.

#### It is named `prefs`, which stands for preferences.

In [10]:
# Build the nested preference mapping {UserID: {MovieID: rating}}.
# itertuples(index=False, name=None) yields one plain tuple per row, which is
# far cheaper than iterrows(): that builds a new Series object for every row.
prefs = {}
for uid, mid, rating in df[["UserID", "MovieID", "rating"]].itertuples(index=False, name=None):
    # A later row for the same (user, movie) pair overwrites the earlier one.
    prefs.setdefault(uid, {})[mid] = rating

# Creating recommendation system

#### Creating function sim_pearson, which takes the prefs dictionary and two users as input and returns the Pearson correlation coefficient between their ratings

In [11]:
def sim_pearson(dicti,p1,p2):
    """Return the Pearson correlation between the ratings of two users.

    Only the items rated by both users are taken into account.

    Parameters
    ----------
    dicti : dict
        Nested mapping ``{user: {item: rating}}``.
    p1, p2 : hashable
        Keys of the two users to compare.

    Returns
    -------
    number
        The correlation coefficient, or 0 when the two users share no rated
        item or when the coefficient is undefined because the shared ratings
        of either user have no variance.

    Raises
    ------
    KeyError
        If a user that has to be looked up is not a key of ``dicti``.
    """
    ratings_1 = dicti[p1]
    # Items rated by both users, in p1's rating order.
    shared = [item for item in ratings_1 if item in dicti[p2]]
    n = len(shared)
    if n == 0:
        return 0
    ratings_2 = dicti[p2]

    xs = [ratings_1[item] for item in shared]
    ys = [ratings_2[item] for item in shared]

    sum_1 = sum(xs)
    sum_2 = sum(ys)
    sum1Sq = sum([x * x for x in xs])
    sum2Sq = sum([y * y for y in ys])
    pSum = sum([x * y for x, y in zip(xs, ys)])

    # Single-pass formulas for the (unnormalised) covariance and variances.
    num = pSum - (sum_1 * sum_2 / n)
    var_1 = sum1Sq - sum_1 * sum_1 / n
    var_2 = sum2Sq - sum_2 * sum_2 / n

    # With non-integer ratings, rounding can push a zero variance slightly
    # below zero; math.sqrt would then raise ValueError. Treat that the same
    # way as an exactly-zero variance: the correlation is undefined, return 0.
    radicand = var_1 * var_2
    if radicand <= 0:
        return 0
    return num / math.sqrt(radicand)

#### Example of the Pearson correlation between the movie ratings of users 1 and 34

In [12]:
# Sanity check: similarity between the ratings of users 1 and 34.
sim_pearson(prefs,1,34)

0.891042111213631

### Creating a dictionary that maps MovieID to Movie Name

In [13]:
# Load the MovieLens item table: pipe-separated, Latin-1 encoded, no header row.
movie_data = pd.read_csv("http://files.grouplens.org/datasets/movielens/ml-100k/u.item",encoding='latin-1',delimiter='|',header=None)
# Column 0 holds the movie ID and column 1 the title. Pair the two columns up
# directly instead of looping over iterrows(), which builds a Series per row.
movie_dict = dict(zip(movie_data[0], movie_data[1]))

In [14]:
# Spot-check the mapping by printing the titles of movie IDs 1 to 5.
for i in range(1,6):
    print(movie_dict[i])

Toy Story (1995)
GoldenEye (1995)
Four Rooms (1995)
Get Shorty (1995)
Copycat (1995)


### Creating recommendations for users based on other users who are close to them (correlation more than 0.7), keeping the movies whose similarity-weighted average rating is high (at least 3.5 / 5)

In [15]:
def getRecommendations(person,pdict = prefs,similarity = sim_pearson,mdict = movie_dict,
                       min_similarity = 0.7,min_rating = 3.5):
    """Recommend movies to ``person`` based on the ratings of similar users.

    Every other user whose similarity to ``person`` is strictly greater than
    ``min_similarity`` contributes their ratings, weighted by that similarity,
    for each movie that ``person`` has not rated (or has rated 0). Movies whose
    similarity-weighted average rating is at least ``min_rating`` are returned.

    Parameters
    ----------
    person : hashable
        Key of the user in ``pdict`` to recommend for.
    pdict : dict
        Nested mapping ``{user: {movie_id: rating}}``.
    similarity : callable
        Called as ``similarity(pdict, person, other)``; returns a number.
    mdict : dict
        Mapping from movie ID to movie title.
    min_similarity : number, optional
        Similarity a user has to exceed to be taken into account. Must be
        non-negative so that the weights of every movie sum to a positive value.
    min_rating : number, optional
        Lowest weighted average rating a movie needs to be recommended.

    Returns
    -------
    list
        Movie titles ordered by weighted average rating, highest first; equal
        ratings are ordered by title, descending.

    Raises
    ------
    ValueError
        If ``min_similarity`` is negative.
    KeyError
        If a candidate movie ID is missing from ``mdict``.
    """
    if min_similarity < 0:
        raise ValueError("min_similarity must be non-negative")

    totals = {}    # movie ID -> sum of (rating * similarity)
    simSums = {}   # movie ID -> sum of the similarities that contributed

    for other, other_ratings in pdict.items():
        if other == person:
            continue
        sim = similarity(pdict, person, other)
        if sim <= min_similarity:
            continue

        # Looked up once per similar user instead of twice per movie.
        own_ratings = pdict[person]
        for item, rating in other_ratings.items():
            if item not in own_ratings or own_ratings[item] == 0:
                totals[item] = totals.get(item, 0) + rating * sim
                simSums[item] = simSums.get(item, 0) + sim

    # Normalise by the summed similarities to get a weighted average rating.
    rankings = [(total / simSums[item], mdict[item]) for item, total in totals.items()]
    rankings.sort(reverse=True)

    return [movie for score, movie in rankings if score >= min_rating]

#### Recommendations for user 1

In [16]:
# Show every recommended title for user 1, highest predicted rating first.
getRecommendations(1)

['Postman, The (1997)',
 'Desperate Measures (1998)',
 'Women, The (1939)',
 'Winter Guest, The (1997)',
 "Ulee's Gold (1997)",
 "She's the One (1996)",
 'Shadow Conspiracy (1997)',
 'Seventh Seal, The (Sjunde inseglet, Det) (1957)',
 "Schindler's List (1993)",
 'Rear Window (1954)',
 'Philadelphia (1993)',
 'Matilda (1996)',
 'Kids (1995)',
 'Incognito (1997)',
 'Heathers (1989)',
 'Great Dictator, The (1940)',
 'Gabbeh (1996)',
 'French Kiss (1995)',
 'Boot, Das (1981)',
 'Big Lebowski, The (1998)',
 'American President, The (1995)',
 'Sense and Sensibility (1995)',
 'Indian Summer (1996)',
 'That Thing You Do! (1996)',
 'Thousand Acres, A (1997)',
 'Titanic (1997)',
 "It's a Wonderful Life (1946)",
 'It Happened One Night (1934)',
 'L.A. Confidential (1997)',
 'Rainmaker, The (1997)',
 'Mrs. Dalloway (1997)',
 'Deceiver (1997)',
 'Wedding Singer, The (1998)',
 'That Darn Cat! (1997)',
 'Switchback (1997)',
 'Star Trek: Generations (1994)',
 'Sleepers (1996)',
 'Sixth Man, The (1997)