# Model-Based Collaborative Filtering

### Step 1 - Import Libraries


In [64]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.decomposition import TruncatedSVD

### Step 2 - Import Data

In [65]:
# Load the ratings file; it is tab-separated and the column names are supplied here.
frame = pd.read_csv(
    "ml-100k/u.data",
    sep="\t",
    names=["user_id", "item_id", "rating", "timestamp"],
)
frame.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [86]:
# Load the movie metadata; the file is pipe-separated and the column names are
# supplied explicitly.
movies = pd.read_csv(
    "ml-100k/u.item",
    sep="|",
    encoding="latin-1",
    names=[
        "item_id", "movie title", "release date", "video release date", "IMDb URL",
        "unknown", "Action", "Adventure", "Animation", "Childrens", "Comedy",
        "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
        "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
    ],
)
# Keep only the id -> title mapping, which is all the merge below needs.
movie_names = movies.loc[:, ["item_id", "movie title"]]
movie_names

Unnamed: 0,item_id,movie title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)
...,...,...
1677,1678,Mat' i syn (1997)
1678,1679,B. Monkey (1998)
1679,1680,Sliding Doors (1998)
1680,1681,You So Crazy (1994)


### Step 3-Merging Both Datasets

In [67]:
# Attach the movie title to every rating row by joining on the shared item_id key.
combined_movies_data = frame.merge(movie_names, on="item_id")
combined_movies_data.head()

Unnamed: 0,user_id,item_id,rating,timestamp,movie title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


#### We start with a popularity-based recommendation, which ranks movies by the number of ratings each one received

In [68]:
# Number of ratings per item_id, largest first; only the top five are displayed.
(
    combined_movies_data
    .groupby("item_id")["rating"]
    .count()
    .sort_values(ascending=False)
    .head()
)

item_id
50     583
258    509
100    508
181    507
294    485
Name: rating, dtype: int64

##### According to the data, item_id 50 was rated 583 times, which makes it the most frequently rated movie (most ratings, not the highest average rating)

We now look up the title of the movie with item_id 50.

In [69]:
# Derive the most frequently rated item from the data instead of hard-coding its
# id, so this cell stays correct if the ratings file changes.
most_rated_item_id = (
    combined_movies_data.groupby("item_id")["rating"].count().idxmax()
)
# All rating rows for that item; every row carries the same movie title.
highest_rated = combined_movies_data[
    combined_movies_data["item_id"] == most_rated_item_id
]
highest_rated["movie title"].unique()

array(['Star Wars (1977)'], dtype=object)

The most popular movie (by number of ratings) is Star Wars (1977).

### Step-4 Building a Utility Matrix

In [70]:
# Utility matrix: one row per user, one column per movie title.
# User/title pairs with no rating are filled with 0; where several rows share the
# same user and title, pivot_table's default aggregation (mean) combines them.
rating_crosstab = pd.pivot_table(
    combined_movies_data,
    values="rating",
    index="user_id",
    columns="movie title",
    fill_value=0,
)
rating_crosstab.head()

movie title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,2,5,0,0,3,4,0,0,...,0,0,0,5,3,0,0,0,4,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,2,0,0,0,0,4,0,0,...,0,0,0,4,0,0,0,0,4,0


In [71]:
# Dimensions of the utility matrix: (number of users, number of movie titles).
rating_crosstab.shape

(943, 1664)

###### We use the truncated SVD algorithm (`TruncatedSVD`) to recommend items

### Transposing the Utility Matrix

In [72]:
# Flip the utility matrix so that each row is a movie title and each column a user.
X = np.transpose(rating_crosstab.values)
X.shape

(1664, 943)

### Decomposing the Matrix using SVD

In [73]:
# Compress every row of X down to 12 components; the fixed random_state keeps the
# decomposition repeatable between runs.
model = TruncatedSVD(
    n_components=12,
    random_state=17,
)
res_matrix = model.fit_transform(X)

In [81]:
# Shape of the reduced matrix: one row per row of X, one column per SVD component.
res_matrix.shape

(1664, 12)

### Generating a Correlation Matrix

In [75]:
# Pearson correlation between every pair of rows of res_matrix (rows are treated
# as the variables), which gives a square matrix with one row/column per row of
# res_matrix.
corr_mat = np.corrcoef(res_matrix, rowvar=True)
corr_mat

array([[ 1.        , -0.10298113,  0.52210159, ...,  0.39854553,
         0.22143017,  0.5039286 ],
       [-0.10298113,  1.        ,  0.06549218, ...,  0.16134137,
         0.5091753 ,  0.23355053],
       [ 0.52210159,  0.06549218,  1.        , ...,  0.7658073 ,
         0.44348034,  0.19721751],
       ...,
       [ 0.39854553,  0.16134137,  0.7658073 , ...,  1.        ,
         0.18088492,  0.10342131],
       [ 0.22143017,  0.5091753 ,  0.44348034, ...,  0.18088492,
         1.        ,  0.18524109],
       [ 0.5039286 ,  0.23355053,  0.19721751, ...,  0.10342131,
         0.18524109,  1.        ]])

#### Star Wars is the most frequently rated movie, so we isolate its row from the correlation matrix

In [76]:
# The utility-matrix columns are the movie titles, in the same order as the rows
# of the transposed matrix that was decomposed.
movies_names = rating_crosstab.columns
movies_list = movies_names.tolist()

# Position of Star Wars within that ordering.
star_wars = movies_list.index("Star Wars (1977)")
star_wars

1398

In [88]:
# Select the Star Wars row using the position computed from the title list
# rather than a hard-coded row number, which would silently point at a different
# movie if the column order of the utility matrix ever changed.
corr_star_wars = corr_mat[star_wars]
movies_names.shape

(1664,)

#### We recommend movies to the user based on their correlation score with Star Wars

## Recommendations

In [94]:
# Movies whose correlation with Star Wars exceeds 0.87.
# Star Wars itself is excluded by position rather than with `< 1.0`: a
# floating-point self-correlation is not guaranteed to be exactly 1.0, and a
# different movie whose score rounds to 1.0 should not be dropped.
similar_to_star_wars = corr_star_wars > 0.87
similar_to_star_wars[star_wars] = False
list(movies_names[similar_to_star_wars])


['Alien (1979)',
 'Aliens (1986)',
 'Blade Runner (1982)',
 'Braveheart (1995)',
 'Die Hard (1988)',
 'Empire Strikes Back, The (1980)',
 'Fugitive, The (1993)',
 'Hunt for Red October, The (1990)',
 'Indiana Jones and the Last Crusade (1989)',
 'Men in Black (1997)',
 'Raiders of the Lost Ark (1981)',
 'Return of the Jedi (1983)',
 'Rumble in the Bronx (1995)',
 'Star Trek: First Contact (1996)',
 'Star Trek: The Wrath of Khan (1982)',
 'Terminator 2: Judgment Day (1991)',
 'Terminator, The (1984)',
 'Toy Story (1995)',
 'Twelve Monkeys (1995)']

These movies can be recommended to a user who liked Star Wars.

In [91]:
# Tighter cut-off: only movies whose correlation with Star Wars exceeds 0.95.
# Star Wars itself is excluded by position rather than with `< 1.0`: a
# floating-point self-correlation is not guaranteed to be exactly 1.0, and a
# different movie whose score rounds to 1.0 should not be dropped.
closest_to_star_wars = corr_star_wars > 0.95
closest_to_star_wars[star_wars] = False
list(movies_names[closest_to_star_wars])

['Return of the Jedi (1983)']

This is the top recommendation: the movie most strongly correlated with Star Wars.

# Content-Based Recommender System

### Step 1 - Import the Nearest Neighbors Algorithm


We are using the mtcars dataset, which describes a number of cars by various attributes.

In [95]:
from sklearn.neighbors import NearestNeighbors

In [101]:
# Read the cars table, then give the columns explicit names; the first column
# holds the car name and is labelled `car_names`.
cars = pd.read_csv("mtcars.csv")
cars.columns = [
    "car_names",
    "mpg", "cyl", "disp", "hp", "drat", "wt", "qsec",
    "vs", "am", "gear", "carb",
]

In [102]:
# Preview the first rows of the cars table with its renamed columns.
cars.head()

Unnamed: 0,car_names,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


#### Aim: recommend a car to a buyer who asks for mpg = 15, disp = 300, hp = 160 and wt = 3.2

In [112]:
# The buyer's request, in the order mpg, disp, hp, wt.
t = [15, 300, 160, 3.2]

# Feature matrix with the same four columns in the same order (these are the
# columns at positions 1, 3, 4 and 6 of `cars`).
X = cars[["mpg", "disp", "hp", "wt"]].values
X[:5]

array([[ 21.   , 160.   , 110.   ,   2.62 ],
       [ 21.   , 160.   , 110.   ,   2.875],
       [ 22.8  , 108.   ,  93.   ,   2.32 ],
       [ 21.4  , 258.   , 110.   ,   3.215],
       [ 18.7  , 360.   , 175.   ,   3.44 ]])

In [114]:
# Only the single closest car is wanted.
n_neighbors = 1

# Build the neighbour index over the car feature matrix; fit() returns the
# estimator itself, so `model` is the fitted object either way.
model = NearestNeighbors(n_neighbors=n_neighbors)
model.fit(X)
model

NearestNeighbors(n_neighbors=1)

In [116]:
# Query the fitted model for the car closest to the buyer's request `t`.
# The result is a pair of arrays: (distances, row indices into the fitted data).
model.kneighbors([t])

(array([[10.77474942]]), array([[22]]))

In [119]:
# Take the row position from the nearest-neighbour query instead of hard-coding
# it, so the lookup keeps following the model if the request `t` or the data
# changes. With return_distance=False only the index array is returned.
nearest_idx = model.kneighbors([t], return_distance=False)
cars[["car_names"]].iloc[nearest_idx[0, 0]]

car_names    AMC Javelin
Name: 22, dtype: object

Therefore, the AMC Javelin is the car recommended to the user.