### Imports :

In [1]:
import pandas as pd
import numpy as np
from movies import movies_genres,get_movie_id,get_movie_name,get_movie_year

from similarity_metrics import product_similarity,cosine_similarity
from content_filtring import get_most_similar, get_recommendations
from utils import load_data

### Loading Data : 

In [2]:
# load_data() hands back the three tables used throughout the notebook.
users, movies, ratings = load_data()

# Number of rows in the users table.
nb_users = users.shape[0]

# Report the (rows, columns) shape of each table, one line per table.
for label, frame in (
    ("Users shape", users),
    ("Movies shape", movies),
    ("ratings shape", ratings),
):
    print(label, frame.shape)

Users shape (6040, 5)
Movies shape (3883, 21)
ratings shape (1000209, 3)


In [3]:
# Preview the first five rows of the users table.
users.head(n=5)

Unnamed: 0,user_id,gender,age,occupation,zip_code
0,0,F,1,10,48067
1,1,M,56,16,70072
2,2,M,25,15,55117
3,3,M,45,7,2460
4,4,M,25,20,55455


In [4]:
# Preview the first five rows of the movies table (id, title, year and genre columns).
movies.head(n=5)

Unnamed: 0,movie_id,title,year,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,...,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
0,0,Toy Story,1995,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,Jumanji,1995,0.0,1.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,Grumpier Old Men,1995,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,Waiting to Exhale,1995,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,Father of the Bride Part II,1995,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Preview the first five rows of the ratings table (user_id, movie_id, rating).
ratings.head(n=5)

Unnamed: 0,user_id,movie_id,rating
0,0,1176,5
1,0,655,3
2,0,902,3
3,0,3339,4
4,0,2286,5


## Recommendation systems :

### 1) Content-based filtering :

Content-based filtering uses item features to recommend other items **similar** to what the user likes, based on their previous actions or explicit feedback. **We don't use other users' information!**
For example, if user `A` liked `Harry Potter 1`, he/she will probably like `Harry Potter 2`, and we **don't need to look at the preferences of other users.**

This approach operates on the characteristics (**features**) of the movies to determine which ones are close to each other. In our case, we have access to the genres of the movies. For example, the genres of `Toy Story` are: `Animation`, `Children's` and `Comedy`. This is represented as follows in our dataset:

In [6]:
# Column labels to keep, as provided by movies_genres(); judging by the preview
# of movies_df, this is the title followed by the genre indicator columns.
head_line = movies_genres()

# Restrict the movies table to those columns (all rows kept).
movies_df = movies.loc[:, head_line]

In [7]:
# Preview the first five rows of the title + genre table.
movies_df.head(n=5)

Unnamed: 0,title,Animation,Children's,Comedy,Adventure,Fantasy,Romance,Drama,Action,Crime,Thriller,Horror,Sci-Fi,Documentary,War,Musical,Mystery,Film-Noir,Western
0,Toy Story,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Jumanji,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Grumpier Old Men,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Waiting to Exhale,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Father of the Bride Part II,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Calculate similarity

In [8]:
# Genre feature matrix: one row per movie, one column per genre.
# The first column of movies_df (the title) is skipped with iloc[:, 1:].
# The second positional argument of np.array is `dtype`, so handing it
# np.ndarray yields an object-dtype array; request a float matrix explicitly
# so downstream similarity computations work on a numeric array.
x = movies_df.iloc[:, 1:].to_numpy(dtype=float)
x.shape

(3883, 18)

In [9]:
# Build two movie-to-movie similarity matrices from the same genre features.
# The statement order is kept as-is because the helpers appear to print shapes
# while running (see the cell output).

# Product of the feature matrix with its transpose.
similarity = product_similarity(x, x.T)

# Cosine similarity computed from the feature matrix.
similarity_cosine = cosine_similarity(x)

(3883, 18)
(18, 3883)
(3883, 3883)


In [10]:
# Display the matrix returned by product_similarity(x, x.T).
# Presumably entry [i, j] counts the genres shared by movies i and j — TODO confirm.
similarity

array([[3, 1, 1, ..., 0, 0, 0],
       [1, 3, 0, ..., 0, 0, 0],
       [1, 0, 2, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 2]])

In [11]:
# Display the matrix returned by cosine_similarity(x).
# NOTE(review): in the printed output the trailing rows are 0.0 even in the last
# (diagonal) position, whereas `similarity` is non-zero there — confirm that
# cosine_similarity fills every row.
similarity_cosine

array([[1.        , 0.33333333, 0.40824829, ..., 0.        , 0.        ,
        0.        ],
       [0.33333333, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [12]:
# Closest matches to 'Toy Story' under the product similarity; the output lists
# (index, title, score) tuples.
get_most_similar(movies, similarity, 'Toy Story')

[(0, 'Toy Story', 3),
 (667, 'Space Jam', 3),
 (3685, 'Adventures of Rocky and Bullwinkle, The', 3),
 (3682, 'Chicken Run', 3),
 (2009, 'Jungle Book, The', 3),
 (2011, 'Lady and the Tramp', 3),
 (2012, 'Little Mermaid, The', 3),
 (2033, 'Steamboat Willie', 3),
 (2072, 'American Tail, An', 3),
 (2073, 'American Tail: Fievel Goes West, An', 3)]

In [20]:
# Closest matches to 'Toy Story' under the cosine similarity; the output lists
# (index, title, score) tuples.
get_most_similar(movies, similarity_cosine, 'Toy Story')

[(0, 'Toy Story', 1.0),
 (12, 'Balto', 0.816496580927726),
 (4, 'Father of the Bride Part II', 0.5773502691896257),
 (7, 'Tom and Huck', 0.408248290463863),
 (6, 'Sabrina', 0.408248290463863),
 (11, 'Dracula: Dead and Loving It', 0.408248290463863),
 (2, 'Grumpier Old Men', 0.408248290463863),
 (3, 'Waiting to Exhale', 0.408248290463863),
 (10, 'American President, The', 0.33333333333333326),
 (1, 'Jumanji', 0.33333333333333326)]

In [21]:
# Recommendations from the cosine similarity matrix; the third argument (0) is
# presumably the user id — verify against get_recommendations.
get_recommendations(movies, ratings, 0, similarity_cosine)

Unnamed: 0,movie_id,title,similarity
0,13,Nixon,1.0
20,0,Toy Story,1.0
10,12,Balto,0.816497
2,15,Casino,0.707107
3,3,Waiting to Exhale,0.707107


In [22]:
# Recommendations from the product similarity matrix; the third argument (0) is
# presumably the user id — verify against get_recommendations.
get_recommendations(movies, ratings, 0, similarity)

Unnamed: 0,movie_id,title,similarity
14,1526,Hercules,3
15,3090,Fantasia 2000,3
28,2072,"American Tail, An",3
27,2033,Steamboat Willie,3
26,2012,"Little Mermaid, The",3


In [24]:
# Recommendations from the cosine similarity matrix; the third argument (999) is
# presumably the user id — verify against get_recommendations.
get_recommendations(movies, ratings, 999, similarity_cosine)

Unnamed: 0,movie_id,title,similarity
10,13,Nixon,1.0
1,13,Nixon,0.707107
13,3,Waiting to Exhale,0.707107
12,15,Casino,0.707107
11,16,Sense and Sensibility,0.707107


In [26]:
# Recommendations from the product similarity matrix; the third argument (999) is
# presumably the user id — verify against get_recommendations.
get_recommendations(movies, ratings, 999, similarity)

Unnamed: 0,movie_id,title,similarity
0,166,First Knight,2
2,1451,Smilla's Sense of Snow,2
3,503,"Perfect World, A",2
4,3197,Man Bites Dog (C'est arriv� pr�s de chez vous),2
5,1458,"Devil's Own, The",2
