In [1]:
import pandas as pd
import numpy as np
import sklearn.metrics.pairwise as pw
from scipy import sparse

## Importing Data

In [2]:
# Show catalogue: one row per show (show_id, Title, Year, Age, IMDb, Rotten Tomatoes).
tv_shows = pd.read_csv("tv_shows.csv")
# User ratings: one row per (userId, show_id) pair with a rating and a timestamp.
ratings = pd.read_csv("rating.csv")

In [3]:
# Preview the first rows of the show catalogue.
tv_shows.head()

Unnamed: 0,show_id,Title,Year,Age,IMDb,Rotten Tomatoes
0,1,Breaking Bad,2008,18+,9.5,96%
1,2,Stranger Things,2016,16+,8.8,93%
2,3,Money Heist,2017,18+,8.4,91%
3,4,Sherlock,2010,16+,9.1,78%
4,5,Better Call Saul,2015,18+,8.7,97%


In [4]:
# Preview the first rows of the ratings table.
ratings.head()

Unnamed: 0,userId,show_id,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


## Dropping Columns

In [5]:
# Remove the 'Age' column from the show catalogue.
# errors='ignore' keeps this cell idempotent: because it reassigns `tv_shows`
# from itself, re-running it would otherwise raise KeyError once 'Age' is gone.
tv_shows = tv_shows.drop(columns=['Age'], errors='ignore')
tv_shows.head()

Unnamed: 0,show_id,Title,Year,IMDb,Rotten Tomatoes
0,1,Breaking Bad,2008,9.5,96%
1,2,Stranger Things,2016,8.8,93%
2,3,Money Heist,2017,8.4,91%
3,4,Sherlock,2010,9.1,78%
4,5,Better Call Saul,2015,8.7,97%


In [6]:
# Remove the 'timestamp' column from the ratings table.
# errors='ignore' keeps this cell idempotent: because it reassigns `ratings`
# from itself, re-running it would otherwise raise KeyError once the column is gone.
ratings = ratings.drop(columns=['timestamp'], errors='ignore')
ratings.head()

Unnamed: 0,userId,show_id,rating
0,1,1,4.0
1,1,3,4.0
2,1,6,4.0
3,1,47,5.0
4,1,50,5.0


## Merging the DataFrames

In [7]:
# Attach every rating to its show's metadata. This is an inner join on
# show_id, so rows without a match on the other side are dropped.
merged_df = tv_shows.merge(ratings, how='inner', on='show_id')
merged_df.head()

Unnamed: 0,show_id,Title,Year,IMDb,Rotten Tomatoes,userId,rating
0,1,Breaking Bad,2008,9.5,96%,1,4.0
1,1,Breaking Bad,2008,9.5,96%,5,4.0
2,1,Breaking Bad,2008,9.5,96%,7,4.5
3,1,Breaking Bad,2008,9.5,96%,15,2.5
4,1,Breaking Bad,2008,9.5,96%,17,4.5


In [8]:
# Longer preview of the merged frame: each row is one user's rating of one show.
merged_df.head(10)

Unnamed: 0,show_id,Title,Year,IMDb,Rotten Tomatoes,userId,rating
0,1,Breaking Bad,2008,9.5,96%,1,4.0
1,1,Breaking Bad,2008,9.5,96%,5,4.0
2,1,Breaking Bad,2008,9.5,96%,7,4.5
3,1,Breaking Bad,2008,9.5,96%,15,2.5
4,1,Breaking Bad,2008,9.5,96%,17,4.5
5,1,Breaking Bad,2008,9.5,96%,18,3.5
6,1,Breaking Bad,2008,9.5,96%,19,4.0
7,1,Breaking Bad,2008,9.5,96%,21,3.5
8,1,Breaking Bad,2008,9.5,96%,27,3.0
9,1,Breaking Bad,2008,9.5,96%,31,5.0


## Item-Based Collaborative Recommender Based on the Pearson Correlation Coefficient

In [9]:
def get_similar_shows(item_similarity_df, show_name, user_rating, neutral_rating=2.5):
    """Score every show by its similarity to one show the user has rated.

    The similarity column for ``show_name`` is multiplied by
    ``user_rating - neutral_rating``. A rating above ``neutral_rating`` gives
    positive scores to similar shows; a rating below it gives them negative
    scores, pushing them down the ranking.

    Parameters
    ----------
    item_similarity_df : pandas.DataFrame
        Item-item similarity matrix whose columns are labelled by show.
    show_name : hashable
        Column label of the show the user rated.
    user_rating : float
        The rating the user gave to ``show_name``.
    neutral_rating : float, default 2.5
        Rating treated as "neither liked nor disliked". The default keeps the
        previously hard-coded offset.

    Returns
    -------
    pandas.Series
        One score per show, sorted from highest to lowest.

    Raises
    ------
    KeyError
        If ``show_name`` is not a column of ``item_similarity_df``.
    """
    weight = user_rating - neutral_rating
    weighted_scores = item_similarity_df[show_name] * weight
    return weighted_scores.sort_values(ascending=False)

def item_based_recomm_PPC(merged_df,user_rated):
    """Rank shows for a user with item-item Pearson correlation.

    Builds a user x title rating matrix from ``merged_df``, correlates the
    title columns with each other, and sums the rating-weighted similarity
    scores returned by ``get_similar_shows`` for every show the user rated.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain the columns ``userId``, ``Title`` and ``rating``.
    user_rated : iterable of (title, rating)
        Shows the user has rated, with the rating given to each.

    Returns
    -------
    pandas.Series
        Total score per title, sorted from highest to lowest. Shows the user
        already rated are not filtered out. Empty if ``user_rated`` is empty.

    Raises
    ------
    KeyError
        If a title in ``user_rated`` has no ratings in ``merged_df``.
    """
    user_rated = list(user_rated)

    item_based_pivot = merged_df.pivot_table(index=['userId'], columns=['Title'], values='rating')
    # NOTE(review): missing ratings are replaced by 0 before correlating, so
    # "not rated" is treated like a rating of 0. Confirm this is intended.
    item_based_pivot = item_based_pivot.fillna(0)
    item_similarity_df = item_based_pivot.corr(method='pearson')

    # Fail early with a readable message instead of a bare KeyError from the lookup.
    unknown = [show for show, _ in user_rated if show not in item_similarity_df.columns]
    if unknown:
        raise KeyError(f"No ratings available for: {unknown}")

    # Collect the per-show score Series in a list and build the frame once.
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    per_show_scores = [
        get_similar_shows(item_similarity_df, show, user_rating)
        for show, user_rating in user_rated
    ]
    if not per_show_scores:
        return pd.Series(dtype="float64")

    # Each Series becomes one row, aligned on title; summing the rows gives
    # the total score per title.
    similar_shows = pd.DataFrame(per_show_scores)
    return similar_shows.sum().sort_values(ascending=False)

## Item-Based Collaborative Recommender Based on Cosine Similarity

In [14]:
def item_based_recomm_cos(merged_df,user_rated):
    """Rank shows for a user with item-item cosine similarity.

    Builds a title x user rating matrix from ``merged_df`` (missing ratings
    filled with 0), computes the cosine similarity between title rows, and
    sums the similarity columns of every show the user rated.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Must contain the columns ``userId``, ``Title`` and ``rating``.
    user_rated : iterable of (title, rating)
        Shows the user has rated. Only the titles are used here; the rating
        values do not influence the score.

    Returns
    -------
    pandas.Series
        Summed similarity per title, sorted from highest to lowest. Shows the
        user already rated are not filtered out. Empty if ``user_rated`` is
        empty.

    Raises
    ------
    KeyError
        If a title in ``user_rated`` has no ratings in ``merged_df``.
    """
    pivot_item_based = pd.pivot_table(merged_df,
                                      index='Title',
                                      columns=['userId'], values='rating')
    sparse_pivot = sparse.csr_matrix(pivot_item_based.fillna(0))
    recommender = pw.cosine_similarity(sparse_pivot)
    recommender_df = pd.DataFrame(recommender,
                                  columns=pivot_item_based.index,
                                  index=pivot_item_based.index)

    # NOTE(review): unlike the Pearson recommender, the user's rating is not
    # used as a weight here. Confirm whether that is intended.
    rated_titles = [film_name for film_name, _rating in user_rated]

    # Fail early with a readable message instead of a bare KeyError from the lookup.
    unknown = [title for title in rated_titles if title not in recommender_df.columns]
    if unknown:
        raise KeyError(f"No ratings available for: {unknown}")
    if not rated_titles:
        return pd.Series(dtype="float64")

    # Select all rated columns at once and sum across them per title. This
    # replaces the DataFrame.append loop (removed in pandas 2.0); the sort
    # applied before each append had no effect on the aligned sum.
    return recommender_df[rated_titles].sum(axis=1).sort_values(ascending=False)

## Final Recommender

In [11]:
# Sample profile for one user: (show title, rating given) pairs passed to both
# recommenders below. Titles are looked up in the similarity matrices built
# from merged_df, so each must match a 'Title' value there exactly.
user_rated=[
            ("Money Heist",5),("Breaking Bad",4),("Sherlock",5),("Dexter",3),
            ("House of Cards",4),("Game of Arms",1),("The Tesla Files",1),
            ("Sacred Games",4),("Arrow",3),("Narcos",4),("13 Reasons Why",3)
           ]

### Testing the Pearson Correlation Coefficient (PCC) Recommender

In [12]:
# Score every show for the sample user with the Pearson-based recommender.
similar_shows = item_based_recomm_PPC(merged_df,user_rated)
# TODO: filter out shows the user has already rated. An earlier attempt called
# removingSeen(similar_shows, user_seen), but neither name is defined in the visible cells.
similar_shows.head(20)

Money Heist                             3.208718
Sherlock                                3.081348
House of Cards                          2.714440
Love 101                                2.709749
Cedar Cove                              2.597199
AMO                                     2.567269
March Comes in Like a Lion              2.567269
Derek                                   2.567269
Requiem                                 2.567269
Queen of the South                      2.567269
Marianne                                2.567269
The Night Shift                         2.567269
Dead Set                                2.567269
TURN: Washington's Spies                2.567269
Code Geass: Lelouch of the Rebellion    2.567269
Saint Seiya                             2.567269
Valor                                   2.552564
American Crime Story                    2.508513
Crashing                                2.459302
Narcos                                  2.386352
dtype: float64

### Testing Cosine Similarity

In [15]:
# Score every show for the sample user with the cosine-similarity recommender.
# Despite its name, cosine_df holds a Series of scores indexed by title.
cosine_df = item_based_recomm_cos(merged_df,user_rated)
cosine_df.head(20)

Love 101                                      2.326520
Breaking Bad                                  2.233499
Valor                                         2.225801
House of Cards                                2.215297
Marvel's Daredevil                            2.206822
Grey's Anatomy                                2.127112
Money Heist                                   2.117221
Crashing                                      2.100044
Wynonna Earp                                  2.092137
Community                                     2.014876
Daria                                         2.011102
#blackAF                                      2.000997
Killer Inside: The Mind of Aaron Hernandez    1.998416
Fuller House                                  1.997048
Black Mirror                                  1.989951
Arthdal Chronicles                            1.980702
The World's Most Extraordinary Homes          1.970427
Arrow                                         1.967249
The Magici