# Content based
The Content-Based Recommender relies on the similarity of the items being recommended. The basic idea is that if you like an item, then you will also like a “similar” item. It generally works well when it’s easy to determine the context/properties of each item.

# Code from scratch 

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
import sys

In [None]:
movies=pd.read_csv('../Data/movies.csv')

In [None]:
movies

In [None]:
list_genres=set()
for i in movies['genres']:
    list_genres=list_genres.union(set(i.split('|')))
list_genres=list(list_genres)

# Map text to int

In [None]:
for genre in list_genres:
    temp=[]
    for i in range(len(movies)):
        if str(genre) in str(movies['genres'][i]):
            temp.append(1)
        else:
            temp.append(0)
    movies[genre]=temp

In [None]:
movies

# Calculate document frequency

In [None]:
df=dict.fromkeys(list_genres,0)
for genre in list_genres:
    df[genre]=sum(movies[genre])

In [None]:
plt.figure(figsize=(10,5))
plt.bar(df.keys(),df.values())
plt.xticks(rotation=90)

In [None]:
movie_arr=np.array(movies)

# Calculate TF-IDF
<img src="tf.jpeg">

In [None]:
import math
N=len(movies)
count_genres=len(list_genres)
for i in range(len(movies)):
    for genre in list_genres:
        if movies.loc[i,genre]!=0:
            movies.loc[i,genre]=float(1.0/count_genres*math.log10(N*1.0/(df[genre]+1)))

In [None]:
movies

# Calculate cosin similarity
<img src="cosin.png">
Sentence 2 is more likely to be using Term 2 than using Term 1. Vice-versa for Sentence 1. The method of calculating this relative measure is calculated by taking the cosine of the angle between the sentences and the terms. 

In [None]:
indices = pd.Series(movies.index, index=movies['title'])
titles=movies['title']
movie_vector=np.array(movies)[:,4:]

In [None]:
def get_value(i,vector):
    vec1=movie_vector[i]
    a= np.dot(vec1,vector)*1.0/(np.linalg.norm(vec1)*np.linalg.norm(vector))
    return a

def get_20_nearest(vector):
    cosin_sim={}
    for i in range(len(movies)):
        cosin_sim[i]=get_value(i,vector)
    sorted_order=sorted(cosin_sim.items(),key=lambda x: x[1],reverse=True)
    return sorted_order[1:20]

In [None]:
def genre_recommendation(movie_name):
    index=indices[movie_name]
    vector=np.array(movies.loc[index])[4:]
    pair=get_20_nearest(vector)
    index_list=[i[0] for i in pair]
    return titles.iloc[index_list]

In [None]:
genre_recommendation('Good Will Hunting (1997)')

In [None]:
genre_recommendation('Toy Story (1995)')

# Using library

In [None]:
movies['genres']=movies['genres'].str.split('|')
movies['genres']=movies['genres'].fillna("").astype('str')

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
tf=TfidfVectorizer(analyzer='word',ngram_range=(1,1),
                  min_df=0,stop_words='english')
tfidf_matrix=tf.fit_transform(movies['genres'])
tfidf_matrix.shape

In [None]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim=linear_kernel(tfidf_matrix,tfidf_matrix)

In [None]:
titles=movies['title']
indices=pd.Series(movies.index,index=movies['title'])

In [None]:
def get_movie_recommendation(movie_name):
    index=indices[movie_name]
    similar_scores=list(enumerate(cosine_sim[index]))
    sim_scores=sorted(similar_scores,key=lambda x:x[1],reverse=True)
    list_result=sim_scores[1:20]
    movie_index=[i[0] for i in list_result]
    return titles.iloc[movie_index]

In [24]:
get_movie_recommendation('Good Will Hunting (1997)').head(20)

25                                       Othello (1995)
26                                  Now and Then (1995)
29    Shanghai Triad (Yao a yao yao dao waipo qiao) ...
30                               Dangerous Minds (1995)
35                              Dead Man Walking (1995)
39                      Cry, the Beloved Country (1995)
42                                   Restoration (1995)
52                                      Lamerica (1994)
54                                       Georgia (1995)
56                         Home for the Holidays (1995)
61                            Mr. Holland's Opus (1995)
66                                      Two Bits (1995)
77                           Crossing Guard, The (1995)
79         White Balloon, The (Badkonake Sefid ) (1995)
81                      Antonia's Line (Antonia) (1995)
82      Once Upon a Time... When We Were Colored (1995)
89                   Journey of August King, The (1995)
92                               Beautiful Girls