# Basic Content Recommender

In [None]:
import os
import pandas as pd

## Import and check data

In [None]:
# Relative path to the sample CSV: one directory up, inside `data/`.
data_path = os.path.join('..', 'data', 'sample-data.csv')

In [None]:
# Load the CSV into a DataFrame; later cells read its 'id' and 'description' columns.
df = pd.read_csv(data_path)

In [None]:
# Show the DataFrame as the cell output to eyeball the loaded data.
df

## Create tf-idf matrix

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel 

Key arguments for TfidfVectorizer
 * analyzer - level of analysis (i.e., word or character)
 * ngram_range - range for n-gram inclusion
 * min_df - minimum document frequency a term must reach to be included (an integer is a document count, a float is a fraction of documents)
 * stop_words - words to be removed from the matrix

In [None]:
# Word-level unigrams through trigrams, with English stop words removed.
# min_df=1 keeps every term that occurs in at least one document; an integer
# min_df of 0 is rejected by scikit-learn's parameter validation.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
# Learn the vocabulary and idf weights, then encode every description as a
# tf-idf row.
tfidf_matrix = tf.fit_transform(df['description'])

In [None]:
# Matrix dimensions: one row per description, one column per retained n-gram.
tfidf_matrix.shape

## Create similarity matrix and rank items

In [None]:
# Pairwise similarity between all descriptions. linear_kernel is a plain dot
# product, which equals cosine similarity when the tf-idf rows are
# L2-normalised (TfidfVectorizer's default `norm`).
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

# Row k of the similarity matrix belongs to the k-th row of df, so work with
# positions rather than index labels; this stays correct even when df does not
# carry the default 0..n-1 index.
item_ids = df['id'].tolist()

# results maps each item id to its (up to) 98 most similar *other* items as
# (score, item id) tuples, best match first.
results = {}
for pos, item_id in enumerate(item_ids):
    scores = cosine_similarities[pos]
    # Positions of the 99 highest scores, in descending order of similarity.
    top_positions = scores.argsort()[:-100:-1]
    # Drop the item itself explicitly instead of assuming it ranks first,
    # which does not hold when scores tie (e.g. duplicate descriptions).
    neighbours = [i for i in top_positions if i != pos][:98]
    results[item_id] = [(scores[i], item_ids[i]) for i in neighbours]

In [None]:
def item(id):
    """Return the display name of the product with the given ``id``.

    The name is the part of the first matching row's description that
    precedes the first ' - ' separator (the whole description if there is
    no separator). Raises IndexError when no row of ``df`` has this id.
    """
    matching_descriptions = df.loc[df['id'] == id, 'description'].tolist()
    first_description = matching_descriptions[0]
    name, _, _ = first_description.partition(' - ')
    return name
# Just reads the results out of the dictionary.
def recommend(item_id, num):
    """Print the ``num`` top-ranked recommendations for ``item_id``.

    No ranking happens here: the (score, id) pairs are read from the
    precomputed ``results`` dictionary and each id is turned into a display
    name with ``item``. Raises KeyError if ``item_id`` is not in ``results``.
    """
    print(f"Recommending {num!s} products similar to {item(item_id)}...")
    print("-------")
    for score, rec_id in results[item_id][:num]:
        print(f"Recommended: {item(rec_id)} (score:{score!s})")

## Test Recommender

In [None]:
# Print the top 5 recommendations for the item whose id is 11.
recommend(item_id=11, num=5)

In [None]:
# Print the top 10 recommendations for the item whose id is 50.
recommend(item_id=50, num=10)