# Recommender System
___
This file features the development of the Recommendation System

In [1]:
import pandas as pd # for data preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer # to convert words into a sparse matrix
from sklearn.metrics.pairwise import sigmoid_kernel # to see how similiar items are

In [2]:
movies = pd.read_csv("data/movies.csv").sample(frac=1, random_state=51)

In [3]:
movies.head()

Unnamed: 0,Title,Director,Cast,Genre,Combined
59,Kakanfo,David Dida Tella,Bimbo Oshin Antar Laniyan Dele Odule,Adventure Drama,Bimbo Oshin Antar Laniyan Dele Odule David Did...
64,Citation,Kunle Afolayan,Temi Otedola Jimmy Jean-Lewis Kunle Afolayan J...,Drama Thriller,Temi Otedola Jimmy Jean-Lewis Kunle Afolayan J...
123,King of Thieves (Ogundabede),Adebayo Tijani and Tope Adebayo Salami,Toyin Abraham Femi Adebayo Salami Odunlade Ade...,Drama,Toyin Abraham Femi Adebayo Salami Odunlade Ade...
99,Mimi,Samuel Olatunji,Ali Baba Ireti Doyle Toyin Abraham Prince Jide...,Romance Comedy,Ali Baba Ireti Doyle Toyin Abraham Prince Jide...
58,Fate of Alakada: The Party Planner,Kayode Kasum,Toyin Abraham Mercy Eke Broda Shaggi,Action Comedy,Toyin Abraham Mercy Eke Broda Shaggi Kayode Ka...


In [4]:
# coverting data into a sparse matrix
tfv = TfidfVectorizer(min_df=3, strip_accents="unicode", analyzer="word", ngram_range=(1, 3), stop_words="english")

In [5]:
# transforming the data
count_matrix = tfv.fit_transform(movies['Combined'])

In [6]:
# Getting the similiarity of words in the column
similarity = sigmoid_kernel(count_matrix, count_matrix)

In [7]:
import joblib
joblib.dump(similarity, "similarity_joblib")

['similarity_joblib']

In [8]:
def recommend(title):
    # get index of movie
    index = movies.loc[movies["Title"] == title].index[0]
    
    # get index  of movies similair to user's movie
    similiar_movies = list(enumerate(similarity[index]))
    similiar_movies_index = sorted(similiar_movies, key=lambda x:x[1], reverse=True)

    # Get top 10 most similiar movies by index
    movies_index = similiar_movies_index[1:11]
    
    # Extracting the movie names using their index
    movie = []
    for i in range(len(movies_index)):
        movie_name = movies_index[i][0]
        movie.append(movies["Title"][movie_name])
    return movie

In [13]:
recommend("Couple of Days")

['The Herbert Macaulay Affair',
 'Badamasi',
 'Akpe: Return of the Beast',
 'The Good Husband',
 'Up North',
 'Chains',
 "Sobi's Mystic",
 'Kpali',
 'Mama Drama',
 'Breaded Life']