In [1]:
# Importing all required libraries, modules
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib
from sqlalchemy import create_engine
import psycopg2  # PostgreSQL connector

In [2]:
# Import Dataset
anime = pd.read_csv(r"C:\Users\Swapnil Mishra\Desktop\DS_Codes_Swapnil\Recommendation_Engine\RecommendationEngine_flask\anime.csv", encoding='utf8')

In [3]:
# Database Connection (PostgreSQL)

In [4]:
# Upload the Table into Database
engine = create_engine("postgresql://postgres:swapnil1989@localhost:5432/recommenddb")

In [5]:
anime.to_sql('anime', con=engine, if_exists='append', chunksize=1000, index=False)

12294

In [6]:
# Read the Table (data) from PostgreSQL database
con = psycopg2.connect(
    host='localhost',
    port='5432',
    user='postgres',
    password='swapnil1989',
    dbname='recommenddb'
)

In [7]:
cur = con.cursor()
cur.execute('SELECT * FROM anime')
df = cur.fetchall()

In [8]:
anime = pd.DataFrame(df)
anime = anime.rename({0: 'anime_id'}, axis=1)
anime = anime.rename({1: 'name'}, axis=1)
anime = anime.rename({2: 'genre'}, axis=1)
anime = anime.rename({3: 'type'}, axis=1)
anime = anime.rename({4: 'episodes'}, axis=1)
anime = anime.rename({5: 'rating'}, axis=1)
anime = anime.rename({6: 'members'}, axis=1)

In [9]:
# Check for Missing values
anime["genre"].isnull().sum()

62

In [10]:
# Impute the Missing values in 'genre' column for a movie with 'General' category
anime["genre"] = anime["genre"].fillna("General")

In [11]:
# Create a Tfidf Vectorizer to remove all stop words
tfidf = TfidfVectorizer(stop_words="english")  # taking stop words from tfidf vectorizer

In [12]:
# Transform a count matrix to a normalized tf-idf representation
tfidf_matrix = tfidf.fit(anime.genre)

In [13]:
# Save the Pipeline for tfidf matrix
joblib.dump(tfidf_matrix, 'matrix')

['matrix']

In [14]:
os.getcwd()

'C:\\Users\\Swapnil Mishra\\Desktop\\DS_Codes_Swapnil\\Recommendation_Engine\\RecommendationEngine_flask'

In [15]:
mat = joblib.load("matrix")
tfidf_matrix = mat.transform(anime.genre)

In [16]:
tfidf_matrix.shape 

(12294, 47)

In [17]:
# Computing the cosine similarity on Tfidf matrix
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
joblib.dump(cosine_sim_matrix, 'cosine_matrix')

['cosine_matrix']

In [18]:
# Create a mapping of anime name to index number
anime_index = pd.Series(anime.index, index=anime['name']).drop_duplicates()

In [19]:
# Example
anime_id = anime_index["No Game No Life Movie"]
anime_id

10919

In [20]:
# Custom Function to Find the TopN Movies to be Recommended
def get_recommendations(Name, topN):
    anime_id = anime_index[Name]
    
    cosine_scores = list(enumerate(cosine_sim_matrix[anime_id]))
    
    cosine_scores = sorted(cosine_scores, key=lambda x: x[1], reverse=True)
    
    cosine_scores_N = cosine_scores[0: topN + 1]
    
    anime_idx = [i[0] for i in cosine_scores_N]
    anime_scores = [i[1] for i in cosine_scores_N]
    
    anime_similar_show = pd.DataFrame(columns=["name", "Score"])
    anime_similar_show["name"] = anime.loc[anime_idx, "name"]
    anime_similar_show["Score"] = anime_scores
    anime_similar_show.reset_index(inplace=True)
    
    return anime_similar_show.iloc[1:, ]

In [21]:
rec = get_recommendations("No Game No Life Movie", topN=10)
print(rec)

    index                                               name     Score
1   10919                              No Game No Life Movie  1.000000
2   10436  Super Real Mahjong: Mahjong Battle Scramble - ...  0.859206
3    4293                       Raising Victor Vargas (2002)  0.827579
4    5887                     xXx: State of the Union (2005)  0.800258
5    5972           Pusher II: With Blood on My Hands (2004)  0.800258
6    6120                                  Revolution (1985)  0.800258
7    6678             World on a Wire (Welt am Draht) (1973)  0.800258
8   10435  Super Real Mahjong: Kasumi Miki Shouko no Haji...  0.800258
9    4631                            Italian Job, The (1969)  0.787476
10   6813                    Midnight Meat Train, The (2008)  0.739464
