In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_data(csv_file_path):
  """Read the anime CSV and add a 'combined_features' text column.

  Args:
    csv_file_path: Path (or any source accepted by ``pd.read_csv``) of the
      dataset. It must contain 'Synopsis' and 'Genres' columns.

  Returns:
    The loaded DataFrame, with missing 'Synopsis'/'Genres' values replaced
    by empty strings and a new 'combined_features' column holding
    "<Synopsis> <Genres>" for every row.
  """
  frame = pd.read_csv(csv_file_path)

  # Missing text becomes '' so the concatenation below never produces NaN.
  for text_column in ('Synopsis', 'Genres'):
    frame[text_column] = frame[text_column].fillna('')

  # One text field per row: synopsis first, then genres, space separated.
  synopsis_text = frame['Synopsis']
  genre_text = frame['Genres']
  frame['combined_features'] = synopsis_text + ' ' + genre_text
  return frame

def build_similarity_matrix(df):
  """Compute pairwise cosine similarity between the rows of ``df``.

  Args:
    df: DataFrame with a 'combined_features' text column.

  Returns:
    The result of ``cosine_similarity`` on the TF-IDF matrix: entry [i, j]
    is the similarity between the i-th and j-th rows of ``df`` (by position).
  """
  # TF-IDF weights over the combined text, ignoring English stop words.
  vectorizer = TfidfVectorizer(stop_words='english')
  feature_matrix = vectorizer.fit_transform(df['combined_features'])

  # With a single argument, every row is compared against every other row
  # of the same matrix.
  return cosine_similarity(feature_matrix)

def recommend_anime(anime_name, df, cosine_sim, n):
  """Return the ``n`` anime most similar to ``anime_name``.

  Args:
    anime_name: Title to look up; compared for exact equality with the
      values of ``df['Name']``. If several rows share the title, the first
      one is used.
    df: DataFrame with 'anime_id', 'Name', 'Score' and 'Genres' columns.
    cosine_sim: Square similarity matrix where row/column ``i`` refers to
      the row at *position* ``i`` of ``df`` (i.e. ``df.iloc[i]``).
    n: Maximum number of recommendations. Values <= 0 give an empty result.

  Returns:
    A DataFrame with the 'anime_id', 'Name', 'Score' and 'Genres' columns of
    the most similar rows, highest similarity first (ties keep their original
    row order). If the title is not present, the string
    "'<anime_name>' not found" is returned instead.
  """
  # Work with row positions, not index labels: cosine_sim and df.iloc are
  # both positional, so this stays correct for a non-default df index.
  is_match = (df['Name'] == anime_name).to_numpy(dtype=bool, na_value=False)
  matching_positions = is_match.nonzero()[0]
  if len(matching_positions) == 0:
    return f"'{anime_name}' not found"

  # Duplicate titles would otherwise yield several rows; take the first.
  position = int(matching_positions[0])

  # Similarity of the requested anime to every row.
  scores = cosine_sim[position]

  # Drop the query row explicitly instead of assuming it sorts first: a row
  # with identical features (or an all-zero vector) can tie with it.
  candidates = [i for i in range(len(scores)) if i != position]

  # list.sort is stable, so equal scores keep their original row order.
  candidates.sort(key=lambda i: scores[i], reverse=True)

  # Clamp n so a negative value cannot slice from the end of the list.
  top_positions = candidates[:max(n, 0)]

  # Return selected columns for the recommended anime.
  return df.iloc[top_positions][['anime_id', 'Name', 'Score', 'Genres']]

if __name__ == "__main__":
  # Title to query; it is looked up against the 'Name' column as-is.
  anime_to_recommend = "Lycoris Recoil"

  # Load the dataset and compute the pairwise similarities once up front.
  csv_file_path = './dataset/anime-dataset-2023.csv'
  df = load_data(csv_file_path)
  cosine_sim = build_similarity_matrix(df)

  # Fetch the five closest matches and show them.
  recommendations = recommend_anime(anime_to_recommend, df, cosine_sim, n=5)
  print(f"Recommendations for {anime_to_recommend}")
  print(recommendations)


Recommendations for Lycoris Recoil
       anime_id                                              Name    Score  \
24036     54440               Lycoris Recoil (Shinsaku Animation)  UNKNOWN   
3787       4729                                    Natural 2: Duo     5.91   
7930      18747                                   Hotaru Kagayaku     5.47   
2980       3375                                         Kirepapa.      6.6   
4981       7415  Mukougaoka Chisato wa Tada Mitsumeteita no Datta     6.01   

                                           Genres  
24036                                      Action  
3787                                       Hentai  
7930                                        Drama  
2980   Boys Love, Comedy, Drama, Romance, Erotica  
4981                                      UNKNOWN  
