In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import MaxAbsScaler
from sklearn.neighbors import NearestNeighbors

In [14]:
#laod data
df = pd.read_csv("anime.csv")

#First Few rows
df.head(10)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
5,32935,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Ga...,"Comedy, Drama, School, Shounen, Sports",TV,10,9.15,93351
6,11061,Hunter x Hunter (2011),"Action, Adventure, Shounen, Super Power",TV,148,9.13,425855
7,820,Ginga Eiyuu Densetsu,"Drama, Military, Sci-Fi, Space",OVA,110,9.11,80679
8,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
9,15417,Gintama&#039;: Enchousen,"Action, Comedy, Historical, Parody, Samurai, S...",TV,13,9.11,81109


In [15]:
#Check null values
df.isnull().sum()

df['genre'] = df['genre'].fillna('')
df['type'] = df['type'].fillna('')
df['rating'] = df['rating'].fillna(0)
df['episodes'] = pd.to_numeric(df['episodes'],errors='coerce')
df['episodes'].fillna(df['episodes'].median(),inplace=True)
episode_ = df['episodes']
df.isnull().sum()

df.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['episodes'].fillna(df['episodes'].median(),inplace=True)


anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [16]:
print(df.shape)
print(df.dtypes)

(12294, 7)
anime_id      int64
name         object
genre        object
type         object
episodes    float64
rating      float64
members       int64
dtype: object


In [17]:
genre_list = df['genre'].to_string()
type_ = pd.get_dummies(df['type'])
genre_ = df['genre'].str.get_dummies(sep=",")
genre_.head()

X = pd.concat([genre_,type_,episode_,df['rating'],df['members']],axis=1)
X.head()

Unnamed: 0,Adventure,Cars,Comedy,Dementia,Demons,Drama,Ecchi,Fantasy,Game,Harem,...,Unnamed: 12,Movie,Music,ONA,OVA,Special,TV,episodes,rating,members
0,0,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,1.0,9.37,200630
1,1,0,0,0,0,1,0,1,0,0,...,False,False,False,False,False,False,True,64.0,9.26,793665
2,0,0,1,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,51.0,9.25,114262
3,0,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,24.0,9.17,673572
4,0,0,1,0,0,0,0,0,0,0,...,False,False,False,False,False,False,True,51.0,9.16,151266


In [18]:
scaled = MaxAbsScaler()
X = scaled.fit_transform(X)

#KNN
recommendations = NearestNeighbors(n_neighbors=11, algorithm='ball_tree').fit(X)
recommendations.kneighbors(X)
anime_indices = recommendations.kneighbors(X)[1] # picks off the array for anime indices


In [19]:

#Use MultiLabelBinarizer for one hot encoding genres
mlb = MultiLabelBinarizer()
genre_encoded = pd.DataFrame(mlb.fit_transform(df['genre']),columns=mlb.classes_,index=df.index)

#Normalize numerical featues(eg. rating)
scaler = MinMaxScaler()
df[['rating']]=scaler.fit_transform(df[['rating']])

#Merge genre encoding and rating
feature_matrix = pd.concat([genre_encoded,df[['rating']]],axis=1)


In [20]:
def get_index(x):
    #give index for the anime
    return df[df['name']==x].index.tolist()[0]

#Compute the consine similarity matrix
cosine_sim = cosine_similarity(feature_matrix)


In [21]:
#Function to get recommendations
def recommendation_anime(title,df,cosine_sim,top_n=5):
    if title not in df['name'].values:
     return "Anime not found in dataset"
    
    #Get the index of the anime
    #idx = get_index(title)
    idx = df[df['name'] == title].index[0]

    #Get similarity scored for all anime
    sim_scores = list(enumerate(cosine_sim[idx]))

    #Sort based on similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    #Get top N recommendations (excluding the anime itself)
    top_anime = [df['name'].iloc[i[0]] for i in sim_scores[1:top_n+1]]

    return top_anime

In [22]:
#Example usage
print("\n",recommendation_anime("Naruto",df,cosine_sim, top_n=5))

#Evaluation
#Splitting data into train and test sets
train,test = train_test_split(df,test_size=0.2,random_state=42)

#Generate recommendation for test set
y_true = test['name'].tolist()
y_pred = [recommendation_anime(title,df,cosine_sim) for title in test['name']]

#Convert to binary relevance (1 if match, 0 otherwise)
y_true_binary =[1 if title in pred else 0 for title,pred in zip(y_true,y_pred)]

#Genrate predicted binary labels
y_pred_binary = [1] * len(y_pred ) #All recommendations are considered positive samples



 ['Naruto: Shippuuden', 'Boruto: Naruto the Movie - Naruto ga Hokage ni Natta Hi', 'Boruto: Naruto the Movie', 'Naruto x UT', 'Naruto: Shippuuden Movie 4 - The Lost Tower']


In [23]:
#Compute Evalteion Metrics
precision = precision_score(y_true_binary,y_pred_binary,zero_division=1)
recall = recall_score(y_true_binary,y_pred_binary,zero_division=1)
f1 = f1_score(y_true_binary,y_pred_binary,zero_division=1)

print(f"\nPrecision: {precision:.2f}")
print(f"\nRecall: {recall:.2f}")
print(f"\nF1-Score: {f1:.2f}")



Precision: 0.13

Recall: 1.00

F1-Score: 0.22


Conclusion
Having seen most of these anime, I can attest to the fact that they are similar to Naruto in at least tone/mood.

Precision (0.13), Recall (1.00), and F1-Score (0.22) indicate that your recommendation system retrieves all relevant items but also few returns irrelevant ones.
