In [2]:
#data preprocessing
import pandas as pd

# Loading the dataset
df = pd.read_csv("anime.csv")

# basic information
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
df.info()
print(df.head())

# Handling missing values
# - genre:  missing entries get the placeholder label 'Unknown'
# - rating: missing entries get the mean of the known ratings
df['genre'] = df['genre'].fillna('Unknown')
df['rating'] = df['rating'].fillna(df['rating'].mean())

# Drop the rows that still have a missing value in any remaining column, then
# rebuild a contiguous 0..n-1 index. Without the reset the index keeps gaps
# where rows were removed, so index labels stop matching row positions and any
# later code that uses a label to index a positional array (such as a
# similarity matrix built from this frame) reads the wrong row.
# Reassigning instead of inplace=True keeps the cell's effect explicit.
df = df.dropna().reset_index(drop=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB
None
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Advent

In [3]:
#I used genre and rating columns to compute cosine similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MinMaxScaler
import scipy.sparse as sp

# Genre vectorization
# Every genre string is split on ', ' so that each genre becomes one token and
# one column of the count matrix.
# token_pattern=None: the default regex is unused once a custom tokenizer is
# supplied, and leaving it set makes scikit-learn emit a UserWarning on fit.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '), token_pattern=None)
genre_matrix = vectorizer.fit_transform(df['genre'])

# Normalize ratings
# MinMaxScaler rescales the single rating column to the [0, 1] range.
scaler = MinMaxScaler()
rating_scaled = scaler.fit_transform(df[['rating']])

# Combining genre and rating into one matrix
# hstack returns COO by default; CSR is requested explicitly because it
# supports row slicing and is the layout scikit-learn works with.
features = sp.hstack([genre_matrix, rating_scaled], format='csr')



In [4]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Computing cosine similarity
# Called with a single argument, cosine_similarity compares the rows of
# `features` with each other, giving a square matrix with one row and one
# column per row of `features`.
cos_sim = cosine_similarity(features)

# Recommendation function
def recommend_anime(title, top_n=5):
    """Return the `top_n` rows of `df` most similar to the anime named `title`.

    Similarity is read from the module-level `cos_sim` matrix, whose i-th row
    is taken to describe the i-th row (by position) of the module-level `df`.

    Parameters
    ----------
    title : str
        Exact value to look up in the 'name' column. If several rows share the
        name, the first one is used as the query.
    top_n : int, default 5
        Maximum number of recommendations; values below zero are treated as 0.

    Returns
    -------
    pandas.DataFrame or str
        The 'name', 'genre' and 'rating' columns of the recommended rows,
        ordered from most to least similar, or a "not found" message string
        when `title` is not present in `df`.
    """
    # Look the title up by row *position*. `cos_sim` is a plain array, so it
    # must be indexed positionally; index labels differ from positions as soon
    # as rows have been dropped from `df` without resetting the index.
    matches = np.flatnonzero(df['name'].to_numpy() == title)
    if matches.size == 0:
        return f"'{title}' not found in the dataset."
    pos = int(matches[0])

    scores = np.asarray(cos_sim[pos])
    # Stable sort on the negated scores: descending similarity, ties kept in
    # their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query row explicitly rather than assuming it sorts first;
    # another row with an identical feature vector can tie with it and would
    # otherwise push the query itself into the recommendations.
    ranked = ranked[ranked != pos]

    limit = max(int(top_n), 0)
    return df[['name', 'genre', 'rating']].iloc[ranked[:limit]]

# Example: the five entries most similar to "Naruto"
print(recommend_anime(title="Naruto", top_n=5))

                                                   name  \
615                                  Naruto: Shippuuden   
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
486                            Boruto: Naruto the Movie   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                                  genre  rating  
615   Action, Comedy, Martial Arts, Shounen, Super P...    7.94  
1103  Action, Comedy, Martial Arts, Shounen, Super P...    7.68  
486   Action, Comedy, Martial Arts, Shounen, Super P...    8.03  
1343  Action, Comedy, Martial Arts, Shounen, Super P...    7.58  
1472  Action, Comedy, Martial Arts, Shounen, Super P...    7.53  


In [None]:
# What is the difference between User-Based and Item-Based Collaborative Filtering?
# User-Based CF: recommends items by finding users with similar tastes.
#     If User A likes items X and Y, and User B is similar to A, then B is likely to like X and Y too.
#
# Item-Based CF: recommends items based on the similarity between the items themselves.
#     If X and Y are often liked by the same users, then a user who likes X will be recommended Y.

In [None]:
# What is Collaborative Filtering and how does it work?
# Collaborative Filtering (CF) is a recommendation technique that suggests items by identifying similarities between users or between items based on their past interactions