In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Read the anime catalogue from the CSV file in the working directory
csv_path = 'anime.csv'
anime_df = pd.read_csv(csv_path)

In [3]:
# Discard every row where the title or the genre is missing
# (axis/how are the dropna defaults, spelled out for the reader)
anime_df = anime_df.dropna(axis=0, how='any', subset=['name', 'genre'])

In [5]:
# Replace missing ratings with 0, and turn the episode count into a float
# column where both the 'Unknown' placeholder and missing values become 0
anime_df = anime_df.assign(
    rating=anime_df['rating'].fillna(0),
    episodes=(
        anime_df['episodes']
        .replace('Unknown', 0)
        .fillna(0)
        .astype(float)
    ),
)

In [6]:
# TF-IDF Vectorizer for genres
# Each comma-separated genre is treated as ONE token. The pattern starts at the
# first character that is neither a comma nor whitespace and runs up to the
# last such character before the next comma, so the space after each comma is
# skipped but spaces inside a genre are kept. Splitting on spaces as well
# (the previous r'[^, ]+') broke multi-word genres such as "Martial Arts" into
# the unrelated tokens "martial" and "arts", and made any two genres that
# merely share a word look partially similar.
# The group is non-capturing on purpose: the vectorizer must receive the whole
# match as the token.
tfidf = TfidfVectorizer(token_pattern=r'[^,\s](?:[^,]*[^,\s])?')
genre_matrix = tfidf.fit_transform(anime_df['genre'])

In [7]:
# Rescale the two numeric columns with a min-max scaler so they can be
# placed next to the TF-IDF genre features
numeric_columns = ['rating', 'episodes']
scaler = MinMaxScaler()
scaler.fit(anime_df[numeric_columns])
num_features = scaler.transform(anime_df[numeric_columns])

In [8]:
# Build one feature matrix: genre TF-IDF columns followed by the scaled
# numeric columns, stacked side by side
import scipy.sparse
feature_blocks = [genre_matrix, num_features]
combined_features = scipy.sparse.hstack(feature_blocks)

In [9]:
# Pairwise cosine similarity of every row against every other row
# (with a single argument the matrix is compared with itself)
# NOTE(review): the result has one row and one column per anime — confirm
# it fits in memory for the full dataset
cosine_sim = cosine_similarity(combined_features)

In [10]:
# Anime title -> row POSITION mapping.
# Positions (0..n-1) are stored rather than the DataFrame's index labels,
# because `cosine_sim` rows and `.iloc` are both addressed by position, and
# the labels of `anime_df` need not be contiguous once rows have been dropped.
# Duplicates are removed on the title (the Series index), keeping the first
# occurrence, so that a lookup always yields a single scalar.
anime_index = pd.Series(range(len(anime_df)), index=anime_df['name'])
anime_index = anime_index[~anime_index.index.duplicated(keep='first')]

# Recommendation function
def recommend_anime(title, top_n=10):
    """Return the anime most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``anime_index``.
    top_n : int, default 10
        Maximum number of recommendations to return. Values below 1 give an
        empty result.

    Returns
    -------
    pandas.DataFrame or str
        The ``name``, ``genre`` and ``rating`` columns of the ``top_n`` rows
        with the highest cosine similarity to ``title``, most similar first.
        If the title is unknown, a human-readable message string is returned
        instead.
    """
    idx = anime_index.get(title)
    if idx is None:
        return f"'{title}' not found in dataset."

    idx = int(idx)
    scores = pd.Series(cosine_sim[idx])
    # Drop the query row explicitly instead of assuming it sorts first:
    # another row with a tied score could otherwise push the query itself
    # into the results and push a real neighbour out.
    neighbours = scores.drop(index=idx).nlargest(max(top_n, 0))

    return anime_df[['name', 'genre', 'rating']].iloc[list(neighbours.index)]

# Demo: print the five closest matches for 'Naruto'
recommendations = recommend_anime(title='Naruto', top_n=5)
print(recommendations)

                                                   name  \
1103  Boruto: Naruto the Movie - Naruto ga Hokage ni...   
615                                  Naruto: Shippuuden   
486                            Boruto: Naruto the Movie   
1343                                        Naruto x UT   
1472        Naruto: Shippuuden Movie 4 - The Lost Tower   

                                                  genre  rating  
1103  Action, Comedy, Martial Arts, Shounen, Super P...    7.68  
615   Action, Comedy, Martial Arts, Shounen, Super P...    7.94  
486   Action, Comedy, Martial Arts, Shounen, Super P...    8.03  
1343  Action, Comedy, Martial Arts, Shounen, Super P...    7.58  
1472  Action, Comedy, Martial Arts, Shounen, Super P...    7.53  
