# Anime Recommendation System using Cosine Similarity

## Data Loading and Exploration

In [None]:

import pandas as pd

# Read the anime table from the CSV file in the working directory, then
# preview its first five rows (rendered as the cell output in a notebook).
df = pd.read_csv(filepath_or_buffer='anime.csv')
df.head(n=5)


## Data Preprocessing

In [None]:

# Count the missing entries in each column (isna is the canonical name
# for isnull; both produce the same boolean mask).
df.isna().sum()


In [None]:

# Replace missing genres with a placeholder label so text vectorization
# never receives NaN.
df['genre'] = df['genre'].fillna('Unknown')

# Impute each numeric column with its own mean.
for numeric_column in ('rating', 'members'):
    column_mean = df[numeric_column].mean()
    df[numeric_column] = df[numeric_column].fillna(column_mean)


## Feature Extraction

In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack

def _split_genres(genre_text):
    """Split a comma-separated genre string into stripped genre labels."""
    return [label.strip() for label in genre_text.split(',') if label.strip()]


# TF-IDF for genres.
# NOTE(review): assumes `genre` holds comma-separated labels such as
# "Action, Sci-Fi, Slice of Life" -- confirm against anime.csv.
# The default word-level token pattern would break multi-word or hyphenated
# labels apart ("Sci-Fi" -> "sci", "fi") and make unrelated genres share
# tokens, so each whole label is treated as one term instead. English stop
# words are not filtered because genre labels are not prose.
# token_pattern=None silences the "token_pattern will not be used" warning
# that scikit-learn emits when a custom tokenizer is supplied.
tfidf = TfidfVectorizer(tokenizer=_split_genres, token_pattern=None)
genre_matrix = tfidf.fit_transform(df['genre'])

# Normalize numerical features to the [0, 1] range so neither column
# dominates purely because of its scale.
scaler = MinMaxScaler()
num_features = scaler.fit_transform(df[['rating', 'members']])

# Combine features column-wise; request CSR so the result supports row
# indexing (the default COO output of hstack does not).
feature_matrix = hstack([genre_matrix, num_features], format='csr')


## Recommendation System

In [None]:

from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of rows of the feature
# matrix; entry [i, j] scores how alike rows i and j are.
cosine_sim = cosine_similarity(X=feature_matrix)

def recommend_anime(title, top_n=5):
    """Return the ``top_n`` anime most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix, whose
    rows and columns are expected to follow the row order of ``df``.

    Args:
        title: Exact value to look up in ``df['name']``. If several rows
            share the name, the first one is used.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A DataFrame with the ``name``, ``genre`` and ``rating`` columns of
        the recommended rows, ordered from most to least similar, or the
        string ``"Anime not found"`` when ``title`` is not in
        ``df['name']``.
    """
    import numpy as np

    # Work with the row *position*, not the index label: cosine_sim is a
    # plain positional matrix, so a non-default DataFrame index must not
    # leak into the lookup.
    positions = np.flatnonzero((df['name'] == title).to_numpy())
    if positions.size == 0:
        return "Anime not found"
    idx = int(positions[0])

    scores = np.asarray(cosine_sim[idx]).ravel()
    # Stable descending sort keeps ties in their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the queried row explicitly. Assuming it is always ranked first
    # is wrong when another row ties with it (e.g. identical features), in
    # which case the query itself would be returned as a recommendation.
    ranked = ranked[ranked != idx]

    limit = max(int(top_n), 0)
    return df[['name', 'genre', 'rating']].iloc[ranked[:limit]]


## Example Recommendation

In [None]:

# Ask for the five closest matches to the first title in the dataset.
recommend_anime(title=df['name'].iat[0], top_n=5)
