# In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Read the anime catalogue from disk
df = pd.read_csv("anime.csv")

# Step 2: Replace missing text with '' so the concatenation below never meets NaN
text_columns = ['genre', 'studio', 'source', 'synopsis']
df[text_columns] = df[text_columns].fillna('')

# Step 3: Merge the metadata fields into one space-separated "content" string per row
df['content'] = df['genre'].str.cat(
    [df['studio'], df['source'], df['synopsis']],
    sep=' ',
)

# Step 4: TF-IDF vectorization of the combined text (English stop words removed)
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['content'])

# Step 5: Cosine similarity of every row against every other row
# (with a single argument the matrix is compared with itself)
cosine_sim = cosine_similarity(tfidf_matrix)

# Step 6: Build a lookup from anime_id to the row's index in df
anime_id_to_index = pd.Series(data=df.index, index=df['anime_id'])

# Step 7: Recommendation function
def recommend(anime_id, top_n=5):
    """Return the titles whose content is most similar to ``anime_id``.

    Similarity scores are read from the module-level ``cosine_sim`` matrix.
    The query row is located through ``anime_id_to_index`` and the result is
    sliced out of ``df``.

    Args:
        anime_id: Identifier to look up among the labels of
            ``anime_id_to_index`` (built from the ``anime_id`` column).
        top_n: Maximum number of recommendations to return. Values below 1
            produce an empty DataFrame.

    Returns:
        A DataFrame holding the ``anime_id`` and ``name`` columns of the
        recommended rows, most similar first (ties keep their original row
        order). If ``anime_id`` is not in the dataset, a message string is
        returned instead.
    """
    # `in` on a Series tests the index labels, i.e. the known anime ids.
    if anime_id not in anime_id_to_index:
        return f"Anime ID {anime_id} not found in the dataset."

    idx = anime_id_to_index[anime_id]
    if isinstance(idx, pd.Series):
        # A duplicated anime_id makes the lookup return several rows;
        # use the first occurrence as the query row.
        idx = idx.iloc[0]

    # Exclude the query row by index rather than assuming it sorts first.
    # It is not guaranteed to lead the ranking: a row with identical content
    # ties at the top score and the stable sort keeps the lower index first,
    # and a row whose TF-IDF vector is all zeros scores 0 even against itself.
    ranked = sorted(
        ((row, score) for row, score in enumerate(cosine_sim[idx]) if row != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Clamp top_n so a negative value cannot turn into an "all but the last k" slice.
    anime_indices = [row for row, _ in ranked[:max(top_n, 0)]]
    return df[['anime_id', 'name']].iloc[anime_indices]

# Example usage: print the recommendations for the title with anime_id 1,
# using the default top_n. Replace 1 with any anime_id from your CSV.
print(recommend(anime_id=1))
