In [23]:
# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from jikanpy import Jikan

# Initialize Jikan client
jikan = Jikan()


In [24]:
# Load your CSV dataset
anime_df = pd.read_csv("anime.csv")

# Basic preprocessing
anime_df['genre'] = anime_df['genre'].fillna('')
anime_df['genre_clean'] = anime_df['genre'].str.replace(',', ' ').str.lower()

# (Optional) Clean up any HTML entities if needed
import html
anime_df['genre_clean'] = anime_df['genre_clean'].apply(html.unescape)

# Create a title to index lookup
title_to_index = pd.Series(anime_df.index, index=anime_df['name']).drop_duplicates()



In [25]:
# Vectorize the genres using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(anime_df['genre_clean'])

# Calculate cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [26]:
def recommend_local(anime_title, top_n=10):
    idx = title_to_index.get(anime_title)
    if idx is None:
        return None  # Anime not found locally
    
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:top_n+1]
    
    indices = [i[0] for i in sim_scores]
    similarities = [i[1] for i in sim_scores]
    
    recs = anime_df.iloc[indices][['name', 'genre', 'rating', 'members']].copy()
    recs['similarity'] = similarities
    
    return recs



In [27]:
def normalize_column(series):
    return (series - series.min()) / (series.max() - series.min())


In [28]:
from difflib import SequenceMatcher

def is_too_similar(title1, title2, threshold=0.8):
    title1_lower = title1.lower()
    title2_lower = title2.lower()
    
    if title1_lower in title2_lower or title2_lower in title1_lower:
        print(f"Substring match: '{title1}' vs '{title2}' → True")
        return True
    
    ratio = SequenceMatcher(None, title1_lower, title2_lower).ratio()
    print(f"Fuzzy match: '{title1}' vs '{title2}' → {ratio:.2f}")
    
    return ratio >= threshold



In [29]:
import requests

def search_anime_v4(title, limit=5):
    url = "https://api.jikan.moe/v4/anime"
    params = {
        "q": title,
        "limit": limit,
        "fields": "title,score,members,genres,synopsis"
    }
    response = requests.get(url, params=params)
    
    if response.status_code != 200:
        print(f"API error: {response.status_code} - {response.text}")
        return None
    
    data = response.json()
    return data.get('data', [])

results = search_anime_v4("Fullmetal Alchemist")
for anime in results:
    title = anime.get('title')
    score = anime.get('score')
    members = anime.get('members')
    genres = [g['name'] for g in anime.get('genres', [])]
    synopsis = anime.get('synopsis')
    print(f"{title} - Score: {score}, Members: {members}, Genres: {genres}")


Fullmetal Alchemist - Score: 8.11, Members: 1523824, Genres: ['Action', 'Adventure', 'Award Winning', 'Drama', 'Fantasy']
Fullmetal Alchemist: Brotherhood - Score: 9.1, Members: 3542513, Genres: ['Action', 'Adventure', 'Drama', 'Fantasy']
Fullmetal Alchemist: The Conqueror of Shamballa - Score: 7.5, Members: 340804, Genres: ['Action', 'Adventure', 'Award Winning', 'Drama', 'Fantasy']
Fullmetal Alchemist: The Sacred Star of Milos - Score: 7.26, Members: 193498, Genres: ['Action', 'Adventure', 'Drama', 'Fantasy']
Fullmetal Alchemist: Brotherhood Specials - Score: 8, Members: 165461, Genres: ['Action', 'Adventure', 'Drama', 'Fantasy']


In [30]:
import time
def enrich_recommendations(local_recs_df):
    enriched = []
    for title in local_recs_df['name']:
        results = search_anime_v4(title)
        if results:
            anime = results[0]
            enriched.append({
                'title': anime.get('title'),
                'score': anime.get('score'),
                'members': anime.get('members'),
                'genres': [g['name'] for g in anime.get('genres', [])],
                'synopsis': anime.get('synopsis')
            })
        else:
            enriched.append({'title': title, 'score': None, 'members': None, 'genres': None, 'synopsis': None})
    enriched_df = pd.DataFrame(enriched)
    time.sleep(3.0)  # <--- pause here after each API call

    # Merge similarity from local recs
    enriched_df = enriched_df.merge(local_recs_df[['name', 'similarity']], left_on='title', right_on='name', how='left')

    # Normalize columns
    enriched_df['members_norm'] = normalize_column(enriched_df['members'].fillna(0))
    enriched_df['score_norm'] = normalize_column(enriched_df['score'].fillna(0))
    enriched_df['similarity_norm'] = normalize_column(enriched_df['similarity'].fillna(0))
    
    return enriched_df



In [None]:
def weighted_recommendations(enriched_df, original_title, weight_similarity=0.4, weight_members=0.3, weight_rating=0.3, top_n=10):
    # Normalize columns to 0-1 scale (fill NaNs with 0)
    enriched_df['similarity_norm'] = normalize_column(enriched_df['similarity'].fillna(0))
    enriched_df['members_norm'] = normalize_column(enriched_df['members'].fillna(0))
    enriched_df['score_norm'] = normalize_column(enriched_df['score'].fillna(0))

    # Calculate initial weighted combined score
    enriched_df['combined_score'] = (
        enriched_df['similarity_norm'] * weight_similarity +
        enriched_df['members_norm'] * weight_members +
        enriched_df['score_norm'] * weight_rating
    )

    # Apply penalty for titles too similar to original_title
    enriched_df['similar_title_penalty'] = enriched_df['title'].apply(
        lambda t: 0.2 if is_too_similar(t, original_title) else 1.0
    )
    
    # Multiply combined score by penalty
    enriched_df['combined_score'] *= enriched_df['similar_title_penalty']

    # Sort by combined score descending, pick top N
    final_recs = enriched_df.sort_values('combined_score', ascending=False).head(top_n)
    return final_recs[['title', 'genres', 'score', 'members', 'combined_score']]


In [32]:
def hybrid_recommend(anime_title, top_n=10, weight_similarity=0.4, weight_members=0.3, weight_rating=0.3):
    local_recs = recommend_local(anime_title, top_n)
    if local_recs is None:
        return f"'{anime_title}' not found in local dataset."

    enriched_df = enrich_recommendations(local_recs)
    final_recs = weighted_recommendations(
        enriched_df,
        original_title=anime_title,
        weight_similarity=weight_similarity,
        weight_members=weight_members,
        weight_rating=weight_rating,
        top_n=top_n
    )
    return final_recs


In [33]:
result = hybrid_recommend("One Piece", top_n=3)
print(result)

Substring match: 'One Piece: Episode of Merry - Mou Hitori no Nakama no Monogatari' vs 'One Piece' → True
Substring match: 'One Piece: Episode of Nami - Koukaishi no Namida to Nakama no Kizuna' vs 'One Piece' → True
Substring match: 'One Piece: Episode of Sabo - 3 Kyoudai no Kizuna Kiseki no Saikai to Uketsugareru Ishi' vs 'One Piece' → True
                                               title  \
0  One Piece: Episode of Merry - Mou Hitori no Na...   
1  One Piece: Episode of Nami - Koukaishi no Nami...   
2  One Piece: Episode of Sabo - 3 Kyoudai no Kizu...   

                         genres  score  members  combined_score  
0  [Action, Adventure, Fantasy]   8.19    79039             NaN  
1  [Action, Adventure, Fantasy]   8.11    74900             NaN  
2  [Action, Adventure, Fantasy]   7.70    43536             NaN  
