In [496]:
#! pip install scikit-learn

In [497]:
import requests
import json
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
import gradio as gr
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [498]:
# API Credentials (Replace with your credentials)
import os  # kept local to this cell: only the credential lookup below needs it

# Prefer the MAL_CLIENT_ID environment variable so the credential does not have
# to live in the notebook. The literal is kept only as a backward-compatible
# fallback for kernels where the variable is not set.
# FIXME: this client ID is stored in plain text - rotate it and drop the fallback.
CLIENT_ID = os.environ.get("MAL_CLIENT_ID") or "8d3314772d90b39ef86aeed6ff9868aa"
API_URL = "https://api.myanimelist.net/v2/anime"
# Comma-separated list sent as the "fields" query parameter of every request.
Fields = "id,title,alternative_titles,synopsis,genres,mean,rank,popularity,media_type,status"

In [499]:
def get_anime_data(anime_id, timeout=10):
    """Fetch the details of a single anime from the API.

    Args:
        anime_id: Identifier appended to ``API_URL`` to address the anime.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None`` (the failure is
        printed, matching the existing error-reporting style).
    """
    headers = {"X-MAL-Client-ID": CLIENT_ID}
    params = {"fields": Fields}
    try:
        response = requests.get(
            f"{API_URL}/{anime_id}", headers=headers, params=params, timeout=timeout
        )
    except requests.RequestException as exc:
        # Timeouts and connection errors are reported like HTTP errors so the
        # caller only ever has to deal with "data or None".
        print(f"Request for anime {anime_id} failed: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error {response.status_code}: {response.text}")
    return None

In [500]:
def fetch_anime_list(offset=0, limit=100, timeout=10):
    """Fetch one page of the overall anime ranking from the API.

    Args:
        offset: Position in the ranking at which the page starts.
        limit: Number of entries requested for the page (100 as before).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The list stored under ``"data"`` in the JSON response, or an empty
        list when the key is missing or the request fails (the failure is
        printed).
    """
    headers = {"X-MAL-Client-ID": CLIENT_ID}
    params = {"ranking_type": "all", "limit": limit, "offset": offset, "fields": Fields}
    try:
        response = requests.get(
            f"{API_URL}/ranking", headers=headers, params=params, timeout=timeout
        )
    except requests.RequestException as exc:
        # Keep the "always returns a list" contract on transport errors too.
        print(f"Ranking request at offset {offset} failed: {exc}")
        return []

    if response.status_code == 200:
        return response.json().get("data", [])

    print(f"Error {response.status_code}: {response.text}")
    return []

In [501]:
# Extract base title (removing "Season X", "Movie", "Special" identifiers)
def combine_copies(new_data):
    """Collapse all entries that share a base title into a single row.

    The base title is the part of ``title`` before the first ``": "`` or
    ``" - "`` with sequel markers (Season, Movie, Special, OVA, Part, Final,
    1st..10th, each optionally followed by a number) removed.

    Args:
        new_data: DataFrame with the columns ``id``, ``title``, ``synopsis``,
            ``genres`` (comma-separated string), ``mean``, ``media_type`` and
            ``status``. It is not modified.

    Returns:
        DataFrame with one row per base title and the columns ``title`` (the
        base title), ``id`` (list), ``synopsis`` (distinct synopses joined by
        a space), ``genres`` (distinct genres joined by ", "), ``mean``
        (average), ``media_type`` (list of distinct values) and ``status``
        (list of distinct values). Merged values keep first-seen order, so the
        result is identical from run to run.
    """
    # Match whole words only: the unanchored pattern used to cut "Part" out of
    # "Departures" and "OVA" out of "Nova". Whitespace after a marker is
    # consumed only together with a number, so neighbouring words stay apart.
    sequel_marker = re.compile(
        r'\s*\b(?:Season|Movie|Special|OVA|Part|Final|1st|2nd|3rd|4th|5th|6th|7th|8th|9th|10th)(?:\s*\d+)?\b',
        flags=re.IGNORECASE,
    )

    def extract_base_title(title):
        head = title.split(': ')[0].split(' - ')[0].strip()
        base_title = sequel_marker.sub('', head).strip()
        # A title made only of markers (e.g. "Special") must not collapse to
        # an empty grouping key.
        return base_title or head

    def unique_in_order(values):
        # dict keys preserve insertion order, unlike set().
        return list(dict.fromkeys(values))

    def merge_synopses(cells):
        texts = [text for text in cells if isinstance(text, str) and text]
        return " ".join(unique_in_order(texts))

    def merge_genres(cells):
        # Empty cells are skipped so they cannot leave a stray ", " behind.
        names = [
            name
            for cell in cells
            if isinstance(cell, str)
            for name in cell.split(", ")
            if name
        ]
        return ", ".join(unique_in_order(names))

    # assign() works on a copy, leaving the caller's frame untouched.
    keyed = new_data.assign(base_title=new_data["title"].apply(extract_base_title))

    # Group by base title and aggregate related anime
    grouped_data = (
        keyed.groupby("base_title")
        .agg({
            "id": list,  # Collect IDs
            "synopsis": merge_synopses,  # Merge unique synopses
            "genres": merge_genres,  # Unique genres
            "mean": "mean",  # Compute the average score
            "media_type": unique_in_order,  # Store unique media types
            "status": unique_in_order,  # Store unique statuses
        })
        .reset_index()
        # Rename base_title to title for final output
        .rename(columns={"base_title": "title"})
    )

    return grouped_data

In [502]:
def preprocess_data(anime_list, existing_df=None):
    """Turn one page of API ranking entries into rows of a DataFrame.

    Args:
        anime_list: Iterable of entries shaped like ``{"node": {...}}``; each
            node must carry ``id`` and ``title``, everything else is optional.
        existing_df: Rows collected so far. When given, the new rows are
            appended to it and the index is renumbered.

    Returns:
        DataFrame with the columns ``id``, ``title``, ``alternative_titles``
        (list of the non-empty synonyms / English / Japanese titles),
        ``synopsis``, ``genres`` (comma-separated names), ``mean``,
        ``media_type`` and ``status``. The columns are present even when
        ``anime_list`` is empty.
    """
    columns = ["id", "title", "alternative_titles", "synopsis",
               "genres", "mean", "media_type", "status"]

    def collect_alternative_titles(node):
        # The API nests these as {"synonyms": [...], "en": "...", "ja": "..."}.
        # Previously this column was filled with the main title by mistake.
        alternatives = node.get("alternative_titles") or {}
        candidates = [
            *(alternatives.get("synonyms") or []),
            alternatives.get("en"),
            alternatives.get("ja"),
        ]
        return [name for name in candidates if name]

    records = []
    for anime in anime_list or []:
        node = anime["node"]
        records.append({
            "id": node["id"],  # Unique MAL ID
            "title": node["title"],  # Anime title
            "alternative_titles": collect_alternative_titles(node),
            "synopsis": node.get("synopsis") or "",  # Synopsis (if available)
            # Genres as a comma-separated string
            "genres": ", ".join(g["name"] for g in node.get("genres") or []),
            "mean": node.get("mean", 0),  # Average score, defaulting to 0 if missing
            "media_type": node.get("media_type", "unknown"),  # Type (TV, movie, OVA, etc.)
            "status": node.get("status", "unknown"),  # Status (e.g., finished_airing, airing)
        })

    # Explicit columns keep the schema stable for an empty page.
    new_data = pd.DataFrame(records, columns=columns)

    # Merge with existing data if provided
    if existing_df is not None:
        if new_data.empty:
            # Nothing to append; still return a renumbered frame like concat would.
            return existing_df.reset_index(drop=True)
        new_data = pd.concat([existing_df, new_data], ignore_index=True)
    return new_data


    


In [503]:
def compute_similarity(df):
    """Compute pairwise cosine similarity between the synopses in ``df``.

    Each synopsis is turned into a TF-IDF vector (English stop words are
    ignored) and every vector is compared with every other one.

    Args:
        df: DataFrame with a ``"synopsis"`` column.

    Returns:
        The result of ``cosine_similarity`` for the TF-IDF matrix against
        itself; entry ``[i, j]`` compares row ``i`` with row ``j`` of ``df``.
    """
    synopsis_vectors = TfidfVectorizer(stop_words="english").fit_transform(df["synopsis"])
    return cosine_similarity(synopsis_vectors, synopsis_vectors)

In [504]:
def recommend_anime(user_favorites, df, similarity_matrix, top_n=5):
    """Recommend titles that are most similar to the user's favourites.

    Args:
        user_favorites: Titles the user likes. Titles that do not appear
            exactly in ``df["title"]`` are ignored; ``None`` counts as empty.
        df: DataFrame with a ``"title"`` column whose row order matches
            ``similarity_matrix``.
        similarity_matrix: Square matrix where ``[i][j]`` is the similarity
            between rows ``i`` and ``j`` of ``df``.
        top_n: How many nearest neighbours to take per favourite (5 as before).

    Returns:
        List of recommended titles without duplicates and without the
        favourites themselves, ordered by favourite and then by decreasing
        similarity (ties keep row order).
    """
    titles = df["title"].tolist()

    # Map each title to its row position. Exact lookup replaces the former
    # regex-based str.contains(), which could pick another title that merely
    # contains the favourite, or fail on titles like "C++".
    position_of = {}
    for position, title in enumerate(titles):
        position_of.setdefault(title, position)

    favorites = [title for title in (user_favorites or []) if title in position_of]
    top_n = max(int(top_n), 0)

    recommended = []
    already_listed = set(favorites)  # never recommend what the user already picked
    for favorite in favorites:
        position = position_of[favorite]
        row = list(similarity_matrix[position])
        # Exclude the favourite by position rather than assuming it sorts last:
        # an empty synopsis has self-similarity 0 and duplicates tie at 1.0.
        neighbours = sorted(
            (index for index in range(len(row)) if index != position),
            key=row.__getitem__,
            reverse=True,
        )
        for index in neighbours[:top_n]:
            candidate = titles[index]
            if candidate not in already_listed:
                already_listed.add(candidate)
                recommended.append(candidate)
    return recommended

In [505]:
# Build the dataset and similarity matrix that every later cell relies on.
# This cell used to be commented out, which left `df`, `similarity_matrix` and
# `anime_list` undefined on a fresh kernel (NameError in the cells below).
if __name__ == "__main__":
    df = None
    # for i in range(0, 21525, 100):  # larger range tried earlier; far more requests
    for i in range(0, 1000, 100):  # offsets 0..900, one ranking page of 100 each
        anime_list = fetch_anime_list(i)
        df = preprocess_data(anime_list, df)
    df = combine_copies(df)
    similarity_matrix = compute_similarity(df)

    user_favorites = ['Overlord', 'Tensei shitara Slime Datta Ken']  # 'That Time I Got Reincarnated as a Slime'

    recommendations = recommend_anime(user_favorites, df, similarity_matrix)
    print("Recommended Anime:", recommendations)

In [506]:
# Assuming `df` is already loaded with anime data
def get_anime_titles():
    """Return the ``"title"`` column of the global ``df`` as a plain list."""
    title_column = df["title"]
    return title_column.tolist()


In [507]:
def recommend(user_favorites):
    """Return recommendations for ``user_favorites`` as newline-separated text.

    Uses the global ``df`` and ``similarity_matrix`` and passes them to
    ``recommend_anime``; the resulting titles are joined one per line.
    """
    return "\n".join(recommend_anime(user_favorites, df, similarity_matrix))

In [508]:
def add_to_list(selected_anime, user_list):
    """Append ``selected_anime`` to ``user_list`` unless it is empty or already there.

    The list is modified in place and the same list object is returned.
    """
    # Nothing selected: hand the list back untouched.
    if not selected_anime:
        return user_list
    # Already present: avoid duplicates.
    if selected_anime in user_list:
        return user_list
    user_list.append(selected_anime)
    return user_list

In [509]:
def clear_list():
    """Return a new empty list, used to reset the user's selection."""
    emptied = []
    return emptied

In [510]:
# Gradio UI: pick titles from a dropdown, collect them in a per-session list,
# and ask for recommendations based on that list.
with gr.Blocks() as demo:
    gr.Markdown("# Anime Recommendation System")

    anime_dropdown = gr.Dropdown(get_anime_titles(), label="Select an anime to add to your list")
    user_list = gr.State([])  # Holds the list of selected anime
    anime_list_display = gr.Textbox(label="Your Anime List", interactive=False)

    add_button = gr.Button("Add to List")
    clear_button = gr.Button("Clear List")

    recommend_button = gr.Button("Get Recommendations")
    recommendations_output = gr.Textbox(label="Recommended Anime")

    def _add_and_render(selected_anime, current_list):
        """Add the selection and return both the new state and its display text."""
        updated = add_to_list(selected_anime, current_list)
        return updated, "\n".join(updated)

    def _clear_and_render():
        """Reset the state and blank the display in one step."""
        return clear_list(), ""

    # One listener per button writes state and display together. Previously
    # two independent listeners were registered per click, and the display
    # listener could read the state before the update listener had stored it.
    add_button.click(
        _add_and_render,
        inputs=[anime_dropdown, user_list],
        outputs=[user_list, anime_list_display],
    )
    clear_button.click(_clear_and_render, outputs=[user_list, anime_list_display])

    recommend_button.click(recommend, inputs=user_list, outputs=recommendations_output)

In [511]:
# Start the Gradio server for `demo` only when this code runs as the main
# program (which is the case inside a notebook kernel), not when imported.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


In [512]:
# Check structure: a single entry is enough to see the schema; dumping the
# whole fetched page floods the notebook output.
print(json.dumps(anime_list[:1], indent=2))


[
  {
    "node": {
      "id": 7645,
      "title": "Heartcatch Precure!",
      "main_picture": {
        "medium": "https://cdn.myanimelist.net/images/anime/2/25915.jpg",
        "large": "https://cdn.myanimelist.net/images/anime/2/25915l.jpg"
      },
      "alternative_titles": {
        "synonyms": [
          "Heartcatch Pretty Cure!"
        ],
        "en": "",
        "ja": "\u30cf\u30fc\u30c8\u30ad\u30e3\u30c3\u30c1\u30d7\u30ea\u30ad\u30e5\u30a2!"
      },
      "synopsis": "Young flower enthusiast Tsubomi Hanasaki is often modest and quiet. But with her family moving to a new town, she aims to reinvent her image at her new school as someone more confident and outgoing. On moving day, she dreams of a mysterious tree in the sky guarded by a warrior named \"Cure Moonlight.\"\n\nTsubomi quickly learns that this was no ordinary dream when she encounters two mysterious fairies\u2014Chypre and Coffret\u2014who are being hunted down by a strange woman. When the woman summons a gian