In [1]:
import requests
import pandas as pd
import time

# Where the search titles come from and where the collected rows are written
input_file = "just_titles.csv"
output_file = "anime_data.csv"

# Read the CSV; the first column holds the titles to search for.
titles_df = pd.read_csv(input_file)
first_column = titles_df.iloc[:, 0]
# Drop missing cells so only real values remain, as a plain Python list.
titles_list = first_column.dropna().tolist()

# AniList GraphQL endpoint that every search request is POSTed to
API_URL = "https://graphql.anilist.co"

# GraphQL query sent once per title.
#
# It takes a single `$search` string variable and looks up one `Media` entry
# restricted to `type: ANIME`. The lookup is aliased as `Anime`, so the result
# is found under `data.Anime` in the JSON response.
#
# For the matched entry it requests ids, romaji title, format, status,
# episode count, duration, source, country of origin, score/popularity
# counters, genres, start/end dates (year, month, day), tag names and the
# adult flag. The same set of fields is requested again for every node under
# `relations`, so related entries can be flattened into rows of the same shape.
#
# NOTE(review): the `type: ANIME` argument is applied to the top-level lookup
# only; this query puts no type filter on `relations.nodes` -- confirm whether
# non-anime related media are wanted in the output.
QUERY = """
query ($search: String) {
    Anime: Media(search: $search, type: ANIME) {
        id
        idMal
        title {
            romaji
        }
        format
        status
        episodes
        duration
        source
        countryOfOrigin
        meanScore
        popularity
        trending
        favourites
        genres
        startDate {
            year
            month
            day
        }
        endDate {
            year
            month
            day
        }
        tags {
            name
        }
        isAdult
        relations {
            nodes {
                id
                idMal
                title {
                    romaji
                }
                format
                status
                episodes
                duration
                source
                countryOfOrigin
                meanScore
                popularity
                trending
                favourites
                genres
                startDate {
                    year
                    month
                    day
                }
                endDate {
                    year
                    month
                    day
                }
                tags {
                    name
                }
                isAdult
            }
        }
    }
}
"""

# Store results: one dict per output row (main entries, related entries, placeholders)
anime_data_list = []

# Pause between consecutive titles: one request every 2 seconds (~30 per minute).
_REQUEST_DELAY_SECONDS = 2
# Abort a single HTTP call after this long instead of hanging indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30
# How many times one title is attempted when the API answers 429 (rate limited).
_MAX_ATTEMPTS = 3
# Wait used after a 429 when the response carries no usable Retry-After header.
_DEFAULT_RETRY_AFTER_SECONDS = 60


def _na(value):
    """Return *value*, or "N/A" when it is None.

    The API sends JSON null for unknown fields, so the key is present and
    ``dict.get(key, "N/A")`` would hand back None instead of the default.
    Only None is replaced: legitimate falsy values (0, False) are kept.
    """
    return "N/A" if value is None else value


def _format_date(date):
    """Render a ``{"year", "month", "day"}`` dict as "year-month-day".

    Any missing or null part is written as "N/A"; *date* itself may be None.
    """
    date = date or {}
    return "-".join(str(_na(date.get(part))) for part in ("year", "month", "day"))


def extract_anime_data(data, search_title, related_to="Main Entry"):
    """Flatten one media object from the API response into an output row.

    Args:
        data: Dict for the main ``Anime`` object or for one ``relations`` node.
        search_title: The title that was searched for (copied into the row).
        related_to: Value of the "Related_To" column; "Main Entry" for the
            searched anime itself, otherwise the main entry's romaji title.

    Returns:
        A dict whose keys are the CSV column names, in column order. Fields
        that are absent or null in *data* are written as "N/A".
    """
    title_info = data.get("title") or {}
    genres = data.get("genres") or []
    # Skip null tags / tags without a name so the join never sees a non-string.
    tag_names = [tag["name"] for tag in (data.get("tags") or []) if tag and tag.get("name")]
    return {
        "Search_Title": search_title,
        "Anime_Title_Romaji": _na(title_info.get("romaji")),
        "Anime_ID": _na(data.get("id")),
        "MAL_ID": _na(data.get("idMal")),
        "Anime_Format": _na(data.get("format")),
        "Anime_Status": _na(data.get("status")),
        "Anime_Episodes": _na(data.get("episodes")),
        "Anime_Duration (min)": _na(data.get("duration")),
        "Anime_Source": _na(data.get("source")),
        "Anime_Country": _na(data.get("countryOfOrigin")),
        "Anime_MeanScore": _na(data.get("meanScore")),
        "Anime_Popularity": _na(data.get("popularity")),
        "Anime_Trending": _na(data.get("trending")),
        "Anime_Favourites": _na(data.get("favourites")),
        "Anime_Genres": ", ".join(genres) if genres else "N/A",
        "Anime_StartDate": _format_date(data.get("startDate")),
        "Anime_EndDate": _format_date(data.get("endDate")),
        "Anime_Tags": ", ".join(tag_names) if tag_names else "N/A",
        "Anime_IsAdult": _na(data.get("isAdult")),
        "Related_To": related_to,
    }


def _placeholder_row(search_title):
    """Build the all-"N/A" row recorded when a title could not be fetched.

    Derived from ``extract_anime_data`` so it always has exactly the same
    columns, in the same order, as a real row.
    """
    row = extract_anime_data({}, search_title, related_to="N/A")
    row.update({
        "Anime_Title_Romaji": search_title,
        "Anime_StartDate": "N/A",
        "Anime_EndDate": "N/A",
    })
    return row


def _retry_after_seconds(response):
    """Return how long to wait after a 429, read from the Retry-After header."""
    try:
        return max(int(response.headers.get("Retry-After", _DEFAULT_RETRY_AFTER_SECONDS)), 1)
    except (TypeError, ValueError):
        return _DEFAULT_RETRY_AFTER_SECONDS


def _fetch_anime(search_title):
    """Query the API for *search_title*.

    Returns:
        An ``(anime, error)`` pair. ``error`` is None when the API answered
        with HTTP 200, otherwise it describes the failure (the decoded error
        payload, the raw body, or the network exception). ``anime`` is the
        ``data.Anime`` object of a successful response, or None when it is
        absent or null.
    """
    request_body = {"query": QUERY, "variables": {"search": search_title}}

    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            response = requests.post(
                API_URL, json=request_body, timeout=_REQUEST_TIMEOUT_SECONDS
            )
        except requests.RequestException as exc:
            # Connection errors / timeouts must not abort the whole run.
            return None, exc
        if response.status_code != 429 or attempt == _MAX_ATTEMPTS:
            break
        # Rate limited: back off for as long as the server asks, then retry.
        time.sleep(_retry_after_seconds(response))

    try:
        payload = response.json()
    except ValueError:
        # Body is not JSON (e.g. an HTML error page from a proxy).
        payload = None

    if response.status_code != 200:
        return None, payload if payload is not None else response.text

    # "data" may itself be null when the response carries errors.
    data = payload.get("data") if isinstance(payload, dict) else None
    return (data or {}).get("Anime"), None


# Process each title
total_titles = len(titles_list)
for idx, title in enumerate(titles_list, 1):
    title = str(title).strip()  # Ensure it's a clean string
    print(f"🔍 Processing {idx}/{total_titles}: {title}")

    anime, error = _fetch_anime(title)

    if error is not None:
        print(f"❌ Error fetching '{title}': {error}, adding as a placeholder.")
        anime_data_list.append(_placeholder_row(title))
    elif not anime:
        print(f"⚠️ No data found for '{title}', adding as a placeholder.")
        anime_data_list.append(_placeholder_row(title))
    else:
        # Save main entry
        main_row = extract_anime_data(anime, title)
        anime_data_list.append(main_row)

        # Save every related entry returned under relations.nodes, tagged
        # with the main entry's title so rows can be grouped afterwards.
        related_nodes = (anime.get("relations") or {}).get("nodes") or []
        for related in related_nodes:
            if related:
                anime_data_list.append(
                    extract_anime_data(
                        related, title, related_to=main_row["Anime_Title_Romaji"]
                    )
                )

    # Rate limiting: wait 2 seconds between titles (one request every 2 seconds)
    time.sleep(_REQUEST_DELAY_SECONDS)

# Turn the collected row dicts into a table (one column per dict key)
anime_df = pd.DataFrame(anime_data_list)

# Write the table to the output CSV without the numeric row index
anime_df.to_csv(output_file, index=False)

record_count = len(anime_df)
print(f"\n✅ Data saved to {output_file} with {record_count} records.")


🔍 Processing 1/5262: Kimi ni Todoke
🔍 Processing 2/5262: Fullmetal Alchemist
🔍 Processing 3/5262: Nodame Cantabile
🔍 Processing 4/5262: Bamboo Blade
🔍 Processing 5/5262: Tsukihime
🔍 Processing 6/5262: Detroit Metal City
🔍 Processing 7/5262: Kyou no Nekomura san
❌ Error fetching 'Kyou no Nekomura san': {'errors': [{'message': 'Not Found.', 'status': 404, 'locations': [{'line': 3, 'column': 5}]}], 'data': {'Anime': None}}, adding as a placeholder.
🔍 Processing 8/5262: Fushigi Yuugi Genbu Kaiden
❌ Error fetching 'Fushigi Yuugi Genbu Kaiden': {'errors': [{'message': 'Not Found.', 'status': 404, 'locations': [{'line': 3, 'column': 5}]}], 'data': {'Anime': None}}, adding as a placeholder.
🔍 Processing 9/5262: Koizora Setsunai Koimonogatari
❌ Error fetching 'Koizora Setsunai Koimonogatari': {'errors': [{'message': 'Not Found.', 'status': 404, 'locations': [{'line': 3, 'column': 5}]}], 'data': {'Anime': None}}, adding as a placeholder.
🔍 Processing 10/5262: Shinseiki Evangelion Ikari Shinji Ik