In [3]:
import requests
import pandas as pd
import time

# File locations: the CSV holding the titles to look up, and the CSV the
# collected results are written to.
input_file = "just_titles.csv"
output_file = "manga_data.csv"

# Read the title sheet; titles are taken from its first column, with empty
# cells dropped before converting to a plain Python list.
titles_df = pd.read_csv(input_file)
first_column = titles_df.iloc[:, 0]
titles_list = first_column.dropna().tolist()

# AniList GraphQL endpoint; every lookup is POSTed to this single URL.
API_URL = "https://graphql.anilist.co"

# GraphQL query sent once per title.
# - `$search` is the title string supplied through the request variables.
# - The lookup is restricted to `type: MANGA, format: MANGA` and the result is
#   exposed under the alias `Manga`, which is the key read from the response.
# - The selected fields are exactly the ones flattened into CSV columns by the
#   processing loop (IDs, romaji title, status, counts, scores, genres,
#   start/end dates, tag names, adult flag).
# NOTE: this is a runtime string sent to the API verbatim - do not add Python
# comments inside the triple-quoted literal.
QUERY = """
query ($search: String) {
    Manga: Media(search: $search, type: MANGA, format: MANGA) {
        id
        idMal
        title {
            romaji
        }
        format
        status
        chapters
        volumes
        source
        countryOfOrigin
        meanScore
        popularity
        trending
        favourites
        genres
        startDate {
            year
            month
            day
        }
        endDate {
            year
            month
            day
        }
        tags {
            name
        }
        isAdult
    }
}
"""

# Store results (one flat dict per input title, in input order).
# NOTE: entries are only accumulated in memory by this loop; nothing is
# persisted until the loop has finished.
manga_data_list = []

# Output columns, in CSV order. Shared by real entries and placeholders so the
# two can never drift apart.
_MANGA_COLUMNS = [
    "Manga_Title_Romaji",
    "Manga_ID",
    "MAL_ID",
    "Manga_Format",
    "Manga_Status",
    "Manga_Chapters",
    "Manga_Volumes",
    "Manga_Source",
    "Manga_Country",
    "Manga_MeanScore",
    "Manga_Popularity",
    "Manga_Trending",
    "Manga_Favourites",
    "Manga_Genres",
    "Manga_StartDate",
    "Manga_EndDate",
    "Manga_Tags",
    "Manga_IsAdult",
]
_MISSING = "N/A"          # marker written for any value the API did not provide
_REQUEST_TIMEOUT = 30     # seconds; without a timeout a stalled connection hangs forever
_REQUEST_DELAY = 1.5      # seconds to pause after each title
_MAX_ATTEMPTS = 3         # total tries per title when the API answers HTTP 429
_DEFAULT_RETRY_WAIT = 60.0  # seconds to wait on 429 when Retry-After is unusable


def _or_missing(value):
    """Return *value*, or "N/A" when it is None.

    GraphQL responses contain every requested field and use null for absent
    values, so ``dict.get(key, "N/A")`` never falls back. Only None is
    replaced: legitimate falsy values such as ``0`` or ``False`` are kept.
    """
    return _MISSING if value is None else value


def _format_date(parts):
    """Render a ``{"year", "month", "day"}`` dict as ``"year-month-day"``.

    Each component that is missing or null becomes "N/A" (instead of the
    literal text "None"). *parts* itself may be None.
    """
    parts = parts or {}
    return "-".join(str(_or_missing(parts.get(key))) for key in ("year", "month", "day"))


def _placeholder_entry(title):
    """Return an all-"N/A" row that keeps *title* so the input line is not lost."""
    entry = dict.fromkeys(_MANGA_COLUMNS, _MISSING)
    entry["Manga_Title_Romaji"] = title
    return entry


def _build_entry(data, title):
    """Flatten one ``Manga`` response dict into a CSV row.

    *title* is the search string; it is used when the API has no romaji title.
    """
    genres = data.get("genres")
    tags = data.get("tags")
    return {
        "Manga_Title_Romaji": (data.get("title") or {}).get("romaji") or title,
        "Manga_ID": _or_missing(data.get("id")),
        "MAL_ID": _or_missing(data.get("idMal")),
        "Manga_Format": _or_missing(data.get("format")),
        "Manga_Status": _or_missing(data.get("status")),
        "Manga_Chapters": _or_missing(data.get("chapters")),
        "Manga_Volumes": _or_missing(data.get("volumes")),
        "Manga_Source": _or_missing(data.get("source")),
        "Manga_Country": _or_missing(data.get("countryOfOrigin")),
        "Manga_MeanScore": _or_missing(data.get("meanScore")),
        "Manga_Popularity": _or_missing(data.get("popularity")),
        "Manga_Trending": _or_missing(data.get("trending")),
        "Manga_Favourites": _or_missing(data.get("favourites")),
        "Manga_Genres": ", ".join(genres) if genres else _MISSING,
        "Manga_StartDate": _format_date(data.get("startDate")),
        "Manga_EndDate": _format_date(data.get("endDate")),
        "Manga_Tags": ", ".join(tag["name"] for tag in tags) if tags else _MISSING,
        "Manga_IsAdult": _or_missing(data.get("isAdult")),
    }


def _retry_wait(response):
    """Return how many seconds to wait after an HTTP 429 response."""
    try:
        return max(float(response.headers.get("Retry-After", "")), _REQUEST_DELAY)
    except ValueError:
        # Header absent or not a plain number of seconds.
        return _DEFAULT_RETRY_WAIT


def _fetch_manga(title):
    """Query the API for *title* and return its ``Manga`` dict, or None.

    None is returned (after printing the reason) on network errors, non-200
    responses, unparseable bodies, and empty results, so that a single bad
    title never aborts the whole run.
    """
    request_body = {"query": QUERY, "variables": {"search": title}}

    for attempt in range(1, _MAX_ATTEMPTS + 1):
        try:
            response = requests.post(API_URL, json=request_body, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"❌ Error fetching '{title}': {exc}, adding as a placeholder.")
            return None

        # Rate limited: back off and try again instead of recording a
        # placeholder for a title that may well exist.
        if response.status_code == 429 and attempt < _MAX_ATTEMPTS:
            wait = _retry_wait(response)
            print(f"⏳ Rate limited on '{title}', retrying in {wait:.0f}s.")
            time.sleep(wait)
            continue
        break

    try:
        payload = response.json()
    except ValueError:
        # Error pages (e.g. from a proxy) are not guaranteed to be JSON.
        payload = None

    if response.status_code != 200:
        detail = payload if payload is not None else response.text
        print(f"❌ Error fetching '{title}': {detail}, adding as a placeholder.")
        return None

    # "data" can be present but null, so guard each level separately.
    data = None
    if isinstance(payload, dict):
        data = (payload.get("data") or {}).get("Manga")
    if not data:
        print(f"⚠️ No data found for '{title}', adding as a placeholder.")
        return None
    return data


# Process each title
total_titles = len(titles_list)
for idx, title in enumerate(titles_list, 1):
    title = str(title).strip()  # Ensure it's a clean string
    print(f"🔍 Processing {idx}/{total_titles}: {title}")

    data = _fetch_manga(title)
    manga_entry = _build_entry(data, title) if data else _placeholder_entry(title)

    # Append manga entry to the list
    manga_data_list.append(manga_entry)

    # Rate limiting: pause 1.5 seconds after every title
    time.sleep(_REQUEST_DELAY)

# Assemble the collected per-title records into a table.
manga_df = pd.DataFrame(data=manga_data_list)

# Write the table to disk without the positional index column, then report
# how many rows were written.
manga_df.to_csv(path_or_buf=output_file, index=False)
print(f"\n✅ Data saved to {output_file} with {len(manga_df)} records.")

🔍 Processing 1/5262: Kimi ni Todoke
🔍 Processing 2/5262: Fullmetal Alchemist
🔍 Processing 3/5262: Nodame Cantabile
🔍 Processing 4/5262: Bamboo Blade
🔍 Processing 5/5262: Tsukihime
🔍 Processing 6/5262: Detroit Metal City
🔍 Processing 7/5262: Kyou no Nekomura san
🔍 Processing 8/5262: Fushigi Yuugi Genbu Kaiden
🔍 Processing 9/5262: Koizora Setsunai Koimonogatari
🔍 Processing 10/5262: Shinseiki Evangelion Ikari Shinji Ikusei Keikaku
🔍 Processing 11/5262: Uwasa no Midori kun
🔍 Processing 12/5262: Soul Eater
🔍 Processing 13/5262: Kiyoku Yawaku
🔍 Processing 14/5262: Bokutachi wa Shitte Shimatta
🔍 Processing 15/5262: Amatsuki
🔍 Processing 16/5262: Konjiki no Gash
🔍 Processing 17/5262: Tsubasa Reservoir Chronicle
🔍 Processing 18/5262: Ahiru no Sora
🔍 Processing 19/5262: One Piece
🔍 Processing 20/5262: Fairy Tail


KeyboardInterrupt: 