In [6]:
import requests
import pandas as pd
import time
import os
# --- Locations -------------------------------------------------------------
# Source CSV holding the titles to look up, and the folder/template used for
# the per-batch CSV files written by the fetch loop.
input_file = "just_titles.csv"
output_folder = "manga_data"
output_file_template = os.path.join(output_folder, "manga_data_batch_{}.csv")

# Create the destination folder up front; a pre-existing folder is not an error.
os.makedirs(output_folder, exist_ok=True)

# --- Titles ----------------------------------------------------------------
# Only the first CSV column is used; missing (NaN) cells are dropped before
# the column is converted to a plain Python list.
titles_df = pd.read_csv(input_file)
first_column = titles_df.iloc[:, 0]
titles_list = first_column.dropna().tolist()

# --- Remote service --------------------------------------------------------
# AniList GraphQL endpoint that every lookup is POSTed to.
API_URL = "https://graphql.anilist.co"

# GraphQL query sent for every title.
# It takes a single `$search` string variable and asks for one Media entry
# restricted to type MANGA / format MANGA. The result is aliased as `Manga`,
# which is the key the fetch loop reads from the response's `data` object.
# Requested fields: ids (AniList + MAL), romaji title, format, status,
# chapter/volume counts, source, country of origin, score/popularity metrics,
# genres, start/end dates (year/month/day), tag names and the adult flag.
QUERY = """
query ($search: String) {
    Manga: Media(search: $search, type: MANGA, format: MANGA) {
        id
        idMal
        title {
            romaji
        }
        format
        status
        chapters
        volumes
        source
        countryOfOrigin
        meanScore
        popularity
        trending
        favourites
        genres
        startDate {
            year
            month
            day
        }
        endDate {
            year
            month
            day
        }
        tags {
            name
        }
        isAdult
    }
}
"""

# Store results
batch_size = 10  # Number of titles per batch
manga_data_list = []
rerun_batches = set(range(370, 381))  # Inclusive: 370 to 380
total_titles = len(titles_list)
request_timeout = 30  # Seconds; without a timeout a stalled connection hangs the run

# Columns that carry "N/A" whenever no usable AniList record is available.
_NA_COLUMNS = (
    "Manga_ID",
    "MAL_ID",
    "Manga_Format",
    "Manga_Status",
    "Manga_Chapters",
    "Manga_Volumes",
    "Manga_Source",
    "Manga_Country",
    "Manga_MeanScore",
    "Manga_Popularity",
    "Manga_Trending",
    "Manga_Favourites",
    "Manga_Genres",
    "Manga_StartDate",
    "Manga_EndDate",
    "Manga_Tags",
    "Manga_IsAdult",
)


def _or_na(value):
    """Return "N/A" for a missing (None) value, otherwise the value itself.

    Falsy-but-valid values such as 0 or False are kept as they are.
    """
    return "N/A" if value is None else value


def _format_date(date):
    """Render a {year, month, day} mapping as "Y-M-D".

    Each missing component (or a missing mapping) is rendered as "N/A".
    """
    date = date or {}
    return "-".join(str(_or_na(date.get(part))) for part in ("year", "month", "day"))


def _placeholder_entry(title):
    """Build the row stored when a title could not be fetched or matched.

    The same column set is used for every failure mode so all rows of a batch
    CSV share one schema.
    """
    entry = {"search_title": title, "series": "N/A", "Manga_Title_Romaji": title}
    entry.update(dict.fromkeys(_NA_COLUMNS, "N/A"))
    return entry


def _build_entry(title, data):
    """Flatten one `Manga` object from the API response into a CSV row.

    The response contains every requested key, with null for unknown values,
    so `dict.get(key, default)` would never apply its default; nulls are
    mapped to "N/A" explicitly instead.
    """
    # Fall back to the searched title when the romaji title is absent or null.
    romaji_title = (data.get("title") or {}).get("romaji") or title
    genres = data.get("genres") or []
    tags = data.get("tags") or []
    return {
        "search_title": title,
        "series": romaji_title,
        "Manga_Title_Romaji": romaji_title,
        "Manga_ID": _or_na(data.get("id")),
        "MAL_ID": _or_na(data.get("idMal")),
        "Manga_Format": _or_na(data.get("format")),
        "Manga_Status": _or_na(data.get("status")),
        "Manga_Chapters": _or_na(data.get("chapters")),
        "Manga_Volumes": _or_na(data.get("volumes")),
        "Manga_Source": _or_na(data.get("source")),
        "Manga_Country": _or_na(data.get("countryOfOrigin")),
        "Manga_MeanScore": _or_na(data.get("meanScore")),
        "Manga_Popularity": _or_na(data.get("popularity")),
        "Manga_Trending": _or_na(data.get("trending")),
        "Manga_Favourites": _or_na(data.get("favourites")),
        "Manga_Genres": ", ".join(genres) if genres else "N/A",
        "Manga_StartDate": _format_date(data.get("startDate")),
        "Manga_EndDate": _format_date(data.get("endDate")),
        "Manga_Tags": ", ".join(tag["name"] for tag in tags) if tags else "N/A",
        "Manga_IsAdult": _or_na(data.get("isAdult")),
    }


# Process each title
for idx, title in enumerate(titles_list, 1):
    current_batch = (idx - 1) // batch_size + 1
    if current_batch not in rerun_batches:
        continue  # Skip titles not in rerun batches
    title = str(title).strip()  # Ensure it's a clean string
    print(f"🔍 Processing {idx}/{total_titles}: {title}")

    variables = {"search": title}
    try:
        response = requests.post(
            API_URL,
            json={"query": QUERY, "variables": variables},
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts must not abort the run and lose the
        # rows already collected for this batch.
        print(f"❌ Error fetching '{title}': {exc}, adding as a placeholder.")
        manga_entry = _placeholder_entry(title)
    else:
        # The body is not guaranteed to be JSON (e.g. an HTML gateway error).
        try:
            payload = response.json()
        except ValueError:
            payload = None

        if response.status_code == 200 and isinstance(payload, dict):
            # "data" itself may be null, so guard before reading "Manga".
            data = (payload.get("data") or {}).get("Manga")
            if data:
                manga_entry = _build_entry(title, data)
            else:
                print(f"⚠️ No data found for '{title}', adding as a placeholder.")
                manga_entry = _placeholder_entry(title)
        else:
            detail = payload if payload is not None else response.text
            print(f"❌ Error fetching '{title}': {detail}, adding as a placeholder.")
            manga_entry = _placeholder_entry(title)

    # Append manga entry to the list
    manga_data_list.append(manga_entry)

    # Save batch if batch size is reached or if it's the last title
    if idx % batch_size == 0 or idx == total_titles:
        batch_file = output_file_template.format(current_batch)
        pd.DataFrame(manga_data_list).to_csv(batch_file, index=False)
        print(f"✅ Batch {current_batch} saved to {batch_file} with {len(manga_data_list)} records.")
        manga_data_list = []  # Reset for the next batch

    # Rate limiting: one request every 2 seconds (~30 requests per minute)
    time.sleep(2)

🔍 Processing 3691/5262: Ano Ko no Toriko5
🔍 Processing 3692/5262: Yankee Shota to Otaku Oneesan
🔍 Processing 3693/5262: Ani no Yome to Kurashitaimasu3
🔍 Processing 3694/5262: Kakagurui Midari
🔍 Processing 3695/5262: 12 sai12
🔍 Processing 3696/5262: The Idolmaster Cinderella Girls U149 Limited Edition
❌ Error fetching 'The Idolmaster Cinderella Girls U149 Limited Edition': {'errors': [{'message': 'Not Found.', 'status': 404, 'locations': [{'line': 3, 'column': 5}]}], 'data': {'Manga': None}}, adding as a placeholder.
🔍 Processing 3697/5262: Risou no Himo Seikatsu
🔍 Processing 3698/5262: Futsuu no Koiko chan
🔍 Processing 3699/5262: Manga de Wakaru FateGrand Order
🔍 Processing 3700/5262: Jealousy
✅ Batch 370 saved to manga_data/manga_data_batch_370.csv with 10 records.
🔍 Processing 3701/5262: Made in Abyss
🔍 Processing 3702/5262: DGray Man Official Fanbook Hai iro no Kiroku
❌ Error fetching 'DGray Man Official Fanbook Hai iro no Kiroku': {'errors': [{'message': 'Not Found.', 'status': 404