In [1]:
import pandas as pd
from rapidfuzz import process, fuzz

# Load data
manga_df = pd.read_csv("anilist_manga_list.csv")
titles_df = pd.read_csv("just_titles.csv")

# Prepare combined title options from AniList; missing titles become ""
# so they are dropped by the blank-string filter below.
manga_titles = manga_df[["title_romaji", "title_english"]].fillna("")

# Combine romaji and english into one list of unique title options.
# dict.fromkeys de-duplicates while keeping first-seen order (all romaji
# titles first, then the english ones). Going through a set instead would
# make the order depend on string hashing, which can differ between kernel
# restarts, and the order of the choices can influence which of several
# equally scored titles the fuzzy matcher ends up returning.
unique_titles = dict.fromkeys(
    [*manga_titles["title_romaji"], *manga_titles["title_english"]]
)
title_options = [t for t in unique_titles if t.strip() != ""]

# Match function using RapidFuzz
def match_title(query):
    """Return the closest entry of ``title_options`` for ``query``.

    Args:
        query: Title to look up.

    Returns:
        ``(match, score)`` where ``match`` is the selected entry of
        ``title_options`` and ``score`` its ``fuzz.token_sort_ratio``
        similarity, or ``(None, 0)`` when ``process.extractOne`` reports
        that there is no match.
    """
    result = process.extractOne(query, title_options, scorer=fuzz.token_sort_ratio)
    # extractOne returns None rather than a tuple when it has nothing to
    # report (for instance an empty list of choices; presumably also a
    # missing query value - verify against the rapidfuzz docs). Unpacking
    # None directly would raise TypeError.
    if result is None:
        return None, 0
    match, score, _ = result
    return match, score

# Collect one output record per series title
matched_data = []

# Walk through every series title from just_titles.csv
for series in titles_df["series"]:
    best_match, score = match_title(series)

    # Rows of manga_df whose romaji or english title is exactly the match
    is_hit = (manga_df["title_romaji"] == best_match) | (manga_df["title_english"] == best_match)
    hits = manga_df[is_hit]

    if hits.empty:
        # Nothing in manga_df carries the matched title
        record = {
            "series": series,
            "matched_title": None,
            "match_score": 0
        }
    else:
        # Start from the first matching row and add the match details
        record = {
            **hits.iloc[0].to_dict(),
            "series": series,
            "matched_title": best_match,
            "match_score": score
        }
    matched_data.append(record)

# Build the result table and write it to disk
matched_df = pd.DataFrame(matched_data)
matched_df.to_csv("matched_manga_titles.csv", index=False)
print("\n✅ Fuzzy-matched titles saved to 'matched_manga_titles.csv'")



✅ Fuzzy-matched titles saved to 'matched_manga_titles.csv'


In [None]:
import pandas as pd
from rapidfuzz import fuzz
from collections import defaultdict

# Read both input tables
anilist_df = pd.read_csv('anilist_manga_list.csv')
titles_df = pd.read_csv('just_titles_copy.csv')  # must provide a 'series' column

# Build the lookup: row label -> list of the titles present on that row
# (romaji first, then english); rows without any title get no entry.
title_map = defaultdict(list)
for row_label, romaji_title, english_title in zip(
    anilist_df.index, anilist_df['title_romaji'], anilist_df['title_english']
):
    for candidate in (romaji_title, english_title):
        if pd.notna(candidate):
            title_map[row_label].append(candidate)

# Function to get best match index and score
def get_best_match(query_title):
    """Find the ``title_map`` entry whose title is most similar to the query.

    Every title stored in ``title_map`` is compared with the query using
    ``fuzz.ratio`` on lower-cased strings; the first title that reaches the
    highest score wins.

    Args:
        query_title: Title to look up.

    Returns:
        ``(best_index, best_score)``: the ``title_map`` key of the best
        match and its score, or ``(-1, -1)`` when nothing was compared
        (``title_map`` holds no titles or ``query_title`` is not a string).
    """
    # A blank CSV cell is read by pandas as float NaN, which has no
    # .lower(); report "no match" through the sentinel instead of raising.
    if not isinstance(query_title, str):
        return -1, -1

    # Lower-case the query once instead of once per candidate title.
    query_lower = query_title.lower()

    best_score = -1
    best_index = -1
    for idx, title_list in title_map.items():
        for title in title_list:
            score = fuzz.ratio(query_lower, title.lower())
            # Strict '>' keeps the earliest candidate on ties.
            if score > best_score:
                best_score = score
                best_index = idx
    return best_index, best_score

# Match each title in just_titles_copy.csv
results = []
for query_title in titles_df['series']:
    best_idx, best_score = get_best_match(query_title)
    if best_idx == -1:
        # get_best_match uses index -1 to signal that it found nothing to
        # compare against; looking that label up with .loc would raise
        # KeyError, so record the query with empty titles instead.
        romaji = english = ""
    else:
        # Read each cell once and blank out missing titles.
        romaji = anilist_df.loc[best_idx, 'title_romaji']
        if pd.isna(romaji):
            romaji = ""
        english = anilist_df.loc[best_idx, 'title_english']
        if pd.isna(english):
            english = ""
    results.append({
        # NOTE(review): this is the DataFrame row label of the match, not a
        # value read from a column of the CSV - confirm that is intended.
        'id': best_idx,
        'series': query_title,
        'title_romaji': romaji,
        'title_english': english,
        'match_score': best_score
    })

# Save result to DataFrame and CSV
results_df = pd.DataFrame(results)
results_df.to_csv('matched_titles.csv', index=False)

print("Matching complete. Output saved to 'matched_titles.csv'.")


Matching complete. Output saved to 'matched_titles_with_scores.csv'.


In [2]:
import pandas as pd
from rapidfuzz import fuzz
from collections import defaultdict

# Read both input tables
anilist_df = pd.read_csv('anilist_manga_list2.csv')
titles_df = pd.read_csv('just_titles_copy.csv')  # must provide a 'series' column

# Build the lookup: row label -> list of the titles present on that row
# (romaji first, then english); rows without any title get no entry.
title_map = defaultdict(list)
title_columns = zip(anilist_df['title_romaji'], anilist_df['title_english'])
for row_label, row_titles in zip(anilist_df.index, title_columns):
    for candidate in row_titles:
        if pd.notna(candidate):
            title_map[row_label].append(candidate)

# Function to get best match index and score
def get_best_match(query_title):
    """Find the ``title_map`` entry whose title is most similar to the query.

    Every title stored in ``title_map`` is compared with the query using
    ``fuzz.ratio`` on lower-cased strings; the first title that reaches the
    highest score wins.

    Args:
        query_title: Title to look up.

    Returns:
        ``(best_index, best_score)``: the ``title_map`` key of the best
        match and its score, or ``(-1, -1)`` when nothing was compared
        (``title_map`` holds no titles or ``query_title`` is not a string).
    """
    # A blank CSV cell is read by pandas as float NaN, which has no
    # .lower(); report "no match" through the sentinel instead of raising.
    if not isinstance(query_title, str):
        return -1, -1

    # Lower-case the query once instead of once per candidate title.
    query_lower = query_title.lower()

    best_score = -1
    best_index = -1
    for idx, title_list in title_map.items():
        for title in title_list:
            score = fuzz.ratio(query_lower, title.lower())
            # Strict '>' keeps the earliest candidate on ties.
            if score > best_score:
                best_score = score
                best_index = idx
    return best_index, best_score

# Match each title in just_titles_copy.csv
results = []
for query_title in titles_df['series']:
    best_idx, best_score = get_best_match(query_title)
    if best_idx == -1:
        # get_best_match uses index -1 to signal that it found nothing to
        # compare against; looking that label up with .loc would raise
        # KeyError, so record the query with blank id and titles instead.
        matched_id = ""
        romaji = english = ""
    else:
        # Take the id from the 'id' column of the matched row rather than
        # using the DataFrame row label itself.
        matched_id = anilist_df.loc[best_idx, 'id']
        # Read each title cell once and blank out missing values.
        romaji = anilist_df.loc[best_idx, 'title_romaji']
        if pd.isna(romaji):
            romaji = ""
        english = anilist_df.loc[best_idx, 'title_english']
        if pd.isna(english):
            english = ""
    results.append({
        'id': matched_id,
        'series': query_title,
        'title_romaji': romaji,
        'title_english': english,
        'match_score': best_score
    })


# Save result to DataFrame and CSV
results_df = pd.DataFrame(results)
results_df.to_csv('matched_titles2.csv', index=False)

print("Matching complete. Output saved to 'matched_titles2.csv'.")


Matching complete. Output saved to 'matched_titles2.csv'.


In [2]:
import pandas as pd
from rapidfuzz import fuzz
from collections import defaultdict

# Read both input tables
anilist_df = pd.read_csv('anilist_manga_list.csv')
titles_df = pd.read_csv('just_titles_copy.csv')  # needs a 'series' column

# Title lookup: row label -> titles available on that row (romaji, then
# english); a row with neither title never gets a key.
title_map = defaultdict(list)
for position, row_label in enumerate(anilist_df.index):
    romaji_title = anilist_df['title_romaji'].iloc[position]
    english_title = anilist_df['title_english'].iloc[position]
    if pd.notna(romaji_title):
        title_map[row_label].append(romaji_title)
    if pd.notna(english_title):
        title_map[row_label].append(english_title)

# Function to get best match index and score using WRatio
def get_best_match(query_title):
    """Find the ``title_map`` entry whose title is most similar to the query.

    Every title stored in ``title_map`` is compared with the query using
    ``fuzz.WRatio`` on lower-cased strings; the first title that reaches the
    highest score wins.

    Args:
        query_title: Title to look up.

    Returns:
        ``(best_index, best_score)``: the ``title_map`` key of the best
        match and its score, or ``(-1, -1)`` when nothing was compared
        (``title_map`` holds no titles or ``query_title`` is not a string).
    """
    # A blank CSV cell is read by pandas as float NaN, which has no
    # .lower(); report "no match" through the sentinel instead of raising.
    if not isinstance(query_title, str):
        return -1, -1

    # Lower-case the query once instead of once per candidate title.
    query_lower = query_title.lower()

    best_score = -1
    best_index = -1
    for idx, title_list in title_map.items():
        for title in title_list:
            score = fuzz.WRatio(query_lower, title.lower())
            # Strict '>' keeps the earliest candidate on ties.
            if score > best_score:
                best_score = score
                best_index = idx
    return best_index, best_score

# Perform matching
results = []
for query_title in titles_df['series']:
    best_idx, best_score = get_best_match(query_title)
    if best_idx == -1:
        # get_best_match uses index -1 to signal that it found nothing to
        # compare against; looking that label up with .loc would raise
        # KeyError, so record the query with empty titles instead.
        romaji = english = ""
    else:
        # Read each cell once and blank out missing titles.
        romaji = anilist_df.loc[best_idx, 'title_romaji']
        if pd.isna(romaji):
            romaji = ""
        english = anilist_df.loc[best_idx, 'title_english']
        if pd.isna(english):
            english = ""
    results.append({
        'series': query_title,
        'title_romaji': romaji,
        'title_english': english,
        'match_score': best_score
    })

# Save result
results_df = pd.DataFrame(results)
# Keep the path in one place so the confirmation message always names the
# file that was actually written (it used to report a different filename).
output_path = 'matched_titles_with_scores2.csv'
results_df.to_csv(output_path, index=False)

print(f"Matching complete. Output saved to '{output_path}'.")


Matching complete. Output saved to 'matched_titles_with_scores.csv'.
