In [26]:
import json
from glob import glob
import re

In [27]:
# Collect the item JSON files that the cells below read and rewrite.
# NOTE(review): glob only treats '**' as "any depth" when recursive=True is
# passed; with the default (spelled out here) it behaves like a single '*', so
# this matches *.json exactly one directory below ../data/items. Confirm that
# depth is what is intended.
files = glob('../data/items/**/*.json', recursive=False)

In [29]:
# Regular expressions describing a season/part marker sitting at the very end of
# a title (each one is anchored with `$`), optionally wrapped in half-width or
# full-width parentheses.
possible_season_endding_patterns = [
    # <number> followed by <unit>, e.g. "第2期", "2nd Season", "(FINAL SEASON)".
    r'([\(（]?(第)?\s?(\d+|[一二三四五六七八九十]+|FINAL|1st|2nd|3rd|\d+th)\s?(SEASON|Season|season|シリーズ|シーズン|クール|期|章|季|部)[\)）]?$)',
    # <keyword> followed by <number>, e.g. "Season 2", "Part II", "シーズン3".
    r'([\(（]?(Season|SEASON|season|Part|part|PART|Volume|シーズン)\s?(\d+|[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+|II|III|IV|V|VI|VII|VIII|IX|X)[\)）]?$)',
]


def trim_season_mark(title: str) -> str:
    """Strip a trailing season/part marker from ``title``.

    Every regex in ``possible_season_endding_patterns`` is applied exactly once,
    in list order, and the intermediate result is whitespace-stripped after each
    application. Because it is a single pass, a marker that only becomes the
    suffix after the last pattern has run is left in place
    (e.g. "Title Season 2 Part 2" -> "Title Season 2").

    Args:
        title: The raw title, possibly ending with a season marker.

    Returns:
        The title with matched trailing markers removed and surrounding
        whitespace stripped; titles without a marker are only stripped.
    """
    trimmed = title
    for season_regex in possible_season_endding_patterns:
        without_marker = re.sub(season_regex, '', trimmed)
        # Removing the marker usually leaves the separating space behind.
        trimmed = without_marker.strip()
    return trimmed

In [37]:
# Load the 'title_to_tmdb' mapping from matched_data.json.
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding, which can fail on (or silently mis-decode) any
# non-ASCII text in the JSON on systems whose locale is not UTF-8.
with open('matched_data.json', encoding='utf-8') as f:
    matched_bangumis: dict[str, str] = json.load(f)['title_to_tmdb']

In [38]:
# For every item file: look up each item's season-trimmed title in
# `matched_bangumis`, attach the matching TMDB id as a 'tmdb' site entry, and
# write the file back. Every file is rewritten, whether or not an item matched.
for fn in files:
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(fn, 'r', encoding='utf-8') as f:
        bangumis = json.load(f)

    for item in bangumis:
        title = trim_season_mark(item['title'])
        tmdb_id = matched_bangumis.get(title)
        if tmdb_id is None:
            # No TMDB match for this title: leave the item untouched.
            continue

        # Drop any existing 'tmdb' entry first so that re-running this cell
        # replaces it instead of appending a duplicate.
        sites = [site for site in item['sites'] if site['site'] != 'tmdb']
        sites.append({
            "site": "tmdb",
            "id": tmdb_id,
            # "eps_offset": 0, # TODO: add episode offset
        })
        item['sites'] = sites

    # Serialize before opening the file for writing: mode 'w' truncates
    # immediately, so a serialization error must not happen after that point.
    serialized = json.dumps(bangumis, ensure_ascii=False, indent=2)
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # written as UTF-8 rather than in the locale encoding.
    with open(fn, 'w', encoding='utf-8') as f:
        f.write(serialized)