In [1]:
import json
import requests
from tenacity import retry

with open('matched_data.json', 'r') as f:
    obj = json.load(f)
    unmatched = obj['no_match_titles'] + obj['unsure_titles']

with open('../dist/data.json', 'r') as f:
    items = json.load(f)['items']
    title_to_item = {
        item['title']: item for item in items
    }

In [None]:
api_key = ''

In [3]:
@retry
def request_agent_match(prompt: str):
    url = 'https://api.dify.ai/v1/chat-messages'
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }
    data = {
        "inputs": {},
        "query": prompt,
        "response_mode": "streaming",
        "conversation_id": "",
        "user": "abc-123"
    }
    response = requests.post(url, headers=headers, json=data)
    responsed_json_text = ''
    for line in response.text.split('\n\n'):
        if not line.startswith('data: '):
            continue
        data = line[6:]
        chunk = json.loads(data)
        if chunk['event'] == 'agent_message':
            responsed_json_text += chunk['answer']
    return json.loads(responsed_json_text)

In [4]:
import os
if os.path.exists('agentic_matched.json'):
    with open('agentic_matched.json', 'r') as f:
        title_to_tmdb = json.load(f)
else:
    title_to_tmdb = {}

def save_matches():
    with open('agentic_matched.json', 'w') as f:
        json.dump(title_to_tmdb, f, ensure_ascii=False, indent=2)

In [5]:
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

def process_title(title: str):
    item = title_to_item[title]
    item = item.copy()
    bgm_site = [site for site in item['sites'] if site['site'] == 'bangumi']
    del item['sites']
    if bgm_site:
        bgm_site = bgm_site[0]
        item['bangumi_id'] = bgm_site['id']


    prompt = f"匹配下面的剧集：\n{json.dumps(item, ensure_ascii=False)}"
    result = request_agent_match(prompt)
    title_to_tmdb[title] = result['tmdb_id']


with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for title in unmatched:
        if title in title_to_tmdb:
            continue
        future = executor.submit(process_title, title)
        futures.append(future)

    for future in tqdm(as_completed(futures), total=len(futures)):
        try:
            save_matches()
        except Exception as e:
            print(f"Error processing title: {e}")

  0%|          | 0/1321 [00:00<?, ?it/s]

 62%|██████▏   | 818/1321 [57:19<51:40,  6.16s/it]  

Error processing title: dictionary changed size during iteration


 64%|██████▍   | 848/1321 [59:08<21:24,  2.71s/it]

Error processing title: dictionary changed size during iteration


100%|██████████| 1321/1321 [1:23:57<00:00,  3.81s/it]


In [None]:
title_to_tmdb['ふしぎなコアラブリンキー'] = 'tv/3149/season/1'
title_to_tmdb['劇場版 ニルスのふしぎな旅'] = 'movie/529667'
title_to_tmdb['うる星やつら3 リメンバー・マイ・ラブ'] = 'movie/150024'
title_to_tmdb['新しい動画 3つのはなし'] = 'movie/314201'
title_to_tmdb['ユニコ 黒い雲と白い羽'] = 'movie/38564'
title_to_tmdb['緑の猫'] = 'movie/400139'
title_to_tmdb['魔法使いサリー 劇場版'] = 'movie/589462'
# title_to_tmdb['ToHeart2 ダンジョントラベラーズ'] = 'tv/27438/season/0/episode/11'

In [7]:
save_matches()