In [12]:
import os
import requests
from preprocessing.rss_parser import RSSParser

### Parse the RSS feed and build a dictionary mapping episode names to their raw-text topics

In [13]:
# Load the RSS file and map each key of `parser.episode_dict` to the value
# `parser.get_topics` returns for it. Later cells read element [0] of every
# topic as its title.
parser = RSSParser("data/misc/episodes.rss")

episode_topics = {
    episode: parser.get_topics(episode)
    for episode in parser.episode_dict
}



#### Exclude hand-picked topics that are not movies or TV shows

In [7]:
# Hand-picked lowercase phrases marking topics that are not movies or TV
# shows; they are matched as substrings of the lowercased topic title.
exclude_topics = {
    "borítóképek", "nép akarata sorsolás", "villámkérdések",
    "felvezető", "filmbarátok zárthelyi",
}

In [11]:
# Collect the distinct topic titles across all episodes, skipping every topic
# whose lowercased title contains one of the `exclude_topics` phrases.
# Built as a genuine set comprehension (rather than calling `.add()` inside a
# throwaway comprehension) so no discarded `{None}` set is created per episode.
title_set = {
    topic[0]  # element 0 of a topic is used as its title string
    for topics in episode_topics.values()
    for topic in topics
    if not any(phrase in topic[0].lower() for phrase in exclude_topics)
}

# Preview a handful of titles. Set iteration order is arbitrary, so this is a
# sample rather than a stable "first ten".
list(title_set)[:10]

['aquaman',
 'gengszterosztag',
 'patriots day',
 'desierto az ördög országútja',
 'class of 1984',
 'ryan közlegény megmentése',
 'az útvesztő tűzpróba',
 'tetovált lány usa',
 'szellemirtók 2016',
 'kaliforniai álom']

### Search for the titles using the TMDB API and get their IMDb links

In [14]:
# Read the TMDB API key from a local file instead of hardcoding it in the
# notebook; surrounding whitespace (e.g. a trailing newline) is stripped.
token_path = os.path.join("data", "misc", "tmdb.token")
with open(token_path, "r") as token_file:
    tmdb_api_key = token_file.read().strip()

In [16]:
# Look up a sample of titles on TMDB and print the IMDb URL of the most
# popular search hit for each. Titles with no hit, a non-200 response, or no
# IMDb id are skipped silently.
TMDB_API_ROOT = "https://api.themoviedb.org/3"
TMDB_TIMEOUT_SECONDS = 10  # fail instead of hanging the kernel on a stalled connection

for title in list(title_set)[:10]:
    # Pass the query through `params` so requests URL-encodes it; building the
    # query string by hand breaks on titles containing '&', '#', '+' or '%'.
    response = requests.get(
        f"{TMDB_API_ROOT}/search/movie",
        params={"api_key": tmdb_api_key, "query": title},
        timeout=TMDB_TIMEOUT_SECONDS,
    )
    results = response.json().get("results") or [] if response.status_code == 200 else []
    if not results:
        continue

    # Pick the most popular hit; `max` returns the first maximum, matching the
    # previous stable descending sort. A missing/null popularity counts as 0.
    tmdb_id = max(results, key=lambda hit: hit.get("popularity") or 0)["id"]

    response = requests.get(
        f"{TMDB_API_ROOT}/movie/{tmdb_id}",
        params={"api_key": tmdb_api_key},
        timeout=TMDB_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        continue

    movie = response.json()
    imdb_id = movie.get("imdb_id")
    if not imdb_id:
        # No IMDb id on the TMDB record; skip rather than print ".../title/None".
        continue

    imdb_movie = f"https://www.imdb.com/title/{imdb_id}"
    print(title, imdb_movie)

aquaman https://www.imdb.com/title/tt1477834
gengszterosztag https://www.imdb.com/title/tt1321870
patriots day https://www.imdb.com/title/tt4572514
desierto az ördög országútja https://www.imdb.com/title/tt3147312
class of 1984 https://www.imdb.com/title/tt0083739
ryan közlegény megmentése https://www.imdb.com/title/tt0120815
az útvesztő tűzpróba https://www.imdb.com/title/tt4046784
kaliforniai álom https://www.imdb.com/title/tt3783958
