In [None]:
import json
import os
import warnings
from time import sleep

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from tqdm import tqdm

from helpers import *

# Silence every Python warning for the rest of the session. This keeps cell output
# compact, but it also hides deprecation notices raised by the libraries in use.
warnings.filterwarnings("ignore")
# Reset matplotlib's rcParams to the library's built-in defaults so figures do not
# inherit style settings left over from elsewhere.
plt.rcdefaults()

In [3]:
# Root folder of the project's data files (relative to this notebook).
DATA_FOLDER = "../Data/"

# Load the merged CMU/IMDb movie table. The four list-valued columns are passed
# through `parse_list` while reading (not defined in this notebook; presumably
# provided by the `helpers` star import).
movies_imdb = pd.read_csv(
    f"{DATA_FOLDER}movies_imdb.csv",
    converters={
        column: parse_list
        for column in ('languages', 'countries', 'genresCmu', 'genresImdb')
    },
)
movies_imdb.head(10)

Unnamed: 0,wikiID,fbID,name,boxOffice,runtime,languages,countries,genresCmu,releaseMonth,releaseYear,imdbID,titleType,originalTitle,isAdult,endYear,genresImdb,averageRating,numVotes,budget,popularity
0,3196793,/m/08yl5d,getting away with murder the jonbenet ramsey m...,,95.0,[English Language],[United States of America],"[Mystery, Biographical film, Drama, Crime Drama]",2.0,2000.0,tt0245916,tvMovie,Getting Away with Murder: The JonBenet Ramsey ...,0.0,\N,[Drama],6.0,69.0,,
1,28463795,/m/0crgdbh,brun bitter,,83.0,[Norwegian Language],[Norway],"[Crime Fiction, Drama]",,1988.0,tt0094806,movie,Brun bitter,0.0,\N,"[Crime, Drama]",5.6,40.0,,
2,9363483,/m/0285_cd,white of the eye,,110.0,[English Language],[United Kingdom],"[Thriller, Erotic thriller, Psychological thri...",,1987.0,tt0094320,movie,White of the Eye,0.0,\N,"[Horror, Mystery, Thriller]",6.1,2885.0,,3.121105
3,261236,/m/01mrr1,a woman in flames,,106.0,[German Language],[Germany],[Drama],,1983.0,tt0083949,movie,Die flambierte Frau,0.0,\N,[Drama],6.0,621.0,,
4,13696889,/m/03cfc81,the gangsters,,35.0,"[Silent film, English Language]",[United States of America],"[Short Film, Silent film, Indie, Black-and-whi...",5.0,1913.0,tt0002894,short,The Gangsters,0.0,\N,"[Comedy, Short]",6.8,16.0,,
5,18998739,/m/04jcqvw,the sorcerers apprentice,,86.0,[English Language],[South Africa],"[Family Film, Fantasy, Adventure, World cinema]",,2002.0,,,,,,,,,,
6,10408933,/m/02qc0j7,alexanders ragtime band,3600000.0,106.0,[English Language],[United States of America],"[Musical, Comedy, Black-and-white]",8.0,1938.0,tt0029852,movie,Alexander's Ragtime Band,0.0,\N,"[Drama, Music, Musical]",6.8,2264.0,2000000.0,0.632261
7,9997961,/m/06_y2j7,contigo y aqui,,70.0,[Spanish Language],[Argentina],"[Musical, Drama, Comedy]",,1974.0,tt0200545,movie,Contigo y aquí,0.0,\N,"[Comedy, Drama, Musical]",,,,
8,2345652,/m/075f66,city of the dead,,76.0,[English Language],[United Kingdom],"[Horror, Supernatural]",,1960.0,tt0053719,movie,The City of the Dead,0.0,\N,"[Horror, Mystery, Thriller]",6.7,8261.0,,1.514972
9,175026,/m/017n1p,sarah and son,,86.0,[English Language],[United States of America],"[Drama, Black-and-white]",,1930.0,tt0021335,movie,Sarah and Son,0.0,\N,"[Drama, Romance]",5.4,298.0,,


In [4]:
def fetch_movie_data(imdbID, api_key, timeout=30):
    """Fetch the OMDb record for a single IMDb ID.

    Parameters
    ----------
    imdbID : str
        IMDb title identifier, sent as the ``i`` query parameter
        (e.g. ``'tt0000001'``).
    api_key : str
        OMDb API key, sent as the ``apikey`` query parameter.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up, forwarded to
        ``requests.get``. Defaults to 30 so that one stalled connection cannot
        block a long batch run indefinitely.

    Returns
    -------
    dict
        The decoded JSON body of the response, exactly as returned by
        ``response.json()``.

    Raises
    ------
    ValueError
        If the body is not valid JSON (``json.JSONDecodeError`` or the
        equivalent subclass raised by ``requests``).
    requests.exceptions.RequestException
        On connection failures or when ``timeout`` is exceeded.
    """
    # HTTPS so the API key in the query string is not transmitted in clear text.
    base_url = "https://www.omdbapi.com/"
    params = {'i': imdbID, 'apikey': api_key}
    response = requests.get(base_url, params=params, timeout=timeout)
    return response.json()


# OMDb API key, read from the environment so the credential is not stored in the notebook.
api_key = os.environ.get('OMDB_API_KEY')
if not api_key:
    raise RuntimeError("Set the OMDB_API_KEY environment variable before running this cell.")

max_retries = 5  # maximum number of attempts per IMDb ID
backoff_factor = 1.5  # growth rate of the delay between retries

# One reference request: its keys fix the leading column order of the final table.
movie_data_example = fetch_movie_data('tt0000001', api_key)

records = []  # one OMDb response dict per fetched title
failed_ids = []  # IMDb IDs that were given up on, kept so they can be re-fetched later

# iterate over the 'imdbID' column with tqdm
for imdb_id in tqdm(movies_imdb['imdbID'], desc='Fetching movie data'):
    if pd.isna(imdb_id):
        continue  # this row has no IMDb ID, nothing to look up

    success = False
    retries = 0
    while not success and retries < max_retries:
        try:
            # Collect the raw response; the DataFrame is built once after the loop,
            # which avoids re-copying the whole table on every iteration.
            movie_data = fetch_movie_data(imdb_id, api_key)
            records.append(movie_data)
            success = True
        except (json.JSONDecodeError, requests.exceptions.RequestException):
            # Malformed body or transient network failure: retry with backoff.
            print(f"Failed to fetch data for IMDb ID: {imdb_id}")
            retries += 1
            if retries < max_retries:
                sleep_time = backoff_factor ** retries  # exponential backoff
                print(f"Retrying in {sleep_time} seconds...")
                sleep(sleep_time)
        except Exception as e:
            # Anything unexpected: report it and move on to the next title.
            print(f"An error occurred: {e}")
            break

    if not success:
        failed_ids.append(imdb_id)

# Build the table in one go: reference keys first, then any keys that only
# appear in later responses, in order of first appearance.
omdb_fetched = pd.DataFrame(records)
example_columns = list(movie_data_example.keys())
extra_columns = [col for col in omdb_fetched.columns if col not in example_columns]
omdb = omdb_fetched.reindex(columns=example_columns + extra_columns)

if failed_ids:
    print(f"{len(failed_ids)} IMDb IDs could not be fetched; see `failed_ids`.")
omdb

Fetching movie data: 100%|██████████| 81626/81626 [3:10:07<00:00,  7.16it/s]    


Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,...,Type,DVD,BoxOffice,Production,Website,Response,Error,Season,Episode,seriesID
0,Getting Away with Murder: The JonBenet Ramsey ...,2000,,16 Feb 2000,95 min,Drama,Edward Lucas,"Michael A. Graham, Ted Haimes","Alice Barrett, Rod Britt, Hildy Brooks","Six-year-old JonBenét Ramsey, whose body was f...",...,movie,,,,,True,,,,
1,Brun bitter,1988,,17 Nov 1988,83 min,"Crime, Drama",Sølve Skagen,"Sølve Skagen, Gunnar Staalesen","Frank Krog, Kristin Kajander, Anne Krigsvoll","The lonely, divorced and disillusioned lawyer ...",...,movie,,,,,True,,,,
2,White of the Eye,1987,R,19 Jun 1987,110 min,"Horror, Mystery, Thriller",Donald Cammell,"China Kong, Donald Cammell, Andrew Klavan","David Keith, Cathy Moriarty, Alan Rosenberg",Somebody's killing rich married women in their...,...,movie,10 Aug 2016,"$225,132",,,True,,,,
3,A Woman in Flames,1983,Not Rated,10 Jun 1983,106 min,Drama,Robert van Ackeren,"Robert van Ackeren, Catharina Zwerenz","Gudrun Landgrebe, Mathieu Carrière, Hanns Zisc...","Eva, an upper-class housewife, frustratingly l...",...,movie,20 Mar 2017,,,,True,,,,
4,The Gangsters,1913,,29 May 1913,35 min,"Comedy, Short",Henry Lehrman,Fred Niblo,"Roscoe 'Fatty' Arbuckle, Fred Mace, Nick Cogley",An amusing burlesque of gang fighters. The pol...,...,movie,,,,,True,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50682,The Luck of the Navy,1927,,06 Dec 1929,110 min,"Action, Drama",Fred Paul,Clifford Mills,"Evelyn Laye, Henry Victor, Hayford Hobbs",A spy has his son steal an Admiral's submarine...,...,movie,,,,,True,,,,
50683,Esrefpasalilar,2010,,05 Mar 2010,100 min,"Comedy, Drama",Hüdaverdi Yavuz,Burak Tarik,"Sinan Albayrak, Burak Tarik, Turgay Tanülkü",The story of a newly appointed imam to an aban...,...,movie,,,,,True,,,,
50684,The Ghost Train,1941,Not Rated,05 May 1941,85 min,"Comedy, Horror",Walter Forde,"Arnold Ridley, J.O.C. Orton, Val Guest","Arthur Askey, Richard Murdoch, Kathleen Harrison",High jinks and chills ensue when a group of pe...,...,movie,,,,,True,,,,
50685,Knuckle,2011,R,09 Dec 2011,97 min,"Documentary, Biography, Drama",Ian Palmer,,"James Quinn McDonagh, Paddy Quinn McDonagh, Mi...",An epic 12-year journey into the brutal and se...,...,movie,18 Nov 2016,"$2,647",,,True,,,,


In [5]:
# Persist the fetched OMDb table. The target folder is created first so that the
# result of the multi-hour fetch is not lost to a missing directory.
omdb_path = DATA_FOLDER + 'OMDb/omdb.csv'
os.makedirs(os.path.dirname(omdb_path), exist_ok=True)
omdb.to_csv(omdb_path, index=False)