In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from time import sleep
import json
from helpers import *
import warnings

warnings.filterwarnings("ignore")
plt.rcdefaults()

In [2]:
DATA_FOLDER = "../Data/"
movies_imdb = pd.read_csv(DATA_FOLDER + 'movies_imdb.csv',
                          converters={'languages': parse_list, 'countries': parse_list, 'genresCmu': parse_list,
                                      'genresImdb': parse_list})
movies_imdb.head(10)

Unnamed: 0,wikiID,fbID,name,boxOffice,runtime,languages,countries,genresCmu,releaseMonth,releaseYear,imdbID,titleType,originalTitle,isAdult,endYear,genresImdb,averageRating,numVotes,budget,popularity
0,3196793,/m/08yl5d,Getting Away with Murder: The JonBenét Ramsey ...,,95.0,[English Language],[United States of America],"[Mystery, Biographical film, Drama, Crime Drama]",2.0,2000.0,,,,,,,,,,
1,28463795,/m/0crgdbh,Brun bitter,,83.0,[Norwegian Language],[Norway],"[Crime Fiction, Drama]",,1988.0,tt0094806,movie,Brun bitter,0.0,\N,"[Crime, Drama]",5.6,40.0,,
2,9363483,/m/0285_cd,White Of The Eye,,110.0,[English Language],[United Kingdom],"[Thriller, Erotic thriller, Psychological thri...",,1987.0,,,,,,,,,,
3,261236,/m/01mrr1,A Woman in Flames,,106.0,[German Language],[Germany],[Drama],,1983.0,tt0083949,movie,Die flambierte Frau,0.0,\N,[Drama],6.0,621.0,,
4,13696889,/m/03cfc81,The Gangsters,,35.0,"[Silent film, English Language]",[United States of America],"[Short Film, Silent film, Indie, Black-and-whi...",5.0,1913.0,tt0002894,short,The Gangsters,0.0,\N,"[Comedy, Short]",6.8,16.0,,
5,18998739,/m/04jcqvw,The Sorcerer's Apprentice,,86.0,[English Language],[South Africa],"[Family Film, Fantasy, Adventure, World cinema]",,2002.0,,,,,,,,,,
6,10408933,/m/02qc0j7,Alexander's Ragtime Band,3600000.0,106.0,[English Language],[United States of America],"[Musical, Comedy, Black-and-white]",8.0,1938.0,tt0029852,movie,Alexander's Ragtime Band,0.0,\N,"[Drama, Music, Musical]",6.8,2264.0,2000000.0,0.632261
7,9997961,/m/06_y2j7,Contigo y aquí,,,[Spanish Language],[Argentina],"[Musical, Drama, Comedy]",,1974.0,,,,,,,,,,
8,2345652,/m/075f66,City of the Dead,,76.0,[English Language],[United Kingdom],"[Horror, Supernatural]",,1960.0,,,,,,,,,,
9,175026,/m/017n1p,Sarah and Son,,86.0,[English Language],[United States of America],"[Drama, Black-and-white]",,1930.0,tt0021335,movie,Sarah and Son,0.0,\N,"[Drama, Romance]",5.4,298.0,,


In [3]:
# function to fetch all movie data for a given imdb id
def fetch_movie_data(imdbID, api_key):
    base_url = "http://www.omdbapi.com/"
    params = {'i': imdbID, 'apikey': api_key}
    response = requests.get(base_url, params=params)
    movie_data = response.json()

    return movie_data


api_key = '2cdf120a'

movie_data_example = fetch_movie_data('tt0000001', api_key)
omdb = pd.DataFrame(columns=movie_data_example.keys())

# iterate over the 'imdbID' column with tqdm
for imdb_id in tqdm(movies_imdb['imdbID'], desc='Fetching movie data'):
    success = False
    retries = 0
    max_retries = 5  # maximum number of retries
    backoff_factor = 1.5  # growth rate of the delay between retries

    if pd.notna(imdb_id):
        while not success and retries < max_retries:
            try:
                # fetch movie data
                movie_data = fetch_movie_data(imdb_id, api_key)
                # convert dictionary to pandas Series and append to the omdb DataFrame
                omdb = pd.concat([omdb, pd.DataFrame([movie_data])], ignore_index=True)
                success = True
            except json.JSONDecodeError:
                print(f"Failed to fetch data for IMDb ID: {imdb_id}")
                retries += 1
                sleep_time = backoff_factor ** retries  # exponential backoff
                print(f"Retrying in {sleep_time} seconds...")
                sleep(sleep_time)
            except Exception as e:
                print(f"An error occurred: {e}")
                break
omdb

Fetching movie data:   2%|▏         | 1858/81630 [06:40<4:18:32,  5.14it/s]  

Failed to fetch data for IMDb ID: tt14449300
Retrying in 1.5 seconds...


Fetching movie data:   7%|▋         | 5738/81630 [18:00<2:28:42,  8.51it/s]  

Failed to fetch data for IMDb ID: tt27792386
Retrying in 1.5 seconds...


Fetching movie data:  13%|█▎        | 10620/81630 [34:07<2:44:48,  7.18it/s] 

Failed to fetch data for IMDb ID: tt11725726
Retrying in 1.5 seconds...


Fetching movie data:  17%|█▋        | 14079/81630 [47:42<1:29:51, 12.53it/s]  

Failed to fetch data for IMDb ID: tt11455636
Retrying in 1.5 seconds...


Fetching movie data:  33%|███▎      | 27301/81630 [1:25:33<1:41:32,  8.92it/s]  

Failed to fetch data for IMDb ID: tt27628011
Retrying in 1.5 seconds...


Fetching movie data:  35%|███▍      | 28416/81630 [1:30:43<1:16:01, 11.67it/s]  

Failed to fetch data for IMDb ID: tt14134980
Retrying in 1.5 seconds...


Fetching movie data:  41%|████▏     | 33737/81630 [1:47:44<1:08:06, 11.72it/s]  

Failed to fetch data for IMDb ID: tt0763842
Retrying in 1.5 seconds...


Fetching movie data:  47%|████▋     | 38217/81630 [2:03:44<59:26, 12.17it/s]    

Failed to fetch data for IMDb ID: tt0435633
Retrying in 1.5 seconds...


Fetching movie data:  54%|█████▍    | 44089/81630 [2:22:15<58:19, 10.73it/s]    

Failed to fetch data for IMDb ID: tt6096972
Retrying in 1.5 seconds...


Fetching movie data:  61%|██████    | 49484/81630 [2:40:16<53:20, 10.04it/s]    

Failed to fetch data for IMDb ID: tt15683824
Retrying in 1.5 seconds...


Fetching movie data:  92%|█████████▏| 75484/81630 [3:45:57<07:51, 13.05it/s]    

Failed to fetch data for IMDb ID: tt1361548
Retrying in 1.5 seconds...


Fetching movie data:  93%|█████████▎| 75993/81630 [3:48:20<12:48,  7.34it/s]   

Failed to fetch data for IMDb ID: tt6354176
Retrying in 1.5 seconds...


Fetching movie data: 100%|██████████| 81630/81630 [4:01:23<00:00,  5.64it/s]   


Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,...,DVD,BoxOffice,Production,Website,Response,totalSeasons,Error,Season,Episode,seriesID
0,Brun bitter,1988,,17 Nov 1988,83 min,"Crime, Drama",Sølve Skagen,"Sølve Skagen, Gunnar Staalesen","Frank Krog, Kristin Kajander, Anne Krigsvoll","The lonely, divorced and disillusioned lawyer ...",...,,,,,True,,,,,
1,A Woman in Flames,1983,Not Rated,10 Jun 1983,106 min,Drama,Robert van Ackeren,"Robert van Ackeren, Catharina Zwerenz","Gudrun Landgrebe, Mathieu Carrière, Hanns Zisc...","Eva, an upper-class housewife, frustratingly l...",...,20 Mar 2017,,,,True,,,,,
2,The Gangsters,1913,,29 May 1913,35 min,"Comedy, Short",Henry Lehrman,Fred Niblo,"Roscoe 'Fatty' Arbuckle, Fred Mace, Nick Cogley",An amusing burlesque of gang fighters. The pol...,...,,,,,True,,,,,
3,Alexander's Ragtime Band,1938,Approved,16 Aug 1938,106 min,"Drama, Music, Musical",Henry King,"Kathryn Scola, Lamar Trotti, Richard Sherman","Tyrone Power, Alice Faye, Don Ameche",This send-up of ragtime song and dance begins ...,...,22 Oct 2013,,,,True,,,,,
4,Sarah and Son,1930,Passed,14 Mar 1930,86 min,"Drama, Romance",Dorothy Arzner,"Zoe Akins, Timothy Shea","Ruth Chatterton, Fredric March, Fuller Mellish...","After years of abusing his wife, a ne'er-do-we...",...,,,,,True,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30369,Guilty as Sin,1993,R,04 Jun 1993,107 min,"Crime, Drama, Thriller",Sidney Lumet,Larry Cohen,"Rebecca De Mornay, Don Johnson, Stephen Lang",A female lawyer takes an accused wife-murderer...,...,16 Dec 2015,"$22,866,222",,,True,,,,,
30370,"The Time, the Place and the Girl",1946,Approved,28 Dec 1946,105 min,"Musical, Comedy, Romance",David Butler,"Francis Swann, Agnes Christine Johnston, Lynn ...","Dennis Morgan, Martha Vickers, Jack Carson",The stuffy manager of lovely opera singer Vick...,...,,,,,True,,,,,
30371,Geoul sokeuro,2003,Not Rated,14 Aug 2003,113 min,"Action, Fantasy, Horror",Sung-ho Kim,Sung-ho Kim,"Yoo Ji-tae, Myung-Min Kim, Hye-na Kim","An ex-cop, now working as a security guard in ...",...,,,,,True,,,,,
30372,The Luck of the Navy,1927,,06 Dec 1929,110 min,"Action, Drama",Fred Paul,Clifford Mills,"Evelyn Laye, Henry Victor, Hayford Hobbs",A spy has his son steal an Admiral's submarine...,...,,,,,True,,,,,


In [4]:
omdb.to_csv(DATA_FOLDER + 'OMDb/omdb.csv', index=False)