In [1]:
import pandas as pd
import numpy as np
import imdb

In [2]:
# Load the backed-up movie table, keeping only the title and release-year
# columns; the later lookup cell reads exactly these two fields per row.
df = pd.read_csv('backup.csv').loc[:, ['movie_name', 'release_year']]
df

Unnamed: 0,movie_name,release_year
0,Ghosts of Mars,2001
1,White Of The Eye,1987
2,A Woman in Flames,1983
3,The Sorcerer's Apprentice,2002
4,Little city,1997
...,...,...
25565,Eşrefpaşalılar,2010
25566,Mermaids: The Body Found,2011
25567,Knuckle,2011
25568,The Super Dimension Fortress Macross II: Lover...,1992


In [8]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Look up an IMDb key for every movie from row 15000 onwards, running the
# lookups concurrently on a thread pool.
# NOTE(review): presumably rows before 15000 were handled in an earlier run
# (a separate 'Output/imdb1.xlsx' is merged in further down) - confirm.
results = []
records = df.to_dict(orient='records')[15000:]
failed = 0
failed_records = []  # input rows whose lookup raised, kept so they can be retried

with ThreadPoolExecutor(max_workers=14) as executor:
    # Map each pending future back to the input row it was submitted for.
    future_to_record = {
        executor.submit(imdb.retrieve_imdb_key, record['movie_name'], record['release_year']): record
        for record in records
    }

    # Consume lookups in completion order, not submission order.
    for future in as_completed(future_to_record):
        try:
            result = future.result()
        except Exception as exc:
            # future.result() re-raises whatever the worker raised. Without this
            # guard a single bad lookup would abort the loop and imdb_keys_df
            # would never be built from the results already collected.
            failed += 1
            failed_records.append({**future_to_record[future], 'error': repr(exc)})
            continue

        if result is not None:
            results.append(result)
        else:
            # A None result is counted as a failed lookup.
            failed += 1

# Creating a new DataFrame from the processed data
imdb_keys_df = pd.DataFrame(results)

https://www.imdb.com/find/?q=Grand%20Slam
https://www.imdb.com/find/?q=Ulysses%27%20Gaze
https://www.imdb.com/find/?q=Commissar
https://www.imdb.com/find/?q=Siberiade
https://www.imdb.com/find/?q=The%20Tempest
https://www.imdb.com/find/?q=Eye%20of%20the%20Dolphin
https://www.imdb.com/find/?q=Careful%2C%20He%20Might%20Hear%20You
https://www.imdb.com/find/?q=Never%20Been%20Thawed
https://www.imdb.com/find/?q=Hot%20Moves
https://www.imdb.com/find/?q=Dead%20Clever
https://www.imdb.com/find/?q=Aftershock
https://www.imdb.com/find/?q=Back%20to%20the%20Future
https://www.imdb.com/find/?q=Lisa
https://www.imdb.com/find/?q=The%20Jack%20of%20Spades
https://www.imdb.com/find/?q=The%20Revolution%20Will%20Not%20Be%20Televised
https://www.imdb.com/find/?q=Kabuliwala
https://www.imdb.com/find/?q=Graystone
https://www.imdb.com/find/?q=Necromancy
https://www.imdb.com/find/?q=Quelques%20jours%20en%20septembre
https://www.imdb.com/find/?q=Kattu%20Vannu%20Vilichappol
https://www.imdb.com/find/?q=Daughters

In [11]:
# Show only the rows that have a non-missing 'imdb_name'.
imdb_keys_df.loc[imdb_keys_df['imdb_name'].notna()]

Unnamed: 0,imdb_id,imdb_name,imdb_year,movie_name,release_year
0,tt0085295,"Careful, He Might Hear You",1983,"Careful, He Might Hear You",1983
1,tt0363653,Grand Slam,1978,Grand Slam,1978
2,tt0918509,Dead Clever: The Life and Crimes of Julie Bott...,2007,Dead Clever,2007
4,tt0452711,Never Been Thawed,2005,Never Been Thawed,2005
6,tt1274300,The Tempest,2010,The Tempest,2010
...,...,...,...,...,...
10542,tt0120202,State and Main,2000,State and Main,2000
10543,tt0107057,Guilty as Sin,1993,Guilty as Sin,1993
10544,tt0109922,Gopi Kishan,1994,Gopi Kishan,1994
10548,tt1816585,Mermaids: The Body Found,2011,Mermaids: The Body Found,2011


In [12]:
# Persist this batch: keep rows with a non-missing 'imdb_name', renumber them
# from zero, and write them to Excel. The index is written too (pandas default).
(
    imdb_keys_df.loc[imdb_keys_df['imdb_name'].notna()]
    .reset_index(drop=True)
    .to_excel('Output/imdb2.xlsx')
)

In [15]:
# Read back the two saved batches of IMDb matches.
df1 = pd.read_excel(io='Output/imdb1.xlsx')
df2 = pd.read_excel(io='Output/imdb2.xlsx')

# Stack the batches row-wise under a fresh 0..n-1 index, then drop the
# 'Unnamed: 0' column (the old row index that was stored with the data).
concat_df = (
    pd.concat([df1, df2], ignore_index=True)
    .drop(columns='Unnamed: 0')
)

concat_df

Unnamed: 0,imdb_id,imdb_name,imdb_year,movie_name,release_year
0,tt0228333,Ghosts of Mars,2001,Ghosts of Mars,2001
1,tt0255819,Baby Boy,2001,Baby Boy,2001
2,tt0061637,Die Fahne von Kriwoj Rog,1967,Die Fahne von Kriwoj Rog,1967
3,tt0278891,Aaahh Belinda,1986,Aaah Belinda,1986
4,tt0097499,Henry V,1989,Henry V,1989
...,...,...,...,...,...
18735,tt0120202,State and Main,2000,State and Main,2000
18736,tt0107057,Guilty as Sin,1993,Guilty as Sin,1993
18737,tt0109922,Gopi Kishan,1994,Gopi Kishan,1994
18738,tt1816585,Mermaids: The Body Found,2011,Mermaids: The Body Found,2011


In [16]:
# Write the combined key table to CSV. The row index is written as well
# (pandas default), so the file gets an unnamed leading column.
concat_df.to_csv(path_or_buf='Output/imdb_keys.csv')