In [1]:
from urllib.request import urlopen
from urllib.parse import quote
import json
import pandas as pd
from config import api_key
import re

In [6]:
# Filepath
filepath = 'random_v3_df.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(filepath)

# Function to fetch IMDb rating from OMDb API
def get_imdb_rating(title):
    # URL encode the title to handle special characters
    title_encoded = quote(title)
    url = f'http://www.omdbapi.com/?apikey={api_key}&t={title_encoded}'

    try:
        with urlopen(url) as response:
            data = json.loads(response.read().decode())

            # Check if the response is valid and rating is not "N/A"
            if data['Response'] == 'True' and data['imdbRating'] != "N/A":
                return data['imdbRating']
            else:
                # Retry without year and special characters
                title_simplified = re.sub(r'[^\w\s]', '', title)  # Remove special characters
                title_simplified_encoded = quote(title_simplified)
                retry_url = f'http://www.omdbapi.com/?apikey={api_key}&t={title_simplified_encoded}'
                
                with urlopen(retry_url) as retry_response:
                    retry_data = json.loads(retry_response.read().decode())
                    if retry_data['Response'] == 'True' and retry_data['imdbRating'] != "N/A":
                        return retry_data['imdbRating']
                    else:
                        print(f"Error: 'imdbRating' not found for simplified title: {title_simplified}")
                        return None

    except KeyError:
        print(f"Error: 'imdbRating' not found for {title}")
        return None
    except Exception as e:  # Catch any other errors
        print(f"Error fetching rating for {title}: {e}")
        return None

# Create a new column 'imdb_rating'
df['imdb_rating'] = df['title_without_year'].astype(str).apply(get_imdb_rating)

# Drop rows with null values in 'imdb_rating'
df.dropna(subset=['imdb_rating'], inplace=True)

print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

Error: 'imdbRating' not found for simplified title: Men in Black 3 
Error: 'imdbRating' not found for simplified title: X2 XMen United 
Error: 'imdbRating' not found for simplified title: Fantastic 4 Rise of the Silver Surfer 
Error: 'imdbRating' not found for simplified title: Borat Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan 
Error: 'imdbRating' not found for simplified title: Spy Kids 3D Game Over 
Error: 'imdbRating' not found for simplified title: Lee Daniels The Butler 
Error: 'imdbRating' not found for simplified title: Saw The Final Chapter 
Error: 'imdbRating' not found for simplified title: Peter Pan 2 Return to Never Land 
Error: 'imdbRating' not found for simplified title: Spy Kids 4D All the Time in the World 
Error: 'imdbRating' not found for simplified title: Once Upon a Deadpool 
Error: 'imdbRating' not found for simplified title: Indiana Jones and the Raiders of the Lost Ark 
| title_without_year                         | year   | main_

In [7]:
df.head()

Unnamed: 0,title_without_year,year,main_genre,MPAA-Rating,Runtime,Distributor,Budget_$,Domestic_$,International_$,total_revenue_$,imdb_rating
0,Avengers: Endgame,2019,Action,PG-13,181,Walt Disney Studios Motion Pictures,"$356,000,000","$858,373,000","$1,939,128,328","$2,797,501,328",8.4
1,Avatar,2009,Sci-Fi,PG-13,162,Twentieth Century Fox,"$237,000,000","$749,766,139","$1,993,811,448","$2,743,577,587",7.9
2,Star Wars: Episode VII - The Force Awakens,2015,Sci-Fi,PG-13,138,Walt Disney Studios Motion Pictures,"$245,000,000","$936,662,225","$1,131,561,399","$2,068,223,624",7.8
3,Jurassic World,2015,Adventure,PG-13,124,Universal Pictures,"$150,000,000","$652,270,625","$1,018,130,012","$1,670,400,637",6.9
4,The Lion King,2019,Family,PG,118,Walt Disney Studios Motion Pictures,"$260,000,000","$543,638,043","$1,113,305,351","$1,656,943,394",8.5


In [8]:
df.to_csv("imdb_df.csv")