In [1]:
from bs4 import BeautifulSoup
import requests 
import numpy as np
import pandas as pd


In [2]:
# Address of the page to scrape (the IMDb "top" chart).
URL = "https://www.imdb.com/chart/top/"


In [3]:
# Request headers sent with the page request so that it looks like it comes
# from a regular desktop browser.
# NOTE: the header name must be spelled "User-Agent" (with a hyphen). Spelled
# with an underscore it is a different, unrelated header, and the browser
# string below would never be used as the user agent of the request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'Accept-language': 'en-US, en;q=0.5',
}

In [4]:
# Download the chart page.
# A timeout is passed because requests waits indefinitely by default, which
# would freeze the notebook if the server never answers.
webpage = requests.get(URL, headers=HEADERS, timeout=30)

# Stop here with an HTTPError on a 4xx/5xx answer instead of silently parsing
# an error page in the cells below.
webpage.raise_for_status()

In [5]:
# Display the response object: its repr shows the HTTP status code, so a
# successful download appears as <Response [200]>.
webpage


<Response [200]>

In [6]:
# Parse the raw body of the response (webpage.content) with Python's built-in
# "html.parser" backend; the resulting tree is searched in the cells below.
soup = BeautifulSoup(webpage.content, features="html.parser")

In [7]:
# Column store for the scraped data: one initially empty list per output
# column. The scraping cell appends to these lists.
d = {
    column_name: []
    for column_name in ("rank", "title", "released year", "imdb rating")
}

In [8]:
# Fill the columns of `d` in ONE pass over the title cells, so that the rank,
# title, release year and rating of a movie always end up at the same index of
# every list. Collecting each column with its own find_all() pass lets the
# lists drift out of step (or end up with different lengths, which makes the
# DataFrame construction fail) as soon as a single element is missing.
# A value that cannot be found is stored as '' so the row is kept aligned.

# Empty the lists first so that re-running this cell does not append a second
# copy of every movie.
for column_values in d.values():
    column_values.clear()

for title_cell in soup.find_all('td', class_='titleColumn'):
    # Elements belonging to the same movie are looked up inside the table row
    # that encloses the title cell (falling back to the cell itself when no
    # enclosing <tr> exists).
    movie_row = title_cell.find_parent('tr')
    if movie_row is None:
        movie_row = title_cell

    # rank: first whitespace-separated token of the cell text, without the
    # trailing dot (e.g. "1." -> "1"); guarded against an empty cell
    cell_words = title_cell.text.split()
    d['rank'].append(cell_words[0].strip(".") if cell_words else '')

    # title: text of the link inside the title cell; guarded against a cell
    # without a link
    title_link = title_cell.a
    d['title'].append(title_link.text if title_link is not None else '')

    # release year: text of the "secondaryInfo" span with the surrounding
    # parentheses removed (e.g. "(1994)" -> "1994")
    year_span = movie_row.find('span', class_="secondaryInfo")
    d['released year'].append(year_span.text.strip("()") if year_span is not None else '')

    # IMDb rating: text of the rating cell without the surrounding newlines
    rating_cell = movie_row.find('td', class_='ratingColumn imdbRating')
    d['imdb rating'].append(rating_cell.text.strip("\n") if rating_cell is not None else '')

In [9]:
# Turn the column lists in `d` into a table (one row per movie), then clean it
# in a single chain:
#   1. replace empty strings with NaN so that missing values are recognised,
#   2. drop every row that has no title (a row without a title is unusable).
top_movie_df = (
    pd.DataFrame(d)
    .replace('', np.nan)
    .dropna(subset=['title'])
)

# Write the cleaned table to a CSV file: header row included, row index omitted.
top_movie_df.to_csv('top_rated_movie_all_time.csv', index=False, header=True)

In [10]:
# Preview the result: show at most the first 30 rows of the cleaned table.
top_movie_df.head(30)


Unnamed: 0,rank,title,released year,imdb rating
0,1,The Shawshank Redemption,1994,9.2
1,2,The Godfather,1972,9.2
2,3,The Dark Knight,2008,9.0
3,4,The Godfather: Part II,1974,9.0
4,5,12 Angry Men,1957,9.0
5,6,Schindler's List,1993,8.9
6,7,The Lord of the Rings: The Return of the King,2003,8.9
7,8,Pulp Fiction,1994,8.8
8,9,The Lord of the Rings: The Fellowship of the Ring,2001,8.8
9,10,"The Good, the Bad and the Ugly",1966,8.8
