In [2]:
# Scrape the top 250 movies from the IMDb website
# and store them in an Excel file.

In [3]:
#import the modules

from bs4 import BeautifulSoup
import requests, openpyxl
import pandas as pd
import matplotlib.pyplot as plt


In [4]:
# Build a fresh openpyxl workbook and use its active worksheet as the results
# sheet: give it a title, then write the column headers as the first row.
excel = openpyxl.Workbook()
sheet = excel.active
sheet.title = 'Top rated movies'
sheet.append(('name', 'rank', 'year', 'rating'))


In [5]:
# Address of IMDb's "Top 250 movies" chart; this is the page requested by the
# scraping cell below.
# NOTE(review): the `ref_` query parameter looks like a referral tag copied
# from the browser's address bar -- presumably not required; confirm.

url = "https://www.imdb.com/chart/top/?ref_=nv_mv_25012"

In [6]:
# Download the chart page, extract name / rank / year / rating from every
# table row, and record each movie twice: as a dict in `all_movies` and as a
# row in the worksheet `sheet`. The workbook is written to disk at the end.
#
# NOTE(review): `sheet` is created in an earlier cell, so re-running only this
# cell appends the same rows to the worksheet a second time.

# Defined before the `try` so the name always exists for later cells, even
# when the download or the parsing fails.
all_movies = []

try:
    # requests applies no timeout by default; without one a stalled
    # connection would block this cell indefinitely.
    source = requests.get(url, timeout=10)
    source.raise_for_status()
    soup = BeautifulSoup(source.text, 'html.parser')

    # Each movie is expected in a <tr> inside <tbody class="lister-list">.
    # NOTE(review): these selectors mirror the markup observed when the
    # notebook was written -- confirm against the live page.
    table = soup.find('tbody', class_='lister-list')
    if table is None:
        # find() returns None when nothing matches; fail with a readable
        # message instead of an AttributeError on None.
        raise ValueError(
            "no <tbody class='lister-list'> element found; "
            "the page layout may have changed"
        )
    movies = table.find_all('tr')

    for movie in movies:
        # Look the title cell up once; name, rank and year are all read from it.
        title_cell = movie.find('td', class_='titleColumn')
        name = title_cell.a.text
        # Rank: the cell text up to the first '.'.
        rank = title_cell.get_text(strip=True).split('.')[0]
        # Year: the <span> text with surrounding parentheses removed.
        year = title_cell.span.text.strip('()')
        rating = movie.find('td', class_='ratingColumn imdbRating').strong.text
        data = dict(movie_name=name,
                    movie_rank=rank,
                    movie_year=year,
                    movie_rating=rating)
        all_movies.append(data)
        sheet.append([name, rank, year, rating])

except requests.RequestException as e:
    # Network problems, timeouts and non-2xx responses (raise_for_status).
    print(f"Could not download {url}: {e}")
except (AttributeError, ValueError) as e:
    # A missing table, or a row lacking one of the expected tags.
    print(f"Could not parse the chart page: {e}")

# Save the Excel file only when at least one movie was scraped, so a failed
# run does not overwrite an earlier export with a header-only workbook.
if all_movies:
    excel.save('IMDB top 250 movies.xlsx')
else:
    print("No movies were scraped; 'IMDB top 250 movies.xlsx' was not written.")
    