In [1]:
from bs4 import BeautifulSoup
import requests
import openpyxl

In [2]:
# Download the HTML source of the IMDb Top Rated Movies chart.
# A browser-like User-Agent is sent because a server may reject requests that
# identify themselves as an automated client.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# The timeout keeps the cell from waiting forever on an unresponsive server.
source = requests.get('https://www.imdb.com/chart/top/', headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx response instead of silently keeping an error page.
source.raise_for_status()

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

* User-Agent: This is an HTTP request header that a browser sends to a web server, identifying the browser and operating system being used. Websites can use this information to serve different content to different devices or to block automated requests. By sending a User-Agent that mimics a real browser (in this case, Google Chrome on Windows), the request is more likely to be accepted by the server.

source = requests.get('https://www.imdb.com/chart/top/', headers=headers)
source.raise_for_status()

* requests.get(): This function sends a GET request to the specified URL (https://www.imdb.com/chart/top/), with the custom headers included.
* source.raise_for_status(): This method checks whether the request was successful. If the server returns a status code that indicates an error (a 4xx or 5xx code, such as 403), this line raises a requests.exceptions.HTTPError; otherwise it does nothing.

soup = BeautifulSoup(source.text, 'html.parser')
print(soup.prettify())

* BeautifulSoup(source.text, 'html.parser'): This line creates a BeautifulSoup object by parsing the HTML content (source.text) using the built-in HTML parser.
* soup.prettify(): This method returns the parsed HTML as a string formatted into a more readable, indented structure. The print() function then displays this formatted HTML.

except Exception as e:
    print(e)

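* The statements above are meant to be wrapped in a try block (the opening try: line is not shown in these snippets; the complete version appears in the scraping cell further down). The except clause handles potential errors that may occur during the HTTP request or HTML parsing: if an exception is raised, its error message is printed out instead of a traceback.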

In [3]:
# Start a new in-memory workbook that will receive the scraped rows.
excel = openpyxl.Workbook()

In [4]:
# Show the worksheet names currently in the workbook (before renaming).
print(excel.sheetnames)

['Sheet']


In [5]:
# Work on the workbook's active worksheet and give it a descriptive title.
sheet = excel.active
sheet.title = "Top Rated Movies"

In [6]:
# Show the worksheet names again to confirm the new title was applied.
print(excel.sheetnames)

['Top Rated Movies']


In [7]:
# Header row: one label per value appended for each movie.
# Each call to append() adds a new row, so re-running this cell adds the header again.
sheet.append([
    'Movie Rank',
    'Movie Name',
    'Year of Release',
    'IMDB Rating',
])

In [8]:
# Scrape the IMDb Top Rated Movies chart and append one spreadsheet row per movie.
#
# Elements are matched on a single descriptive class name each (e.g. 'cli-title')
# rather than on the full class-attribute string: an exact-string match stops
# working as soon as any one of the classes on the element changes.
# NOTE(review): the 'sc-...' classes in the page look auto-generated and likely to
# change between site builds; confirm the descriptive names used here are present
# on the live page.
try:
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    # The timeout keeps the cell from hanging indefinitely if the server never answers.
    source = requests.get('https://www.imdb.com/chart/top/', headers=headers, timeout=10)
    source.raise_for_status()  # turn 4xx/5xx responses into an exception

    soup = BeautifulSoup(source.text, 'html.parser')
    movie_list = soup.find('ul', class_='compact-list-view', role='presentation')
    if movie_list is None:
        # Give a readable message instead of an AttributeError on None.
        raise ValueError('Could not find the movie list on the page; the page layout may have changed.')
    movies = movie_list.find_all('li', class_='cli-parent')

    for movie in movies:
        # The heading text reads "<rank>. <title>". Split on the first period only,
        # so titles that themselves contain a period are kept whole, and strip the
        # space that follows the separator.
        heading = movie.find('div', class_='cli-title').a.text
        rank, name = (part.strip() for part in heading.split('.', 1))
        # The first metadata item of a movie entry is used as its release year.
        year = movie.find('div', class_='cli-title-metadata').find('span', class_='cli-title-metadata-item').text
        rating = movie.find('span', class_='ipc-rating-star--rating').text

        print(rank, name, year, rating)
        sheet.append([rank, name, year, rating])

except Exception as e:
    # Best-effort cell: report the failure (network error, HTTP error status, or a
    # lookup that no longer matches the page) instead of raising.
    print(e)


1  The Shawshank Redemption 1994 9.3
2  The Godfather 1972 9.2
3  The Dark Knight 2008 9.0
4  The Godfather Part II 1974 9.0
5  12 Angry Men 1957 9.0
6  Schindler's List 1993 9.0
7  The Lord of the Rings: The Return of the King 2003 9.0
8  Pulp Fiction 1994 8.9
9  The Lord of the Rings: The Fellowship of the Ring 2001 8.9
10  Il buono, il brutto, il cattivo 1966 8.8
11  Forrest Gump 1994 8.8
12  The Lord of the Rings: The Two Towers 2002 8.8
13  Fight Club 1999 8.8
14  Inception 2010 8.8
15  Star Wars: Episode V - The Empire Strikes Back 1980 8.7
16  The Matrix 1999 8.7
17  Goodfellas 1990 8.7
18  One Flew Over the Cuckoo's Nest 1975 8.7
19  Interstellar 2014 8.7
20  Se7en 1995 8.6
21  It's a Wonderful Life 1946 8.6
22  Shichinin no samurai 1954 8.6
23  The Silence of the Lambs 1991 8.6
24  Saving Private Ryan 1998 8.6
25  Cidade de Deus 2002 8.6


In [9]:
# Write the workbook to an .xlsx file; the relative path resolves against the
# current working directory.
excel.save('IMDB Movies Rating.xlsx')