# Web Scraping and Extracting Data using APIs

### Step 1: Install required libraries

In [21]:
# Install necessary libraries
!pip install pandas beautifulsoup4



### Step 2: Import libraries

In [22]:
# Import required libraries
import requests
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup

### Step 3: Initialization

In [23]:
# Page to scrape (an archived snapshot of the film ranking list)
url = 'https://web.archive.org/web/20230902185655/https://en.everybodywiki.com/100_Most_Highly-Ranked_Films'

# Output targets: CSV file, SQLite database file and the table inside it
csv_path = 'top_50_films.csv'
db_name, table_name = 'Movies.db', 'Top_50'

# Running row counter and the (initially empty) result table
count = 0
df = pd.DataFrame(columns=["Average Rank", "Film", "Year"])


### Step 4: Fetch and parse the webpage

In [24]:
# Fetch the HTML content of the webpage.
# A timeout keeps the cell from hanging forever if the server never answers,
# and raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_page = response.text

# Parse the HTML using BeautifulSoup
data = BeautifulSoup(html_page, 'html.parser')


### Step 5: Extract relevant information

In [25]:
# Find the first table body on the page and collect its rows
tables = data.find_all('tbody')
if not tables:
    # Fail with a clear message rather than an IndexError on tables[0]
    raise ValueError("No <tbody> element found in the fetched page")
rows = tables[0].find_all('tr')

# How many films to keep, and the column order of the result
max_films = 50
column_names = ["Average Rank", "Film", "Year"]

# Collect one record per data row and build the DataFrame once at the end.
# Appending with pd.concat inside the loop copies the whole frame on every row,
# and starting from a fresh list makes the cell safe to re-run.
records = []
for row in rows:
    if len(records) >= max_films:
        break
    col = row.find_all('td')
    # Skip rows with no <td> cells (presumably the header row) and rows
    # too short to hold rank, film and year.
    if len(col) < 3:
        continue
    # str() turns each NavigableString into a plain string so the DataFrame
    # does not keep a reference to the whole parse tree.
    records.append({
        "Average Rank": str(col[0].contents[0]),
        "Film": str(col[1].contents[0]),
        "Year": str(col[2].contents[0]),
    })

df = pd.DataFrame(records, columns=column_names)
# Keep the row counter in sync for any later cell that reads it
count = len(df)


### Step 6: Save data to a CSV file

In [27]:
# Write the scraped table to disk as CSV; index=False leaves out the row-index column
df.to_csv(csv_path, index=False)
print("Data saved to {}".format(csv_path))


Data saved to top_50_films.csv


### Step 7: Save data to an SQLite database

In [28]:
# Save the DataFrame to a SQLite database
conn = sqlite3.connect(db_name)
try:
    # if_exists='replace' overwrites an existing table, so re-running the cell
    # does not append duplicate rows.
    df.to_sql(table_name, conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails, so it is not leaked
    conn.close()
print(f"Data saved to database {db_name} in table {table_name}")


Data saved to database Movies.db in table Top_50


### Step 8: Verify the DataFrame

In [29]:
# Print the DataFrame as plain text to check the scraped rows by eye
print(df)


   Average Rank                                           Film  Year
0             1                                  The Godfather  1972
1             2                                   Citizen Kane  1941
2             3                                     Casablanca  1942
3             4                         The Godfather, Part II  1974
4             5                            Singin' in the Rain  1952
5             6                                         Psycho  1960
6             7                                    Rear Window  1954
7             8                                 Apocalypse Now  1979
8             9                          2001: A Space Odyssey  1968
9            10                                  Seven Samurai  1954
10           11                                        Vertigo  1958
11           12                                    Sunset Blvd  1950
12           13                                   Modern Times  1936
13           14                   