# Day 22 Exercises
### Number 1: Scraping a website and storing the data as a _json_ file

In [None]:
import requests
from bs4 import BeautifulSoup
import json

# Fetch the Boston University facts page and save the text of every
# paragraph on it to 'bu_data.json'.
url = 'http://www.bu.edu/president/boston-university-facts-stats/'

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Text of each <p> element, in document order.
    paragraphs = soup.find_all('p')
    data_list = [p.get_text() for p in paragraphs]

    # Write UTF-8 explicitly and keep non-ASCII characters readable in the
    # file instead of escaping them as \uXXXX sequences.
    with open('bu_data.json', 'w', encoding='utf-8') as file:
        json.dump(data_list, file, indent=4, ensure_ascii=False)
    print("Data scraped and saved as 'bu_data.json' successfully!")
else:
    print("Failed to fetch the webpage")


### Number 2: Extracting a table and saving it as a _json_ file

In [None]:
import requests
from bs4 import BeautifulSoup
import json

# Fetch the UCI datasets page, pick the first table that has at least five
# rows, and save its rows to 'uci_ml_data.json' as a list of dictionaries
# keyed by the column headers.
url = 'https://archive.ics.uci.edu/datasets'

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Finding all the tables on the page
    tables = soup.find_all('table')

    # The first table with at least 5 rows is taken as the data table.
    target_table = None
    for table in tables:
        if len(table.find_all('tr')) >= 5:
            target_table = table
            break

    if target_table is not None:  # Checking if a potential table was found
        rows = target_table.find_all('tr')

        # Headers come from the first row only. Collecting every <th> in the
        # table would also pick up row-header cells in the body, making the
        # header list longer than any row so that every row gets skipped.
        headers = [cell.text.strip() for cell in rows[0].find_all(['th', 'td'])]

        # Converting the remaining rows to a list of dictionaries
        table_data = []
        for row in rows[1:]:
            # A body row may start with a <th> row header, so count both kinds.
            cells = row.find_all(['th', 'td'])
            # Rows that are empty or do not line up with the headers are skipped.
            if cells and len(cells) == len(headers):
                table_data.append(
                    {header: cell.text.strip() for header, cell in zip(headers, cells)}
                )

        # Storing the table data as a JSON file (UTF-8, non-ASCII kept readable)
        with open('uci_ml_data.json', 'w', encoding='utf-8') as file:
            json.dump(table_data, file, indent=4, ensure_ascii=False)
        print("The Table data extracted and saved as 'uci_ml_data.json'")
    else:
        print("The desired table not found on the webpage")
else:
    print("Failed to fetch the webpage")



### Number 3: Scraping the Presidents table and storing the data as a _json_ file

In [None]:
# Task 3: Scrape the Presidents table from https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States
# and save its rows to 'us_presidents.json' as a list of dictionaries keyed
# by the column headers.
url = 'https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States'

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # First table with the 'wikitable' class; find() returns None when the
    # page has no such table, so that case is handled instead of crashing.
    presidents_table = soup.find('table', {'class': 'wikitable'})

    if presidents_table is not None:
        rows = presidents_table.find_all('tr')

        # Headers come from the first row only. Collecting every <th> in the
        # table would also pick up row-header cells in the body, making the
        # header list longer than any row so that every row gets skipped.
        header_cells = rows[0].find_all(['th', 'td']) if rows else []
        headers = [cell.text.strip() for cell in header_cells]

        # Extract table data
        table_data = []
        for row in rows[1:]:
            # A body row may start with a <th> row header, so count both kinds.
            cells = row.find_all(['th', 'td'])
            # Rows that are empty or do not line up with the headers are
            # skipped. NOTE(review): rows affected by rowspan/colspan will
            # not match the header count and are dropped - confirm the
            # output against the live page.
            if cells and len(cells) == len(headers):
                table_data.append(
                    {header: cell.text.strip() for header, cell in zip(headers, cells)}
                )

        # Store Presidents' data as a JSON file (UTF-8, non-ASCII kept readable)
        with open('us_presidents.json', 'w', encoding='utf-8') as file:
            json.dump(table_data, file, indent=4, ensure_ascii=False)
        print("The Presidents' table data extracted and saved as 'us_presidents.json'")
    else:
        print("The Presidents' table not found on the webpage")
else:
    print("Failed to fetch the webpage")
