In [1]:
import csv
import json
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

  from pandas.core import (


# Web Scraping

In [2]:
# Locate the "Data for Table 2.1" spreadsheet on the World Happiness Report
# 2024 page, download it, and store it as CSV under data2/.
url = 'https://worldhappiness.report/ed/2024/'
# A timeout keeps Restart & Run All from hanging forever on a stalled server.
response = requests.get(url, timeout=30)
# Fail fast on HTTP errors instead of searching an error page for links.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

excel_link = None
for link in soup.find_all('a'):
    if "Data for Table 2.1" in link.get_text():
        excel_link = link.get('href')
        break

# Without the spreadsheet the rest of the notebook cannot run (or would silently
# reuse a stale CSV), so stop here with an explicit message.
if not excel_link:
    raise RuntimeError(
        f'No "Data for Table 2.1" link with an href was found on {url}'
    )

# The href may be relative to the page; resolve it to an absolute URL.
excel_url = urljoin(url, excel_link)

# Download the excel file
excel_response = requests.get(excel_url, timeout=30)
# Do not save an HTTP error body as if it were a spreadsheet.
excel_response.raise_for_status()

# Make sure the output directory exists on a fresh checkout.
os.makedirs("data2", exist_ok=True)

# Save the excel file
with open("data2/world_happiness_data.xlsx", "wb") as f:
    f.write(excel_response.content)

# Convert the excel file to csv
df = pd.read_excel("data2/world_happiness_data.xlsx")
df.to_csv("data2/world_happiness_data.csv", index=False)

# Delete the excel file
os.remove("data2/world_happiness_data.xlsx")

# Turning into JSON for Better Processing

In [3]:
# Reshape the flat CSV into {country: {year: {column: value-or-None}}} and
# write the result to data2/world_happiness_data.json.
country_data = {}

# The CSV is written by pandas' to_csv, whose default encoding is UTF-8, so read
# it back as UTF-8 explicitly rather than relying on the platform locale.
# newline="" lets the csv module handle line endings itself.
with open("data2/world_happiness_data.csv", "r", newline="", encoding="utf-8") as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        country = row["Country name"]
        year = row["year"]

        # Keep every column except the two used as keys; empty cells become
        # None so they serialize as JSON null.
        data = {
            key: (None if value == "" else value)
            for key, value in row.items()
            if key not in ("Country name", "year")
        }

        # Create the per-country dict on first sight, then store this year's row.
        country_data.setdefault(country, {})[year] = data

json_data = json.dumps(country_data)
with open("data2/world_happiness_data.json", "w") as f:
    f.write(json_data)


## Change to Individual JSON Files for Each Attribute

In [4]:
# Load the nested country -> year -> attribute mapping from disk.
with open("data2/world_happiness_data.json", "r") as f:
    data = json.load(f)

# Pivoted view of the same data: attribute -> country -> year -> number or None.
attribute_data = {}

for country_name, yearly in data.items():
    for yr, record in yearly.items():
        for attr_name, raw in record.items():
            # Create the attribute and country levels the first time they appear.
            per_country = attribute_data.setdefault(attr_name, {})
            per_year = per_country.setdefault(country_name, {})

            # Falsy entries (None / empty string) are stored as None;
            # everything else is converted to float.
            if raw:
                per_year[yr] = float(raw)
            else:
                per_year[yr] = None

## Update Values for Each Attribute for Easier Graphing

In [5]:
# Rescale attributes in place so they are easier to plot together:
#   - "Life Ladder" and "Log GDP per capita" are left as they are,
#   - "Healthy life expectancy at birth" is divided by 10,
#   - every other attribute is multiplied by 10.
# NOTE: this mutates attribute_data, so running the cell twice scales twice.
for attribute, countries in attribute_data.items():
    if attribute in ("Life Ladder", "Log GDP per capita"):
        continue

    divide = attribute == "Healthy life expectancy at birth"
    for country, years in countries.items():
        # Only existing keys are reassigned, so iterating over items() is safe.
        for year, value in years.items():
            # None (missing) and zero values are left untouched.
            if not value:
                continue
            if divide:
                years[year] = value / 10
            else:
                years[year] = value * 10

In [6]:
# Write one JSON file per attribute into data2/, naming each file after the
# attribute in lowercase with spaces replaced by underscores.
for attribute, values in attribute_data.items():
    slug = attribute.lower().replace(" ", "_")
    out_path = "data2/" + slug + ".json"
    with open(out_path, "w") as f:
        json.dump(values, f)

# Make Sure TOPO.JSON Countries Align with World Happiness Report Countries

In [7]:
# Country-keyed happiness data.
with open("data2/world_happiness_data.json", "r") as f:
    data = json.load(f)

# TopoJSON map whose geometries carry a country name in their properties.
with open("data2/countries.topo.json", "r") as f:
    countries = json.load(f)

# Collect the set of country names the map knows about.
geometries = countries['objects']['countries']['geometries']
countries_topo = set()
for geometry in geometries:
    countries_topo.add(geometry['properties']['name'])

# Country names present in the happiness data (the top-level JSON keys).
countries_data = set(data)

# Names in the data that have no matching geometry in the map.
missing_countries = countries_data.difference(countries_topo)
missing_countries

{'Bahrain',
 'Comoros',
 'Hong Kong S.A.R. of China',
 'Ivory Coast',
 'Maldives',
 'Malta',
 'Mauritius',
 'North Macedonia',
 'Singapore'}