# Charlie Doubet & Matthew Riedl
## Scraped Data and CSV File Imports
## Cleaned CSV files

# Unemployment CSV File

In [3]:
import pandas as pd

In [4]:
# Load the raw unemployment table from a local, comma-separated, UTF-8 CSV file.
unemployment_rates = pd.read_csv("unemployment_per_country.csv", sep=',', encoding='utf-8')

In [5]:
# Keep only the country label and the five study years (2019 through 2023).
selected_columns = ['Country Name'] + [str(year) for year in range(2019, 2024)]
unemployment_rates_df = unemployment_rates.loc[:, selected_columns]

In [6]:
# Render the column-trimmed unemployment frame for a visual check.
display(unemployment_rates_df)

Unnamed: 0,Country Name,2019,2020,2021,2022,2023
0,Aruba,,,,,
1,Africa Eastern and Southern,7.244670,7.661161,7.964493,7.448222,7.472824
2,Afghanistan,11.224000,11.710000,11.934000,14.100000,14.386000
3,Africa Western and Central,4.247944,4.697497,4.524016,3.737766,3.397782
4,Angola,16.497000,16.676000,15.799000,14.693000,14.620000
...,...,...,...,...,...,...
261,Kosovo,,,,,
262,"Yemen, Rep.",17.303000,17.972000,18.248000,17.515000,17.215000
263,South Africa,25.538000,24.339000,28.770000,28.838000,27.988000
264,Zambia,5.539000,6.032000,5.195000,5.993000,5.913000


In [7]:
# Write the trimmed frame to disk; index=False keeps the row index out of the CSV.
unemployment_rates_df.to_csv('cleaned_unemployment_rates.csv', index=False)

# Billionaire CSV File

In [9]:
# Load the billionaire list from a local CSV file. low_memory=False tells pandas
# to parse the file in one pass instead of in chunks, which avoids per-chunk
# dtype guessing on columns with mixed content.
billionaires = pd.read_csv("all_billionaires_1997_2024.csv", sep=',', encoding='utf-8', low_memory=False) 

In [10]:
# Keep only the 2019-2023 rows (between() is inclusive on both ends) and the
# six columns used by the rest of the notebook.
# A single .loc selection followed by .copy() produces an independent frame, so
# the column assignment below cannot be flagged as a write to a slice of the
# raw data (the chained `df[mask][cols]` form can raise SettingWithCopyWarning).
billionaires = billionaires.loc[
    billionaires['year'].between(2019, 2023),
    ['year', 'rank', 'full_name', 'country_of_residence', 'business_industries', 'net_worth'],
].copy()

# Remove leading/trailing square brackets and every single quote from the
# industry text (the raw values appear to be list-like strings such as
# "['Technology']" - TODO confirm against the source file).
# regex=False makes the quote replacement an explicit literal match.
billionaires['business_industries'] = (
    billionaires['business_industries']
    .str.strip("[]")
    .str.replace("'", "", regex=False)
)

In [11]:
# Render the filtered billionaire frame for a visual check.
display(billionaires)

Unnamed: 0,year,rank,full_name,country_of_residence,business_industries,net_worth
19421,2019,1.0,Jeff Bezos,United States,Technology,131.0 B
19422,2019,2.0,Bill Gates,United States,Technology,96.5 B
19423,2019,3.0,Warren Buffett,United States,Finance and Investments,82.5 B
19424,2019,4.0,Bernard Arnault & family,France,Fashion & Retail,76.0 B
19425,2019,5.0,Carlos Slim Helu & family,Mexico,Telecom,64.0 B
...,...,...,...,...,...,...
31725,2023,2540.0,Yu Rong,China,Healthcare,1.0 B
31726,2023,2540.0,"Richard Yuengling, Jr. & family",United States,Food & Beverage,1.0 B
31727,2023,2540.0,Zhang Gongyun,China,Manufacturing,1.0 B
31728,2023,2540.0,Zhang Guiping & family,China,Real Estate,1.0 B


In [12]:
# Write the filtered billionaire frame to disk; index=False keeps the row index out of the CSV.
billionaires.to_csv('cleaned_billionaires.csv', index=False)

# GDP Growth Rates (Scraped from Website)

In [14]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By 
from selenium.webdriver.chrome.service import Service 
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time

In [15]:
# Start Chrome, using the driver binary that webdriver_manager installs/locates
chrome_service = Service(ChromeDriverManager().install())
browser = webdriver.Chrome(service=chrome_service)

# Load the GDP growth-rate ranking page, then maximize the browser window
url = "https://www.macrotrends.net/global-metrics/countries/ranking/gdp-growth-rate"
browser.get(url)
browser.maximize_window()

In [16]:
# Parallel accumulators for the scraped table: one independent, initially
# empty list per column that will be collected.
countries, growth_2023, growth_2022, growth_2021, growth_2020, growth_2019 = (
    [] for _ in range(6)
)

In [17]:
try:
    # Collect every <tr> element on the page; the first one is skipped as the header.
    country_rows = browser.find_elements(By.TAG_NAME, "tr")[1:]  # Skip header row
    print(f"Found {len(country_rows)} countries.")

    # Destination lists, in the same order as the table cells that feed them:
    # cell 0 -> countries, cell 1 -> 2023, ... cell 5 -> 2019.
    targets = (countries, growth_2023, growth_2022, growth_2021, growth_2020, growth_2019)

    for row in country_rows:
        try:
            cells = row.find_elements(By.TAG_NAME, "td")

            # Rows with fewer than six <td> cells do not carry all needed columns.
            if len(cells) < len(targets):
                continue

            # Read all six values BEFORE touching any list. Reading a cell can
            # raise, and appending value by value would then leave the parallel
            # lists with unequal lengths, which breaks DataFrame construction.
            values = [cell.text.strip() for cell in cells[:len(targets)]]
        except Exception as e:
            # Best effort: report the bad row and move on to the next one.
            print(f"Error processing row: {e}")
            continue

        # Either all six values are appended for a row, or none are.
        for target, value in zip(targets, values):
            target.append(value)

finally:
    # Close the browser whether or not scraping succeeded
    browser.quit()

Found 208 countries.


In [18]:
# Assemble the scraped lists into one table; the label order below fixes the
# column order of the resulting frame.
gdp_df = pd.DataFrame(dict(zip(
    ['Country', '2023', '2022', '2021', '2020', '2019'],
    [countries, growth_2023, growth_2022, growth_2021, growth_2020, growth_2019],
)))

In [19]:
# Save to CSV (index=False keeps the row index out of the file)
gdp_df.to_csv('cleaned_gdp_growth_rates.csv', index=False)

# Preview only the first rows of the frame rather than the whole table
display(gdp_df.head())

Unnamed: 0,Country,2023,2022,2021,2020,2019
0,Macao,80.53%,-21.40%,23.54%,-54.34%,-2.56%
1,Guyana,33.02%,63.44%,20.01%,43.48%,5.35%
2,Armenia,8.70%,12.60%,5.80%,-7.20%,7.60%
3,Democratic Republic Of Congo,8.56%,8.92%,6.20%,1.74%,4.38%
4,Tajikistan,8.30%,8.00%,9.40%,4.40%,7.40%


# Merge Billionaires and GDP

In [21]:
# Left join: every billionaire row is kept, and the GDP columns are attached
# wherever the residence country matches a 'Country' value in gdp_df.
merged_data = billionaires.merge(
    gdp_df,
    how='left',
    left_on='country_of_residence',
    right_on='Country',
)

In [22]:
# Label each year column as a GDP change figure, then discard the now-redundant
# 'Country' join key (country_of_residence already carries the country).
merged_data = (
    merged_data
    .rename(columns={str(year): f"{year}_gdp_change" for year in range(2019, 2024)})
    .drop(columns='Country')
)

In [23]:
# Render the billionaire + GDP table for a visual check.
display(merged_data)

Unnamed: 0,year,rank,full_name,country_of_residence,business_industries,net_worth,2023_gdp_change,2022_gdp_change,2021_gdp_change,2020_gdp_change,2019_gdp_change
0,2019,1.0,Jeff Bezos,United States,Technology,131.0 B,2.54%,1.94%,5.80%,-2.21%,2.47%
1,2019,2.0,Bill Gates,United States,Technology,96.5 B,2.54%,1.94%,5.80%,-2.21%,2.47%
2,2019,3.0,Warren Buffett,United States,Finance and Investments,82.5 B,2.54%,1.94%,5.80%,-2.21%,2.47%
3,2019,4.0,Bernard Arnault & family,France,Fashion & Retail,76.0 B,0.70%,2.45%,6.44%,-7.54%,1.84%
4,2019,5.0,Carlos Slim Helu & family,Mexico,Telecom,64.0 B,3.23%,3.95%,5.74%,-8.62%,-0.25%
...,...,...,...,...,...,...,...,...,...,...,...
12304,2023,2540.0,Yu Rong,China,Healthcare,1.0 B,5.20%,2.99%,8.45%,2.24%,5.95%
12305,2023,2540.0,"Richard Yuengling, Jr. & family",United States,Food & Beverage,1.0 B,2.54%,1.94%,5.80%,-2.21%,2.47%
12306,2023,2540.0,Zhang Gongyun,China,Manufacturing,1.0 B,5.20%,2.99%,8.45%,2.24%,5.95%
12307,2023,2540.0,Zhang Guiping & family,China,Real Estate,1.0 B,5.20%,2.99%,8.45%,2.24%,5.95%


In [24]:
# Write the billionaire + GDP table to disk; index=False keeps the row index out of the CSV.
merged_data.to_csv('Billionaires_GDP.csv', index=False)

# Merge Billionaires and Unemployment Rates

In [26]:
# Left join: every billionaire row is kept, and the unemployment columns are
# attached wherever the residence country matches a 'Country Name' value.
merged_df = pd.merge(
    left=billionaires,
    right=unemployment_rates_df,
    how='left',
    left_on='country_of_residence',
    right_on='Country Name',
)

In [27]:
# Label each year column as an unemployment rate, then discard the now-redundant
# 'Country Name' join key (country_of_residence already carries the country).
merged_df = (
    merged_df
    .rename(columns={str(year): f"{year}_unemployment_rate" for year in range(2019, 2024)})
    .drop(columns='Country Name')
)

In [28]:
# Render the billionaire + unemployment table for a visual check.
display(merged_df)

Unnamed: 0,year,rank,full_name,country_of_residence,business_industries,net_worth,2019_unemployment_rate,2020_unemployment_rate,2021_unemployment_rate,2022_unemployment_rate,2023_unemployment_rate
0,2019,1.0,Jeff Bezos,United States,Technology,131.0 B,3.669,8.055,5.349,3.650,3.625
1,2019,2.0,Bill Gates,United States,Technology,96.5 B,3.669,8.055,5.349,3.650,3.625
2,2019,3.0,Warren Buffett,United States,Finance and Investments,82.5 B,3.669,8.055,5.349,3.650,3.625
3,2019,4.0,Bernard Arnault & family,France,Fashion & Retail,76.0 B,8.415,8.010,7.874,7.308,7.323
4,2019,5.0,Carlos Slim Helu & family,Mexico,Telecom,64.0 B,3.477,4.441,4.019,3.256,2.812
...,...,...,...,...,...,...,...,...,...,...,...
12304,2023,2540.0,Yu Rong,China,Healthcare,1.0 B,4.560,5.000,4.550,4.980,4.670
12305,2023,2540.0,"Richard Yuengling, Jr. & family",United States,Food & Beverage,1.0 B,3.669,8.055,5.349,3.650,3.625
12306,2023,2540.0,Zhang Gongyun,China,Manufacturing,1.0 B,4.560,5.000,4.550,4.980,4.670
12307,2023,2540.0,Zhang Guiping & family,China,Real Estate,1.0 B,4.560,5.000,4.550,4.980,4.670


In [29]:
# Write the billionaire + unemployment table to disk; index=False keeps the row index out of the CSV.
merged_df.to_csv('Billionaires_Unemployment.csv', index=False)