# Charlie Doubet & Matthew Riedl
## Scraped Data and CSV File Imports
## Cleaned CSV files

# Unemployment CSV File

In [3]:
import pandas as pd

In [4]:
# Load the raw per-country unemployment table.  The file name is a relative
# path, so the CSV must sit in the notebook's current working directory.
unemployment_rates = pd.read_csv(
    "unemployment_per_country.csv",
    sep=',',
    encoding='utf-8',
)

FileNotFoundError: [Errno 2] No such file or directory: 'unemployment_per_country.csv'

In [None]:
# Keep the country label plus one column per year from 2019 through 2023
# (the year columns are addressed by their string labels).
selected_columns = ['Country Name'] + [str(year) for year in range(2019, 2024)]
unemployment_rates_df = unemployment_rates[selected_columns]

In [None]:
# Render the trimmed unemployment table in the cell output for a visual check
# (display is supplied by the IPython/Jupyter session; it is not imported here).
display(unemployment_rates_df)

In [None]:
# Write the trimmed table to disk; index=False keeps the DataFrame's row
# index out of the CSV.
unemployment_rates_df.to_csv('cleaned_unemployment_rates.csv', index=False)

# Billionaire CSV File

In [None]:
# Load the raw billionaire list.  low_memory=False asks pandas to parse the
# file in one piece instead of in chunks, which avoids mixed-dtype columns.
billionaires = pd.read_csv(
    "all_billionaires_1997_2024.csv",
    sep=',',
    encoding='utf-8',
    low_memory=False,
)

In [None]:
# Restrict the data to the years 2019-2023 (inclusive) and to the columns that
# are used further down.
#
# The row filter and the column selection are done in a single .loc call and
# the result is copied explicitly.  The earlier chained form
# (df[mask][[cols]]) followed by a column assignment can trigger pandas'
# SettingWithCopyWarning, because pandas cannot tell whether the assignment is
# meant for the filtered frame or for the raw data it was derived from.
kept_columns = [
    'year',
    'rank',
    'full_name',
    'country_of_residence',
    'business_industries',
    'net_worth',
]
in_year_range = billionaires['year'].between(2019, 2023)
billionaires = billionaires.loc[in_year_range, kept_columns].copy()

# Strip leading/trailing square brackets and remove every single quote from
# the industry text.  regex=False states explicitly that the quote is a
# literal character, not a pattern.
# NOTE(review): presumably the raw values look like "['Technology']" - confirm
# against the source file.
billionaires['business_industries'] = (
    billionaires['business_industries']
    .str.strip("[]")
    .str.replace("'", "", regex=False)
)

In [None]:
# Render the filtered billionaire table in the cell output for a visual check
# (display is supplied by the IPython/Jupyter session; it is not imported here).
display(billionaires)

In [None]:
# Write the filtered billionaire table to disk; index=False keeps the
# DataFrame's row index out of the CSV.
billionaires.to_csv('cleaned_billionaires.csv', index=False)

# GDP Scraped Website

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By 
from selenium.webdriver.chrome.service import Service 
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time

In [None]:
# Start a Chrome session.  ChromeDriverManager().install() supplies the
# chromedriver location that the Selenium Service is built from.
driver_path = ChromeDriverManager().install()
chrome_service = Service(driver_path)
browser = webdriver.Chrome(service=chrome_service)

# Open the GDP growth ranking page and enlarge the window to full size.
url = "https://www.macrotrends.net/global-metrics/countries/ranking/gdp-growth-rate"
browser.get(url)
browser.maximize_window()

In [None]:
# Accumulators for the scrape: one independent, initially empty list for the
# country names and one for each year's growth value.
(
    countries,
    growth_2023,
    growth_2022,
    growth_2021,
    growth_2020,
    growth_2019,
) = ([] for _ in range(6))

In [None]:
try:
    # Collect every <tr> element on the page and drop the first one, which is
    # treated as the header row.
    country_rows = browser.find_elements(By.TAG_NAME, "tr")[1:]
    print(f"Found {len(country_rows)} countries.")

    for row in country_rows:
        try:
            cells = row.find_elements(By.TAG_NAME, "td")

            # A row needs at least six cells (country + five yearly values);
            # anything shorter is skipped.
            if len(cells) < 6:
                continue

            # Read all six values BEFORE touching the result lists.  If
            # reading any cell fails, nothing has been appended yet, so the
            # six lists always stay the same length and row-aligned.
            row_values = [cell.text.strip() for cell in cells[:6]]
        except Exception as e:
            # Best-effort scrape: report the failing row and keep going.
            print(f"Error processing row: {e}")
            continue

        # Cell order on the page: country, 2023, 2022, 2021, 2020, 2019.
        country, value_2023, value_2022, value_2021, value_2020, value_2019 = row_values
        countries.append(country)
        growth_2023.append(value_2023)
        growth_2022.append(value_2022)
        growth_2021.append(value_2021)
        growth_2020.append(value_2020)
        growth_2019.append(value_2019)

finally:
    # Always shut the browser down, even if the scrape raised.
    browser.quit()

In [None]:
# Assemble the scraped lists into a table.  Columns appear in the order given
# here: the country name first, then the years from newest to oldest.
gdp_columns = {}
gdp_columns['Country'] = countries
gdp_columns['2023'] = growth_2023
gdp_columns['2022'] = growth_2022
gdp_columns['2021'] = growth_2021
gdp_columns['2020'] = growth_2020
gdp_columns['2019'] = growth_2019
gdp_df = pd.DataFrame(gdp_columns)

In [None]:
# Write the scraped GDP growth table to disk; index=False keeps the
# DataFrame's row index out of the CSV.
gdp_df.to_csv('cleaned_gdp_growth_rates.csv', index=False)

# Preview the first rows in the cell output to check the scrape
# (display is supplied by the IPython/Jupyter session).
display(gdp_df.head())

# Merge Billionaires and GDP

In [None]:
# Left join: every billionaire row is kept and receives all of the GDP year
# columns of the country whose 'Country' value equals its
# 'country_of_residence'.  Rows without a matching country get missing values.
merged_data = billionaires.merge(
    gdp_df,
    how='left',
    left_on='country_of_residence',
    right_on='Country',
)

In [None]:
# Give each year column a "<year>_gdp_change" label, then discard the join
# key 'Country', which only repeats 'country_of_residence' on matched rows.
gdp_renames = {
    year: f"{year}_gdp_change"
    for year in ('2023', '2022', '2021', '2020', '2019')
}
merged_data = merged_data.rename(columns=gdp_renames).drop(columns=['Country'])

In [None]:
# Render the billionaire + GDP table in the cell output for a visual check
# (display is supplied by the IPython/Jupyter session; it is not imported here).
display(merged_data)

In [None]:
# Write the billionaire + GDP table to disk; index=False keeps the
# DataFrame's row index out of the CSV.
merged_data.to_csv('Billionaires_GDP.csv', index=False)

# Merge Billionaires and Unemployment Rates

In [None]:
# Left join: every billionaire row is kept and receives the unemployment year
# columns of the country whose 'Country Name' equals its
# 'country_of_residence'.  Rows without a matching country get missing values.
merged_df = billionaires.merge(
    unemployment_rates_df,
    how='left',
    left_on='country_of_residence',
    right_on='Country Name',
)

In [None]:
# Give each year column a "<year>_unemployment_rate" label, then discard the
# join key 'Country Name', which only repeats 'country_of_residence' on
# matched rows.
unemployment_renames = {
    year: f"{year}_unemployment_rate"
    for year in ('2023', '2022', '2021', '2020', '2019')
}
merged_df = merged_df.rename(columns=unemployment_renames).drop(columns=['Country Name'])

In [None]:
# Render the billionaire + unemployment table in the cell output for a visual
# check (display is supplied by the IPython/Jupyter session; it is not
# imported here).
display(merged_df)

In [None]:
# Write the billionaire + unemployment table to disk; index=False keeps the
# DataFrame's row index out of the CSV.
merged_df.to_csv('Billionaires_Unemployment.csv', index=False)