In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Necessary to import modules from `src`: put the parent directory of the
# notebook's working directory on the import path (once).
import os
import sys

module_path = os.path.abspath('..')
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

In [None]:
from pathlib import Path

import cartopy
from cartopy.io import shapereader
import cartopy.crs as ccrs
import geopandas
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns

from src.scripts.read_parse import read_csv_data, parse_data_remove_useless_cols
from src.scripts.analyze import parse_data_count_based_on_col, display_all_graphs
from src.scripts.visualizations.globe_plotting import plot_global_data, plot_usastate_data

In [None]:
# Year of the billionaire dataset to analyse. Later cells reuse this value
# (e.g. to select the matching year of the wealth estimates).
year = 2021

# Load the billionaire list for `year` (previously a hard-coded 2021 that
# could drift from `year`), then drop the columns the project considers
# irrelevant for the analysis.
billdf = read_csv_data(year, '../src/data/')
billdf = parse_data_remove_useless_cols(billdf)

In [None]:
# Population estimates, downloaded from
# https://www.statsamerica.org/downloads/default.aspx
uspopcount = pd.read_csv('../src/data/Population Estimates - US, States, Counties.csv')

# Keep only the rows whose "Year" is 2020.
uspopcount2020 = uspopcount.loc[uspopcount["Year"].eq(2020)]

In [None]:
# US billionaires only, discarding rows that lack a state or a net worth.
usbills = (
    billdf
    .loc[billdf["country"].eq("United States")]
    .dropna(subset=["state", "realTimeWorth"])
)

In [None]:
# Per-state count of non-null values in every column; the result is indexed
# by state name.
usbillsgrouped = usbills.groupby(by="state").count()
usbillsgrouped.head(n=5)

In [None]:
# Attach the 2020 population rows to the per-state counts by matching the
# "state" index level against the population table's "Description" column.
# Default (inner) join: states without a match on either side are dropped.
usbills_withpop = usbillsgrouped.merge(
    uspopcount2020,
    left_on="state",
    right_on="Description",
)

In [None]:
# Inputs for the per-state map.
# `states`: state names taken from the population table.
states = usbills_withpop["Description"]
# `data`: billionaires per resident. "position" holds the per-state count
# produced by the earlier groupby/count.
data = usbills_withpop["position"].div(usbills_withpop["Population"])

In [None]:
# Choropleth of billionaires per capita for each US state.
# Fixes the misspelling "billionares" in the figure title and colorbar label.
plot_usastate_data(
    states,
    data,
    title="USA number of billionaires per capita by state 2021",
    cbar_label="number of billionaires per capita",
)

In [None]:
# Start of the global analysis: repeat the data load so this part of the
# notebook does not depend on the state of `billdf` left by the US section.
year = 2021

# Use `year` rather than a second hard-coded 2021 so the loaded file and the
# year filter applied to the wealth estimates cannot drift apart.
billdf = read_csv_data(year, '../src/data/')
billdf = parse_data_remove_useless_cols(billdf)

In [None]:
# Total billionaire wealth per country; unit is millions of dollars.
# `numeric_only=True` restricts the sum to numeric columns. Without it,
# pandas >= 2.0 tries to "sum" string columns (concatenating them or raising),
# whereas older versions silently dropped them.
billdf_summed = billdf.groupby("country").sum(numeric_only=True)
billdf_summed.head()

In [None]:
# Load the aggregate wealth estimates table. Later cells read its "year",
# "country" and "total wealth" columns.
# NOTE(review): data source/provenance is not recorded here — please add it.
globdf = pd.read_csv("../src/data/wealth-est-agg.csv")

In [None]:
# Wealth estimates restricted to the analysis year.
globdf_yr = globdf.loc[globdf["year"].eq(year)]

In [None]:
# Join summed billionaire wealth with the country wealth estimates on the
# country name. Default (inner) join: countries present in only one table
# are dropped.
mergeddf = billdf_summed.merge(globdf_yr, on="country")

In [None]:
# Share of each country's total wealth held by its billionaires.
# "realTimeWorth" is scaled by 1e6 (millions of dollars) and "total wealth"
# by 1e9 (presumably billions of dollars — TODO confirm against the data
# source) so both sides are in dollars.
billionaire_dollars = mergeddf["realTimeWorth"].mul(1000000)
country_dollars = mergeddf["total wealth"].mul(1000000000)
mergeddf["ratios"] = billionaire_dollars.div(country_dollars)

In [None]:
# Keep only countries whose ratio is strictly below 0.45.
# NOTE(review): the threshold looks like an outlier cut-off — confirm and
# document the rationale.
mergeddf_fixed = mergeddf.loc[mergeddf["ratios"].lt(0.45)]

In [None]:
# Preview the first rows of the filtered table.
mergeddf_fixed.head()

In [None]:
# Ratio column of the filtered table; used as the plotted values below.
ratios = mergeddf_fixed["ratios"]

In [None]:
# Country names as they appear in the merged data (keys) mapped to the
# spelling expected by the map data (values).
country_name_fix = {
    "Hong Kong": "Hong Kong S.A.R.",
    "Tanzania": "United Republic of Tanzania",
    "United States": "United States of America",
}

In [None]:
# World choropleth of the billionaire-to-national-wealth ratio.
# `country_name_fix` translates country names to the spelling used by the
# map data. Fixes the misspelling "billionare" in the title and label.
plot_global_data(
    mergeddf_fixed["country"],
    ratios,
    country_name_map=country_name_fix,
    title="Global comparison of total billionaire to country total wealth in 2021",
    cbar_label="Total billionaire to country total wealth ratio",
)

In [None]:
# Countries ordered from the highest ratio to the lowest.
sorted_df = mergeddf_fixed.sort_values(by="ratios", ascending=False)

In [None]:
# Bar chart of the ten countries with the highest ratio (`sorted_df` is in
# descending order, so these are its first ten rows).
# The frame is passed as `data=`: seaborn releases before 0.12 take `x` as the
# first positional parameter, so a positional frame clashes with `x="country"`.
# Also fixes the misspelling "billionare" in the title and axis label.
sns.barplot(data=sorted_df.head(10), x="country", y="ratios")
plt.title("Top 10 countries with the highest total billionaire to country wealth ratio")
plt.xlabel("Country")
plt.ylabel("Total billionaire to country wealth ratio")
plt.xticks(rotation=35)
plt.show()

## Scratch work below, kept in case it is needed

In [None]:
# Adapted from
# https://stackoverflow.com/questions/61460814/color-cartopy-map-countries-according-to-given-values
# Country outlines come from Natural Earth (http://www.naturalearthdata.com/).

# Which Natural Earth dataset to use: 10m-resolution cultural
# "admin_0_countries" shapes.
resolution, category, name = '10m', 'cultural', 'admin_0_countries'
shpfilename = shapereader.natural_earth(
    resolution=resolution,
    category=category,
    name=name,
)

# Load the shapefile into a GeoDataFrame.
df = geopandas.read_file(shpfilename)

In [None]:
# Hand-rolled world choropleth of the billionaire-to-national-wealth ratio,
# drawn directly with cartopy. Reads `mergeddf_fixed`, `ratios`,
# `country_name_fix` and the Natural Earth GeoDataFrame `df` from earlier cells.

# Set up the canvas
fig = plt.figure(figsize=(8, 6))
central_lon, central_lat = 0, 0
extent = [-180, 180, -70, 90]
# Alternative projection: cartopy.crs.Orthographic(central_lon, central_lat)
ax = plt.axes(projection=cartopy.crs.PlateCarree(central_lon))
ax.set_extent(extent)

# Add natural earth features and borders
ax.add_feature(cartopy.feature.BORDERS, linestyle=':', alpha=1)
ax.add_feature(cartopy.feature.OCEAN, facecolor=("lightblue"))
ax.add_feature(cartopy.feature.LAND)
ax.coastlines(resolution='110m')

# Countries to colour, in the same row order as `ratios`
countries = mergeddf_fixed["country"]

# A single Normalize instance maps ratios onto [0, 1] for both the fill
# colours and the colorbar, so the two cannot disagree. Unlike the manual
# (x - min) / (max - min), it does not divide by zero when all ratios are equal.
ratio_values = np.asarray(ratios, dtype=float)
norm = matplotlib.colors.Normalize(vmin=np.nanmin(ratio_values), vmax=np.nanmax(ratio_values))
ratios_norm = norm(ratio_values)

# Choose your colourmap here
cmap = matplotlib.colormaps.get_cmap('viridis')

# Countries that have no matching shape; reported after the loop rather than
# aborting the whole figure with an IndexError.
missing_countries = []

for country, ratio_norm in zip(countries, ratios_norm):
    # Translate to the spelling used in the shapefile's ADMIN column
    admin_name = country_name_fix.get(country, country)
    shapes = df.loc[df['ADMIN'] == admin_name, 'geometry']
    if shapes.empty:
        missing_countries.append(country)
        continue
    # get the color for this country
    rgba = cmap(ratio_norm)
    # plot the country on a map; add_geometries expects a collection of
    # geometries, so the single shape is wrapped in a list
    ax.add_geometries([shapes.iloc[0]], crs=ccrs.PlateCarree(), facecolor=rgba, edgecolor='none', zorder=1)

if missing_countries:
    print(f"No Natural Earth geometry found for: {missing_countries}")

# Colorbar built from the same norm/cmap as the fills (replaces the invisible
# scatter plot previously used only to obtain a mappable).
mappable = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
fig.colorbar(mappable=mappable, ax=ax, label='Total billionaire to country total wealth ratio', orientation='horizontal', shrink=0.8)
fig.suptitle("Global comparison of total billionaire to country total wealth in 2021")
fig.tight_layout()
plt.show()