
# Set up the notebook

In [1]:

# Toggle IPython's pretty-printing of outputs (the recorded output shows this run switched it OFF)
%pprint

Pretty printing has been turned OFF


In [2]:

import sys

# Insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../py')

from choropleth_utils import ChoroplethUtilities
from stats_scraping_utils import StatsScrapingUtilities
from storage import Storage
import pandas as pd
import re
import os
import numpy as np

# Shared helpers: the Storage instance used below to load and pickle objects,
# and the scraping utilities that are handed that same instance
s = Storage()
ssu = StatsScrapingUtilities(s=s)
# NOTE(review): the Choropleth cells further down call methods on `c`, but no visible cell assigns it
# (ChoroplethUtilities is imported above yet never instantiated here) — confirm where `c` should be created

In [52]:

# Reload the previously stored description dictionary and US stats frame
column_description_dict = s.load_object('column_description_dict')
us_stats_df = s.load_object('us_stats_df')

# Key the countries frame by country code, pass its names through the
# scraper's name lookup (names without an entry are kept as they are), then
# store it again with country_code restored as an ordinary column
all_countries_df = s.load_object('all_countries_df')
all_countries_df = all_countries_df.set_index('country_code', drop=True)
all_countries_df['country_name'] = all_countries_df['country_name'].map(
    lambda country_name: ssu.country_name_dict.get(country_name, country_name)
)
s.store_objects(all_countries_df=all_countries_df.reset_index(drop=False))

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\all_countries_df.pkl



----
# Get State/Country Physicians per Capita Equivalents


## Clean and prepare US States dataset

In [4]:

# Source page for the physicians-per-capita-by-state table
url = 'https://www.beckershospitalreview.com/workforce/this-state-has-the-most-physicians-per-capita.html'
# Scrape the page's tables; the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables(url)

[(0, (52, 4))]


In [5]:

# Take the first scraped table and rename its four columns positionally
us_states_df = tables_list[0].copy()
states_target_column_name = 'physicians_per_10k'
us_states_df.columns = ['state_name', 'total_population', 'total_active_physicians', states_target_column_name]

# Drop the first row (presumably a header or total row — confirm against the page).
# .copy() turns the slice into an independent frame so the column assignments
# below do not raise SettingWithCopyWarning
us_states_df = us_states_df.iloc[1:].copy()

# Keep only digits and dots in each cell, then parse; anything unparseable becomes NaN
cleaned_series = us_states_df[states_target_column_name].map(lambda cell: re.sub(r'[^0-9\.]+', '', str(cell)))
us_states_df[states_target_column_name] = pd.to_numeric(cleaned_series, errors='coerce', downcast='float')

# Rescale by a factor of ten.
# NOTE(review): the recorded outputs show values on mixed scales (e.g. 46.6 next to 389.4),
# so confirm the source column's unit before relying on this division
us_states_df[states_target_column_name] = us_states_df[states_target_column_name] / 10
us_states_df.sample(5)

Unnamed: 0,state_name,total_population,total_active_physicians,physicians_per_10k
22,Alaska,731545,2101,287.2
1,District of Columbia,705749,6147,87.1
32,Arizona,7278717,18343,25.2
2,Massachusetts,6892503,32116,46.6
15,Ohio,11689100,35333,302.3


In [14]:

# Why does MA only have 46.6 physicians per 10K whilst NY has 389.4?
# Pull both rows so their raw counts and rates can be compared directly
mask_series = us_states_df['state_name'].isin(['Massachusetts', 'New York'])
us_states_df.loc[mask_series]

Unnamed: 0,state_name,total_population,total_active_physicians,physicians_per_10k
2,Massachusetts,6892503,32116,46.6
4,New York,19453561,75749,389.4


In [6]:

# Look for US state duplicates and misspellings: take the names that appear in
# only one of the two sources and show the pairs scoring above 0.6
states_list = sorted(set(us_states_df.state_name) ^ set(us_stats_df.index))
doubles_df = ssu.check_4_doubles(states_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)

Unnamed: 0,first_item,second_item,max_similarity



## Clean and prepare Countries dataset

In [7]:

# Scrape the Wikipedia physicians-by-country page without a browser driver (driver=None);
# the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables('https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_number_of_physicians', driver=None)

[(1, (188, 5)), (0, (1, 2))]


In [8]:

# Work on the second scraped table and give its five columns stable names
countries_df = tables_list[1].copy()
countries_target_column_name = 'physicians_per_10k'
LATEST_COLUMN_NAME = f'{countries_target_column_name}_latest'
EARLIER_COLUMN_NAME = f'{countries_target_column_name}_2013'
EARLIEST_COLUMN_NAME = f'{countries_target_column_name}_2009'
countries_df.columns = ['country_name', 'total_population', EARLIEST_COLUMN_NAME, EARLIER_COLUMN_NAME, LATEST_COLUMN_NAME]

# Keep only digits and dots in each rate column, then parse (failures become NaN)
for cn in (EARLIEST_COLUMN_NAME, EARLIER_COLUMN_NAME, LATEST_COLUMN_NAME):
    stripped_series = countries_df[cn].map(lambda cell: re.sub(r'[^0-9\.]+', '', str(cell)))
    countries_df[cn] = pd.to_numeric(stripped_series, errors='coerce', downcast='float')
def f(row_series):
    """
    Pick the most recent physicians figure available in one country row.

    The latest column is preferred, then the 2013 column, then the 2009 column.

    Parameters:
        row_series: a row holding the LATEST_COLUMN_NAME, EARLIER_COLUMN_NAME
            and EARLIEST_COLUMN_NAME entries.

    Returns:
        The first non-missing value in that order, or NaN when all three are missing.
    """
    for column_name in (LATEST_COLUMN_NAME, EARLIER_COLUMN_NAME, EARLIEST_COLUMN_NAME):
        cv = row_series[column_name]

        # pd.notna also recognises None and NaT, which comparing str(cv) with 'nan' would miss
        if pd.notna(cv):
            return cv

    # Nothing usable in any of the three columns: say so explicitly instead of returning None
    return np.nan
# Row by row, store the value chosen by f() in the target column
countries_df[countries_target_column_name] = countries_df.apply(f, axis='columns')
# Preview the first five rows
countries_df.head(5)

Unnamed: 0,country_name,total_population,physicians_per_10k_2009,physicians_per_10k_2013,physicians_per_10k_latest,physicians_per_10k
0,Australia,19612,100.0,327.0,,327.0
1,Austria,31175,380.0,483.0,,483.0
2,Azerbaijan,32388,380.0,340.0,,340.0
3,Albania,3626,110.0,115.0,,115.0
4,Algeria,40857,120.0,121.0,172.0,172.0


In [16]:

# France has 327.0 physicians per 10K whilst NY has 389.4?
# Show the France row to compare against the state figures
mask_series = countries_df['country_name'].isin(['France'])
countries_df.loc[mask_series]

Unnamed: 0,country_name,total_population,physicians_per_10k_2009,physicians_per_10k_2013,physicians_per_10k_latest,physicians_per_10k
170,France,227683,370.0,319.0,327.0,327.0


In [9]:

# Look for country duplicates and misspellings: normalise the scraped names
# first (names without a lookup entry are kept), then compare the names found
# in only one of the two sources and show the pairs scoring above 0.6
countries_df['country_name'] = countries_df['country_name'].map(
    lambda country_name: ssu.country_name_dict.get(country_name, country_name)
)
countries_list = sorted(set(countries_df.country_name) ^ set(all_countries_df.country_name))
doubles_df = ssu.check_4_doubles(countries_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)

Unnamed: 0,first_item,second_item,max_similarity
12,British Virgin Islands,US Virgin Islands,0.769231
31,Greenland,Grenada,0.75
16,Cayman Islands,Åland Islands,0.740741
59,Sint Maarten,St. Martin,0.727273
35,Guernsey,Jersey,0.714286
10,Bouvet Island,Faroe Islands,0.692308
25,Faroe Islands,Åland Islands,0.692308
27,French Guiana,French Polynesia,0.62069



## Create Equivalence Dictionaries

In [10]:

# Build the state->country and country->state lookups from the two prepared frames.
# Arguments: frame, name column and value column for countries, then the same for states.
# NOTE(review): how an "equivalent" is chosen lives in StatsScrapingUtilities — presumably
# the closest value; confirm there
state_to_country_equivalent_dict, country_to_state_equivalent_dict = ssu.get_country_state_equivalents(
    countries_df, 'country_name', countries_target_column_name,
    us_states_df, 'state_name', states_target_column_name,
    cn_col_explanation=None, st_col_explanation=None,
    countries_set=None, states_set=None, verbose=False)

In [11]:

# Column that will hold each state's equivalent country
string_column_name = 'Country_Equivalent_Physicians_per_Capita'
# Look up every state in the index; a state with no entry keeps its own name as the value
us_stats_df[string_column_name] = us_stats_df.index.map(lambda x: state_to_country_equivalent_dict.get(x, x))
# Persist the updated frame (the output below shows it being pickled)
s.store_objects(us_stats_df=us_stats_df)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\us_stats_df.pkl


In [12]:

# Copy the scraped per-state values onto us_stats_df; any state missing from
# the scraped table receives the smallest scraped value instead
states_min = us_states_df[states_target_column_name].min()
states_dict = us_states_df.set_index('state_name')[states_target_column_name].to_dict()
us_stats_df[states_target_column_name] = us_stats_df.index.map(
    lambda state_name: states_dict.get(state_name, states_min)
)

# Record a human-readable description for the new column and persist the dictionary
column_description_dict[states_target_column_name] = 'Physicians per Capita by State (latest)'
s.store_objects(column_description_dict=column_description_dict)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\column_description_dict.pkl



## Choropleth

In [13]:

# NOTE(review): `c` is not assigned in any visible cell (ChoroplethUtilities is imported
# but never instantiated here) — confirm where it is created before a fresh run
c.create_label_line_file()
# Build the map from the numeric column and the equivalent-country text column;
# the returned value is the path of the SVG, printed below as an absolute path
svg_file_path = c.create_country_colored_labeled_map(numeric_column_name=states_target_column_name,
                                                     string_column_name=string_column_name,
                                                     one_country_df=us_stats_df)
print(os.path.abspath(svg_file_path))

C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\svg\US_physicians_per_10k_Country_Equivalent_Physicians_per_Capita.svg



----
# Get State/Country Health Care Costs Equivalents


## Clean and prepare US States dataset

In [4]:

# Source page for health care costs by state
url = 'https://worldpopulationreview.com/state-rankings/health-care-costs-by-state'
# Scrape the page's tables; the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables(url)

[(0, (50, 2))]


In [5]:

# Start from the first scraped table and name its two columns
us_states_df = tables_list[0].copy()
states_target_column_name = 'spending_per_capita'
us_states_df.columns = ['state_name', states_target_column_name]

# Keep only digits and dots, then parse; unparseable cells become NaN
# (the sample below shows a NaN for Wyoming and float-formatted values)
digits_only_series = us_states_df[states_target_column_name].map(lambda cell: re.sub(r'[^0-9\.]+', '', str(cell)))
us_states_df[states_target_column_name] = pd.to_numeric(digits_only_series, errors='coerce', downcast='integer')
us_states_df.sample(5)

Unnamed: 0,state_name,spending_per_capita
30,Indiana,7651.0
40,New Hampshire,7214.0
34,Arizona,7549.0
49,Wyoming,
4,New York,9851.0


In [25]:

# Look for US state duplicates and misspellings: take the names that appear in
# only one of the two sources and show the pairs scoring above 0.6
states_list = sorted(set(us_states_df.state_name) ^ set(us_stats_df.index))
doubles_df = ssu.check_4_doubles(states_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)

Unnamed: 0,first_item,second_item,max_similarity



## Clean and prepare Countries dataset

In [6]:

# Scrape the World Bank indicator page through a browser driver
driver = ssu.get_driver()
try:
    tables_list = ssu.get_page_tables('https://data.worldbank.org/indicator/SH.XPD.CHEX.PC.CD', driver=driver)
finally:
    # Close the browser even when the scrape raises, so no driver is left open
    driver.close()

# The live page may yield no tables (see the output below); fall back to the saved HTML copy
if not tables_list:
    tables_list = ssu.get_page_tables('../data/html/world_bank_healthcare_apending_per_capita_by_country.html')

Getting the FireFox driver
No tables found
[]
[(0, (248, 3))]


In [7]:

# Start from the first scraped table and name its three columns
countries_df = tables_list[0].copy()
countries_target_column_name = 'spending_per_capita'
countries_df.columns = ['country_name', 'study_year', countries_target_column_name]

# Keep only digits and dots, then parse; unparseable cells become NaN.
# NOTE(review): the sample values below look large for per-capita spending —
# verify that decimal separators survive the character stripping
stripped_series = countries_df[countries_target_column_name].map(lambda cell: re.sub(r'[^0-9\.]+', '', str(cell)))
countries_df[countries_target_column_name] = pd.to_numeric(stripped_series, errors='coerce', downcast='float')
countries_df.sample(5)

Unnamed: 0,country_name,study_year,spending_per_capita
128,"Micronesia, Fed. Sts.",2019.0,4152.0
138,Nepal,2019.0,5325.0
137,Nauru,2019.0,104945.0
185,Sudan,2019.0,4693.0
1,Albania,2018.0,27491.0


In [None]:

# Look for country duplicates and misspellings: normalise the scraped names
# first (names without a lookup entry are kept), then compare the names found
# in only one of the two sources and show the pairs scoring above 0.6
countries_df['country_name'] = countries_df['country_name'].map(
    lambda country_name: ssu.country_name_dict.get(country_name, country_name)
)
countries_list = sorted(set(countries_df.country_name) ^ set(all_countries_df.country_name))
doubles_df = ssu.check_4_doubles(countries_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)


## Create Equivalence Dictionaries

In [10]:

# Build the state->country and country->state lookups from the two spending frames.
# Arguments: frame, name column and value column for countries, then the same for states.
# NOTE(review): how an "equivalent" is chosen lives in StatsScrapingUtilities — presumably
# the closest value; confirm there
state_to_country_equivalent_dict, country_to_state_equivalent_dict = ssu.get_country_state_equivalents(
    countries_df, 'country_name', countries_target_column_name,
    us_states_df, 'state_name', states_target_column_name,
    cn_col_explanation=None, st_col_explanation=None,
    countries_set=None, states_set=None, verbose=False)

In [11]:

# Column that will hold each state's equivalent country
string_column_name = 'Country_Equivalent_Health_Care_Costs'
# Look up every state in the index; a state with no entry keeps its own name as the value
us_stats_df[string_column_name] = us_stats_df.index.map(lambda x: state_to_country_equivalent_dict.get(x, x))

In [12]:

# Copy the scraped per-state values onto us_stats_df; any state missing from
# the scraped table receives the smallest scraped value instead
states_min = us_states_df[states_target_column_name].min()
states_dict = us_states_df.set_index('state_name')[states_target_column_name].to_dict()
us_stats_df[states_target_column_name] = us_stats_df.index.map(
    lambda state_name: states_dict.get(state_name, states_min)
)

# Record a human-readable description for the new column and persist the dictionary
column_description_dict[states_target_column_name] = 'Health Care Costs by State (2022)'
s.store_objects(column_description_dict=column_description_dict)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\column_description_dict.pkl



---
## Choropleth

In [13]:

# NOTE(review): `c` is not assigned in any visible cell (ChoroplethUtilities is imported
# but never instantiated here) — confirm where it is created before a fresh run
c.create_label_line_file()
# Build the map from the numeric column and the equivalent-country text column;
# the returned value is the path of the SVG, printed below as an absolute path
svg_file_path = c.create_country_colored_labeled_map(numeric_column_name=states_target_column_name,
                                                     string_column_name=string_column_name,
                                                     one_country_df=us_stats_df)
print(os.path.abspath(svg_file_path))

C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\svg\US_spending_per_capita_Country_Equivalent_Health_Care_Costs.svg



----
# Get State/Country Life Expectancy Equivalents


## Clean and prepare US States dataset

In [5]:

# Source page for life expectancy by state
url = 'https://worldpopulationreview.com/state-rankings/life-expectancy-by-state'
# Scrape the page's tables; the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables(url)

[(0, (50, 7))]


In [6]:

# Work on a copy of the first scraped table, kept under a section-specific name
life_expectancy_us_states_df = tables_list[0].copy()
# Positional rename of the seven columns: state, overall figure, then one figure per group
life_expectancy_us_states_df.columns = ['state_name', 'life_expectancy', 'life_expectancy_black', 'life_expectancy_latino',
                                        'life_expectancy_asian', 'life_expectancy_native_american', 'life_expectancy_white']
# Spot-check five random rows
life_expectancy_us_states_df.sample(5)

Unnamed: 0,state_name,life_expectancy,life_expectancy_black,life_expectancy_latino,life_expectancy_asian,life_expectancy_native_american,life_expectancy_white
33,North Carolina,77.8,74.7,,88.9,76.6,78.3
41,South Carolina,76.2,74.0,,,,77.8
8,Massachusetts,79.9,78.8,87.1,89.1,,80.4
47,Alabama,74.9,72.9,,76.0,,76.0
5,New Jersey,80.4,75.5,84.7,89.4,,80.3



## Clean and prepare Countries dataset

In [7]:

# Scrape life expectancy by country through a browser driver
url = 'https://worldpopulationreview.com/countries/life-expectancy'
driver = ssu.get_driver()
try:
    tables_list = ssu.get_page_tables(url, driver=driver)
finally:
    # Close the browser even when the scrape raises, so no driver is left open
    driver.close()

Getting the FireFox driver
[(0, (237, 4))]


In [8]:

# Work on a copy of the first scraped table, kept under a section-specific name
life_expectancy_countries_df = tables_list[0].copy()
# Positional rename of the four columns
life_expectancy_countries_df.columns = ['country_name', 'life_expectancy', 'life_expectancy_males', 'life_expectancy_females']
# Spot-check five random rows
life_expectancy_countries_df.sample(5)

Unnamed: 0,country_name,life_expectancy,life_expectancy_males,life_expectancy_females
177,Puerto Rico,79.72,75.58,83.9
55,Senegal,67.91,65.47,70.2
13,Seychelles,71.74,68.24,76.04
162,Bahamas,74.36,70.76,77.84
114,Moldova,68.62,64.22,73.32


In [9]:

# Flag every row whose country_name occurs more than once (keep=False marks all copies)
mask_series = life_expectancy_countries_df.duplicated(subset=['country_name'], keep=False)
# Show the flagged rows; the recorded output is empty, i.e. none were found
life_expectancy_countries_df[mask_series]

Unnamed: 0,country_name,life_expectancy,life_expectancy_males,life_expectancy_females


In [10]:

# Normalise the scraped country names (names without a lookup entry are kept),
# then compare the names found in only one of the two sources and show the
# pairs scoring above 0.6
life_expectancy_countries_df['country_name'] = life_expectancy_countries_df['country_name'].map(
    lambda country_name: ssu.country_name_dict.get(country_name, country_name)
)
countries_list = sorted(set(life_expectancy_countries_df.country_name) ^ set(all_countries_df.country_name))
doubles_df = ssu.check_4_doubles(countries_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)

Unnamed: 0,first_item,second_item,max_similarity
13,Falkland Islands,Åland Islands,0.83


In [11]:

# Compare the state names found in only one of the two sources and show the
# pairs scoring above 0.6
states_list = sorted(set(life_expectancy_us_states_df.state_name) ^ set(us_stats_df.index))
doubles_df = ssu.check_4_doubles(states_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False)

Unnamed: 0,first_item,second_item,max_similarity


In [12]:

# Build the state->country and country->state lookups from the life-expectancy frames.
# The generic countries_df / us_states_df names must not be used here: at this point
# they still hold the health-care-cost tables prepared in the previous section.
state_to_country_equivalent_dict, country_to_state_equivalent_dict = ssu.get_country_state_equivalents(
    life_expectancy_countries_df, 'country_name', 'life_expectancy',
    life_expectancy_us_states_df, 'state_name', 'life_expectancy',
    verbose=False)

In [13]:

# Look up every state in the index; a state with no entry keeps its own name as the value
us_stats_df['Country_Equivalent_Life_Expectancy'] = us_stats_df.index.map(lambda x: state_to_country_equivalent_dict.get(x, x))

In [14]:

# Copy the scraped overall life expectancy onto us_stats_df; any state missing
# from the scraped table receives the smallest scraped value instead
min_life = life_expectancy_us_states_df['life_expectancy'].min()
life_expectancy_dict = life_expectancy_us_states_df.set_index('state_name')['life_expectancy'].to_dict()
us_stats_df['life_expectancy'] = us_stats_df.index.map(
    lambda state_name: life_expectancy_dict.get(state_name, min_life)
)

# Record a human-readable description for the new column and persist the dictionary
column_description_dict['life_expectancy'] = 'Overall average life expectency (2020)'
s.store_objects(column_description_dict=column_description_dict)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\column_description_dict.pkl



---
## Choropleth

In [15]:

# NOTE(review): `c` is not assigned in any visible cell (ChoroplethUtilities is imported
# but never instantiated here) — confirm where it is created before a fresh run
c.create_label_line_file()
# Columns to plot: the numeric value and the equivalent-country text
numeric_column_name = 'life_expectancy'
string_column_name = 'Country_Equivalent_Life_Expectancy'
# The returned value is the path of the SVG, printed below as an absolute path
svg_file_path = c.create_country_colored_labeled_map(numeric_column_name=numeric_column_name,
                                                     string_column_name=string_column_name,
                                                     one_country_df=us_stats_df)
print(os.path.abspath(svg_file_path))

C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\svg\US_life_expectancy_Country_Equivalent_Life_Expectancy.svg



----
# Get State/Country Obesity Equivalents


## Clean and prepare US States dataset

In [51]:

# Source page for obesity rates by state
url = 'https://en.wikipedia.org/wiki/Obesity_in_the_United_States'
# Scrape the page's tables; the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables(url)

[(0, (56, 6)), (1, (24, 2)), (10, (11, 2)), (12, (11, 2)), (4, (9, 2)), (5, (8, 2)), (6, (6, 2)), (8, (4, 2)), (2, (3, 2)), (9, (3, 2)), (11, (2, 2)), (3, (1, 2)), (7, (1, 2))]


In [56]:

# Work on a copy of the first scraped table (this rebinds the generic us_states_df name)
us_states_df = tables_list[0].copy()
# Positional rename of the six columns
us_states_df.columns = ['state_name', 'obesity_rank', 'adults_obesity_rate_2005', 'adults_obesity_rate_2020',
                                'adults_overweight_rate_2005', 'children_and_adolescents_obesity_rate_2005']
def f(x):
    """
    Convert a percentage cell such as '28.4%' into a float.

    Only the text before the first '%' is parsed, so anything trailing the
    sign (for example a footnote marker) is ignored.

    Parameters:
        x: the raw cell value; it is converted with str() before parsing.

    Returns:
        The number in front of the '%' sign, or NaN when the cell holds no
        '%' or the text in front of it is not a valid number.
    """
    rate_str = str(x)
    if '%' not in rate_str:
        return np.nan

    try:
        return float(rate_str.split('%')[0])
    except ValueError:
        # Cells like '%' or '—%' carry no usable number; treat them as missing
        return np.nan
# Run f() over each of the four rate columns, replacing the column with the parsed values
for cn in ['adults_obesity_rate_2005', 'adults_obesity_rate_2020',
           'adults_overweight_rate_2005', 'children_and_adolescents_obesity_rate_2005']:
    us_states_df[cn] = us_states_df[cn].map(f)
# Spot-check five random rows
us_states_df.sample(5)

Unnamed: 0,state_name,obesity_rank,adults_obesity_rate_2005,adults_obesity_rate_2020,adults_overweight_rate_2005,children_and_adolescents_obesity_rate_2005
19,Kentucky,8,28.4,34.3,66.8,20.6
0,Alabama,5,30.1,36.3,65.4,16.7
14,Idaho,32,24.6,29.3,61.4,10.1
35,North Carolina,20,27.1,32.1,63.4,19.3
45,South Dakota,22,26.1,31.9,64.2,12.1


In [57]:

# Drop the District of Columbia row from the scraped table.
# NOTE(review): the reason is not stated here; the index shown at the end of the notebook lists
# 'District of Columbia', so it will receive the fallback minimum in the lookup cells below — confirm intent
mask_series = (us_states_df.state_name == 'District of Columbia')
us_states_df = us_states_df[~mask_series]

In [58]:

# Look for US state duplicates and misspellings: print the names that appear
# in only one of the two sources, show any pairs scoring above 0.6, and then
# show any rows that share a state_name
states_list = sorted(set(us_states_df.state_name) ^ set(ssu.us_stats_df.index))
print(states_list)
doubles_df = ssu.check_4_doubles(states_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
if mask_series.any():
    display(doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False))
mask_series = us_states_df.duplicated(subset=['state_name'], keep=False)
if mask_series.any():
    display(us_states_df.loc[mask_series])

['American Samoa', 'District of Columbia', 'Guam', 'Northern Mariana Islands', 'Puerto Rico', 'Virgin Islands (U.S.)']


In [60]:

# Persist the prepared state table under its own name (the output below shows it being pickled)
s.store_objects(obesity_us_states_df=us_states_df)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\obesity_us_states_df.pkl



## Clean and prepare Countries dataset

In [61]:

# Source page for obesity rates by country
url = 'https://en.wikipedia.org/wiki/List_of_countries_by_obesity_rate'
# The browser-driver lines are left disabled; this page is scraped with driver=None
# driver = ssu.get_driver()
tables_list = ssu.get_page_tables(url, driver=None)
# driver.close()

[(0, (191, 3)), (1, (12, 2))]


In [62]:

# Start from the first scraped table and name its three columns
countries_df = tables_list[0].copy()
countries_df.columns = ['country_name', 'obesity_rank', 'obesity_rate_2016']

# Convert every rate to a float, then spot-check five random rows
countries_df['obesity_rate_2016'] = countries_df['obesity_rate_2016'].map(float)
countries_df.sample(5)

Unnamed: 0,country_name,obesity_rank,obesity_rate_2016
8,Kiribati,9,46.0
25,Canada,26,29.4
1,Cook Islands,2,55.9
136,Mauritius,137,10.8
155,Equatorial Guinea,156,8.0


In [63]:

# Look for country duplicates and misspellings: normalise the scraped names
# first (names without a lookup entry are kept), show any pairs scoring above
# 0.6 among the names found in only one source, and then show any rows that
# share a country_name
countries_df['country_name'] = countries_df['country_name'].map(
    lambda country_name: ssu.country_name_dict.get(country_name, country_name)
)
countries_list = sorted(set(countries_df.country_name) ^ set(all_countries_df.country_name))
doubles_df = ssu.check_4_doubles(countries_list)
columns_list = ['first_item', 'second_item', 'max_similarity']
mask_series = doubles_df['max_similarity'] > 0.6
if mask_series.any():
    display(doubles_df.loc[mask_series, columns_list].sort_values('max_similarity', ascending=False))
mask_series = countries_df.duplicated(subset=['country_name'], keep=False)
if mask_series.any():
    display(countries_df.loc[mask_series])

Unnamed: 0,first_item,second_item,max_similarity
8,British Virgin Islands,US Virgin Islands,0.769231
10,Cayman Islands,Åland Islands,0.740741
48,Sint Maarten,St. Martin,0.727273
24,Guernsey,Jersey,0.714286
47,San Marino,St. Martin,0.7
6,Bouvet Island,Faroe Islands,0.692308
16,Faroe Islands,Åland Islands,0.692308
17,French Guiana,French Polynesia,0.62069


In [64]:

# Persist the prepared country table under its own name (the output below shows it being pickled)
s.store_objects(obesity_countries_df=countries_df)

Pickling to C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\pkl\obesity_countries_df.pkl



## Prepare for and Create Choropleth

In [34]:

# Build the state->country and country->state lookups from the obesity frames:
# the countries' 2016 rate is matched against the states' mid-2000s adult rate.
# The target-column variables from earlier sections must not be used here —
# they still name 'spending_per_capita', which these frames do not contain.
state_to_country_equivalent_dict, country_to_state_equivalent_dict = ssu.get_country_state_equivalents(
    countries_df, 'country_name', 'obesity_rate_2016',
    us_states_df, 'state_name', 'adults_obesity_rate_2005',
    verbose=False)

In [38]:

# Record human-readable descriptions for the two obesity columns and persist the dictionary
column_description_dict['adults_obesity_rate_2020'] = 'Obese adults (2020)'
column_description_dict['adults_obesity_rate_2005'] = 'Obese adults (mid-2000s)'
s.store_objects(column_description_dict=column_description_dict)
# Look up every state in the index; a state with no entry keeps its own name as the value
us_stats_df['Country_Equivalent_Obesity'] = us_stats_df.index.map(lambda x: state_to_country_equivalent_dict.get(x, x))

In [39]:

# Copy the 2020 adult obesity rate onto us_stats_df; any state missing from the
# scraped table receives the smallest scraped rate instead
obesity_min = us_states_df['adults_obesity_rate_2020'].min()
obesity_dict = us_states_df.set_index('state_name')['adults_obesity_rate_2020'].to_dict()
us_stats_df['adults_obesity_rate_2020'] = us_stats_df.index.map(
    lambda state_name: obesity_dict.get(state_name, obesity_min)
)

In [40]:

# Copy the mid-2000s adult obesity rate onto us_stats_df; any state missing
# from the scraped table receives the smallest scraped rate instead
obesity_min = us_states_df['adults_obesity_rate_2005'].min()
obesity_dict = us_states_df.set_index('state_name')['adults_obesity_rate_2005'].to_dict()
us_stats_df['adults_obesity_rate_2005'] = us_stats_df.index.map(
    lambda state_name: obesity_dict.get(state_name, obesity_min)
)


---
## Choropleth

In [41]:

# NOTE(review): `c` is not assigned in any visible cell (ChoroplethUtilities is imported
# but never instantiated here) — confirm where it is created before a fresh run
c.create_label_line_file()
# Columns to plot: the numeric value and the equivalent-country text
numeric_column_name = 'adults_obesity_rate_2005'
string_column_name = 'Country_Equivalent_Obesity'
# The returned value is the path of the SVG, printed below as an absolute path
svg_file_path = c.create_country_colored_labeled_map(numeric_column_name=numeric_column_name,
                                                     string_column_name=string_column_name,
                                                     one_country_df=us_stats_df)
print(os.path.abspath(svg_file_path))

C:\Users\daveb\OneDrive\Documents\GitHub\StatsByCountry\saves\svg\US_adults_obesity_rate_2005_Country_Equivalent_Obesity.svg



----
# Get State/Country Student Loan Debt Equivalents

In [17]:

# Source page for student loan debt by state
url = 'https://educationdata.org/student-loan-debt-by-state'
# Scrape the page's tables; the output below appears to list (table index, shape) pairs
tables_list = ssu.get_page_tables(url)

[(0, (53, 3))]


In [18]:

# Work on a copy of the first scraped table; its columns are still the raw page headers
# NOTE(review): "load" in the variable name is presumably a typo for "loan"
student_load_debt_us_states_df = tables_list[0].copy()
# Spot-check five random rows
student_load_debt_us_states_df.sample(5)

Unnamed: 0,State,Average Borrower Debt,State’s Total Debt
10,Vermont,"$37,516",$2.9 billion
29,New Hampshire,"$34,085",$6.5 billion
52,Other/Unspecified*,"$25,960",$99.0 billion
36,Idaho,"$33,012",$7.2 billion
2,Georgia,"$41,639",$68.6 billion



----

In [10]:

# Show the state names that index us_stats_df (the names every lookup above is keyed on)
us_stats_df.index

Index(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
       'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
       'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan',
       'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
       'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype='object', name='state_name')