## Key Results

This notebook uses the simulation results (country-level results and `df_zones` results) to generate the key numerical findings.

In [3]:
import pathlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import weightedstats as ws
from tabulate import tabulate

In [3]:
# Load the median and mean results tables, in that order.
country_medians, country_means = (
    pd.read_csv(results_csv)
    for results_csv in (
        "../results/median_results.csv",
        "../results/mean_results.csv",
    )
)

In [4]:
# Read the GIS zones table from disk.
df_gis = pd.read_csv(filepath_or_buffer="../data/GIS/GIS_data_zones.csv")

In [8]:
# Sanity check: no value in the "id" column may appear more than once.
assert not df_gis["id"].duplicated().any()

#### Data for Choropleths
Export country-level and district-level results for visualisations in Datawrapper.

In [4]:
# Keep only the country identifiers and the share of people without water,
# then write that slim table to CSV (without the index column).
export_columns = ["ISOCODE", "Entity", "percent_without_water"]
country_medians_simple = country_medians[export_columns]
country_medians_simple.to_csv("../results/country_medians.csv", index=False)

#### Key Median Results

In [5]:
# Stack the ten countries with the highest "percent_without_water" on top of
# the ten with the lowest, and export the combined table.
top10 = country_medians_simple.nlargest(n=10, columns="percent_without_water")
bottom10 = country_medians_simple.nsmallest(n=10, columns="percent_without_water")
top_bottom = pd.concat(objs=[top10, bottom10])
top_bottom.to_csv(path_or_buf="../results/top_bottom_10_countries.csv", index=False)

In [13]:
# Global shares from the median results: each summed population column is
# divided by the summed raw country population, so the values are fractions
# in [0, 1] rather than 0-100 percentages.
median_total_pop_raw = country_medians["country_pop_raw"].sum()

global_median_percentage_without_water = (
    country_medians["country_pop_without_water"].sum() / median_total_pop_raw
)
global_median_percentage_with_water = (
    country_medians["country_pop_with_water"].sum() / median_total_pop_raw
)
global_median_percentage_piped_with_water = (
    country_medians["population_piped_with_access"].sum() / median_total_pop_raw
)

# Report the three shares, one per line.
print(
    f"Global median percentage without water: {global_median_percentage_without_water}",
    f"Global median percentage with water: {global_median_percentage_with_water}",
    f"Global median percentage piped with water: {global_median_percentage_piped_with_water}",
    sep="\n",
)



Global median percentage without water: 0.3349761458238577
Global median percentage with water: 0.6648919346754667
Global median percentage piped with water: 0.35497538759528036


#### Key Mean Results (used to look at the cycling vs walking breakdown)

In [1]:
# TODO update via Codium to add 5th and 95th percentiles
# Global shares from the mean results. All numerators and denominators are
# column sums over `country_means`, so every ratio describes one result set.
mean_total_pop_raw = country_means["country_pop_raw"].sum()
mean_piped_with_access = country_means["population_piped_with_access"].sum()
mean_piped_with_cycling_access = country_means["population_piped_with_cycling_access"].sum()
mean_piped_with_walking_access = country_means["population_piped_with_walking_access"].sum()

# Share of the total population that has piped access.
global_mean_percentage_piped_with_access = mean_piped_with_access / mean_total_pop_raw

# Shares of the piped-with-access population reachable by cycling / walking.
global_mean_piped_percentage_with_cycling_access = mean_piped_with_cycling_access / mean_piped_with_access
global_mean_piped_percentage_with_walking_access = mean_piped_with_walking_access / mean_piped_with_access

# "Only cycling" = piped access minus walking access. The denominator was
# previously taken from `country_medians`, mixing mean and median results;
# it now comes from `country_means` like the numerator.
global_mean_piped_percentage_with_only_cycling_access = (
    mean_piped_with_access - mean_piped_with_walking_access
) / mean_piped_with_access


# Print values
print(f"Global mean percentage piped with access: {global_mean_percentage_piped_with_access}")
print(f"Global mean percentage piped with cycling access: {global_mean_piped_percentage_with_cycling_access}")
print(f"Global mean percentage piped with walking access: {global_mean_piped_percentage_with_walking_access}")
print(f"Global mean percentage piped with only cycling access: {global_mean_piped_percentage_with_only_cycling_access}")

NameError: name 'country_means' is not defined

In [34]:
# Preview the first five rows of the median results table.
country_medians.head(n=5)

Unnamed: 0.1,Unnamed: 0,ISOCODE,Entity,country_pop_raw,country_pop_with_water,country_pop_without_water,population_piped_with_access,population_piped_with_cycling_access,population_piped_with_walking_access,Nat Piped,region,subregion,weighted_med,percent_with_water,percent_without_water,percentage_piped_with_walking_access,percentage_piped_with_cycling_access,population_piped_with_only_cycling_access,percentage_piped_with_only_cycling_access
0,1,AFG,Afghanistan,39835428.0,27068950.0,12766480.0,2890145.0,182253.287604,2810832.0,22.0,Asia,Southern Asia,5.830952,67.951946,32.048054,0.972557,0.06306,79312.997105,0.027443
1,2,AGO,Angola,33933611.0,12330820.0,21602790.0,3324783.0,137218.035347,3303394.0,42.0,Africa,Middle Africa,4.123105,36.338071,63.661929,0.993567,0.041271,21389.610186,0.006433
2,4,ALB,Albania,2872934.0,2284331.0,588602.9,1704957.0,34262.111983,1698505.0,81.0,Europe,Southern Europe,2.236068,79.512134,20.487866,0.996215,0.020096,6452.629846,0.003785
3,5,AND,Andorra,77354.0,32663.7,44690.3,32663.7,0.0,32663.7,100.0,Europe,Southern Europe,4.123105,42.226254,57.773746,1.0,0.0,0.0,0.0
4,6,ARE,United Arab Emirates,9991083.0,0.0,9991071.0,0.0,0.0,0.0,100.0,Asia,Western Asia,171.819703,0.0,99.999876,,,0.0,


#### Most important countries for bicycles

In [39]:
# TODO Change to means so walking/cycling split makes sense
# Derive per-country shares of the piped-with-access population and store them
# as columns on `country_medians` (existing columns of the same name are
# overwritten). Where "population_piped_with_access" is 0 the ratios are NaN.
piped_with_access = country_medians["population_piped_with_access"]
piped_with_walking = country_medians["population_piped_with_walking_access"]
piped_only_cycling = piped_with_access - piped_with_walking

country_medians["percentage_piped_with_walking_access"] = piped_with_walking / piped_with_access
country_medians["percentage_piped_with_cycling_access"] = (
    country_medians["population_piped_with_cycling_access"] / piped_with_access
)
country_medians["percentage_piped_of_total_access"] = (
    piped_with_access / country_medians["country_pop_with_water"]
)
country_medians["population_piped_with_only_cycling_access"] = piped_only_cycling
country_medians["percentage_piped_with_only_cycling_access"] = piped_only_cycling / piped_with_access

# Countries with the smallest walking share. NOTE: despite the name, this
# keeps up to 50 rows, not 10.
top10_piped_with_walking = country_medians.nsmallest(
    n=50, columns="percentage_piped_with_walking_access"
)

# Alternative ranking: the 30 countries with the largest "only cycling" share,
# printed as a psql-style text table.
bottom_piped_with_walking = country_medians.nlargest(
    n=30, columns="percentage_piped_with_only_cycling_access"
)
cycling_report_columns = [
    "Entity",
    "percentage_piped_with_only_cycling_access",
    "percentage_piped_of_total_access",
    "percent_with_water",
    "population_piped_with_only_cycling_access",
]
print(tabulate(bottom_piped_with_walking[cycling_report_columns], headers="keys", tablefmt="psql"))




+-----+--------------+---------------------------------------------+------------------------------------+----------------------+---------------------------------------------+
|     | Entity       |   percentage_piped_with_only_cycling_access |   percentage_piped_of_total_access |   percent_with_water |   population_piped_with_only_cycling_access |
|-----+--------------+---------------------------------------------+------------------------------------+----------------------+---------------------------------------------|
|  71 | Hong Kong    |                                   1         |                        0.243235    |             18.7134  |                            343786           |
|  93 | Kuwait       |                                   1         |                        0.00575724  |              7.95424 |                              1982.24        |
| 117 | Montserrat   |                                   0.693501  |                        0.41164     |             43.4622