In [1]:
import pandas as pd
import numpy as np
import sqlite3
import requests
import pprint

import sys

#import geopandas as gpd
from shapely.geometry import Point
import pprint
import geojson

In [2]:
def get_country_coordinates(iso_code, timeout=10):
    """Look up a country's coordinates through the REST Countries API.

    Sends a GET request to ``https://restcountries.com/v3.1/alpha/{iso_code}``
    and reads the ``latlng`` field of the first record in the JSON body.

    Args:
        iso_code: Country code interpolated into the request URL.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so an unresponsive endpoint cannot hang the notebook forever.

    Returns:
        The ``latlng`` value from the response (``[latitude, longitude]``),
        or ``None`` when the status code is not 200 or the body does not
        contain a usable ``latlng`` entry.

    Raises:
        requests.exceptions.RequestException: Connection failures and
            timeouts are not swallowed; they propagate to the caller.
    """
    url = f"https://restcountries.com/v3.1/alpha/{iso_code}"

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON; treat it like any other failed lookup.
        return None

    # The endpoint normally answers with a list of records; tolerate a bare
    # record as well, and reject anything else (including an empty list).
    record = payload[0] if isinstance(payload, list) and payload else payload
    if not isinstance(record, dict):
        return None

    latlng = record.get("latlng")
    if not isinstance(latlng, (list, tuple)) or len(latlng) < 2:
        return None
    return latlng

In [3]:
# Switches read by the coordinate-building cell below.

# True: load coordinates from ./data/coords.csv instead of querying the
# database and the coordinate lookup function.
skip_coords_retrieval = False
# True: after building coords_df, write it to ./data/coords.csv.
write_coords_to_file = False
# True: print one line per country code as its coordinates are added;
# False: print a single status message before the lookups start.
verbose_retrieval_status = False

In [4]:
# Build coords_df (columns: country, code, lat, lon), either from the cached
# CSV or by reading the country list from the database and looking up each
# country's coordinates with get_country_coordinates().
if skip_coords_retrieval:
    coords_df = pd.read_csv("./data/coords.csv")
else:
    conn = sqlite3.connect("./data/malnutrition_data.db")
    query = "SELECT * FROM clean_data"

    # Close the connection even if the read fails; the data is fully loaded
    # into data_df by the time read_sql_query returns.
    try:
        data_df = pd.read_sql_query(query, conn)
    finally:
        conn.close()

    filtered_data_df = data_df.query("Dimension == 'Age (months)'")

    unique_countries = filtered_data_df["Country"].unique()
    unique_codes = filtered_data_df["Country ISO-3 Code"].unique()

    # Remove Chile from the analysis.
    # NOTE(review): the removal is positional (index 28) and relies on the
    # current ordering of the data; confirm that position 28 is still Chile
    # whenever the underlying table changes.
    unique_countries = np.delete(unique_countries, 28)
    unique_codes = np.delete(unique_codes, 28)

    lats = []
    longs = []

    if not verbose_retrieval_status:
        print("populating coordinates lists using get_country_coordinates()...")
    for code in unique_codes:
        temp_coords = get_country_coordinates(code)
        if temp_coords is None:
            # Fail with the offending code instead of an opaque
            # "'NoneType' object is not subscriptable" on the next line.
            raise RuntimeError(f"could not retrieve coordinates for {code}")
        lats.append(temp_coords[0])
        longs.append(temp_coords[1])
        if verbose_retrieval_status:
            print(f"added coords for {code}...")

    coords_df = pd.DataFrame()

    coords_df['country'] = unique_countries
    coords_df['code'] = unique_codes
    coords_df['lat'] = lats
    coords_df['lon'] = longs

    pprint.pp(coords_df)

    if write_coords_to_file:
        coords_df.to_csv("./data/coords.csv", index=False)

populating coordinates lists using get_country_coordinates()...
                            country code        lat         lon
0                       Afghanistan  AFG  33.000000   65.000000
1                           Albania  ALB  41.000000   20.000000
2                           Algeria  DZA  28.000000    3.000000
3                            Angola  AGO -12.500000   18.500000
4                         Argentina  ARG -34.000000  -64.000000
..                              ...  ...        ...         ...
146                        Viet Nam  VNM  16.166667  107.833333
147                           Yemen  YEM  15.000000   48.000000
148                          Zambia  ZMB -15.000000   30.000000
149                        Zimbabwe  ZWE -20.000000   30.000000
150  occupied Palestinian territory  PSE  31.900000   35.200000

[151 rows x 4 columns]


In [5]:
# Load the cleaned table and keep only the rows whose Dimension is
# 'Age (months)'. This runs unconditionally, so filtered_data_df is available
# even when the coordinates were read from the cached CSV.
conn = sqlite3.connect("./data/malnutrition_data.db")
query = "SELECT * FROM clean_data"

# Close the connection even if the read fails; the result is fully loaded
# into data_df by the time read_sql_query returns.
try:
    data_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

filtered_data_df = data_df.query("Dimension == 'Age (months)'")

In [6]:
# Distinct country names and ISO-3 codes from the filtered data, each with the
# entry at position 28 dropped.
# NOTE(review): presumably this drops Chile, matching the comment on the same
# operation in the coordinate-retrieval cell; verify the index.
unique_countries, unique_codes = (
    np.delete(filtered_data_df[column_name].unique(), 28)
    for column_name in ("Country", "Country ISO-3 Code")
)

# Container for the per-country sub-frames collected in the next cell.
per_country_dfs = list()

In [7]:
# One sub-frame per country, in unique_codes order. The list is rebuilt from
# scratch so re-running this cell does not append duplicate entries.
per_country_dfs = [
    filtered_data_df[filtered_data_df["Country ISO-3 Code"] == country_code]
    for country_code in unique_codes
]

# Maps each country code to a frame holding the mean "Prevalence Estimate %"
# per "Anthropometric Indicator". Despite the name, the mean is taken over all
# of the country's rows (every year together), and the dict is keyed by
# country only. Insertion order follows unique_codes; later cells rely on
# that order when aligning values with the code column.
country_year_indicators_lookup_dict = {}

for country_code, current_country_df in zip(unique_codes, per_country_dfs):
    if current_country_df.empty:
        # No rows for this code: leave it out of the lookup.
        continue
    grouped_by_indicator_df = (
        current_country_df
        .groupby("Anthropometric Indicator")["Prevalence Estimate %"]
        .mean()
        .reset_index()
    )
    country_year_indicators_lookup_dict[country_code] = grouped_by_indicator_df

In [8]:
# Result table with one row per country code; the indicator columns are
# attached by a later cell.
prevalence_df = pd.DataFrame()
prevalence_df["code"] = unique_codes

# Indicator names used as lookup keys and as column names.
indicators = ["Overweight", "Stunting", "Underweight", "Wasting", "Wasting Severe"]

# A separate, initially empty list of prevalence values for every indicator.
prevalence_lists_by_indicator = {indicator: [] for indicator in indicators}

In [9]:
# Collect one prevalence value per (country, indicator) and report any country
# that does not have exactly one row for an indicator.

# Start from empty lists so re-running this cell does not append duplicates.
for indicator in indicators:
    prevalence_lists_by_indicator[indicator] = []

for key, value in country_year_indicators_lookup_dict.items():
    for indicator in indicators:
        temp_list = value[value["Anthropometric Indicator"] == indicator]["Prevalence Estimate %"].to_list()
        if len(temp_list) == 1:
            prevalence_lists_by_indicator[indicator].append(temp_list[0])
        else:
            # Zero or several matches: print the country code; nothing is
            # appended for this indicator.
            print(key)


In [10]:
# Reset every indicator's list, then refill it with the first matching
# prevalence value of each country, in the lookup dict's iteration order.
prevalence_lists_by_indicator.update({indicator: [] for indicator in indicators})

for entry in country_year_indicators_lookup_dict.values():
    for indicator in indicators:
        is_indicator_row = entry["Anthropometric Indicator"] == indicator
        prevalence = entry.loc[is_indicator_row, "Prevalence Estimate %"]

        # Indexing [0] raises IndexError when the country has no row for
        # this indicator.
        val = prevalence.tolist()[0]
        prevalence_lists_by_indicator[indicator].append(val)

In [11]:
# Attach each indicator's collected values to prevalence_df as a column named
# after the indicator.
for column_name in indicators:
    column_values = prevalence_lists_by_indicator[column_name]
    prevalence_df[column_name] = column_values

In [12]:
def df_to_geojson(df, properties, lat='lat', lon='lon'):
    """Turn a DataFrame of point rows into a GeoJSON FeatureCollection.

    Args:
        df: DataFrame with one row per point.
        properties: Column names copied into each feature's ``properties``.
        lat: Name of the column supplying the second point coordinate.
        lon: Name of the column supplying the first point coordinate.

    Returns:
        A ``geojson.FeatureCollection`` containing one ``geojson.Feature``
        per row, in row order, each with a ``geojson.Point`` geometry built
        from ``(row[lon], row[lat])``.
    """
    def row_to_feature(row):
        # Build the geometry first, then the property mapping.
        position = geojson.Point((row[lon], row[lat]))
        attributes = {}
        for prop in properties:
            attributes[prop] = row[prop]
        return geojson.Feature(geometry=position, properties=attributes)

    return geojson.FeatureCollection(
        [row_to_feature(row) for _, row in df.iterrows()]
    )

In [13]:
# Print the coordinate table followed by the prevalence table.
for shown_frame in (coords_df, prevalence_df):
    pprint.pp(shown_frame)

                            country code        lat         lon
0                       Afghanistan  AFG  33.000000   65.000000
1                           Albania  ALB  41.000000   20.000000
2                           Algeria  DZA  28.000000    3.000000
3                            Angola  AGO -12.500000   18.500000
4                         Argentina  ARG -34.000000  -64.000000
..                              ...  ...        ...         ...
146                        Viet Nam  VNM  16.166667  107.833333
147                           Yemen  YEM  15.000000   48.000000
148                          Zambia  ZMB -15.000000   30.000000
149                        Zimbabwe  ZWE -20.000000   30.000000
150  occupied Palestinian territory  PSE  31.900000   35.200000

[151 rows x 4 columns]
    code  Overweight   Stunting  Underweight    Wasting  Wasting Severe
0    AFG    4.984954  42.385391    22.937286   7.276614        2.770802
1    ALB   21.083658  21.440019     5.303072   7.021891        3

In [14]:
def _build_layer_df(indicator):
    """Return a copy of coords_df with a ``prevalence`` column for one indicator.

    Values are matched to countries through the shared ``code`` column rather
    than by row position, so a coords_df whose row order or length differs
    from prevalence_df (for example one loaded from the cached CSV) cannot be
    paired with another country's numbers. Codes without a match get NaN.

    Args:
        indicator: Name of the prevalence_df column to attach.

    Returns:
        A new DataFrame with coords_df's columns plus ``prevalence``, in
        coords_df's row order.
    """
    indicator_values = prevalence_df[["code", indicator]].rename(
        columns={indicator: "prevalence"}
    )
    # many_to_one: fail loudly if prevalence_df ever lists a code twice,
    # which would otherwise silently duplicate rows.
    return coords_df.merge(
        indicator_values, on="code", how="left", validate="many_to_one"
    )


# One map layer per indicator: country, code, lat, lon, prevalence.
overweight_layer_df = _build_layer_df("Overweight")
stunting_layer_df = _build_layer_df("Stunting")
underweight_layer_df = _build_layer_df("Underweight")
wasting_layer_df = _build_layer_df("Wasting")
wasting_severe_layer_df = _build_layer_df("Wasting Severe")

In [15]:
# Print each layer frame under a heading with its variable name.
for layer_label, layer_frame in (
    ("overweight_layer_df", overweight_layer_df),
    ("stunting_layer_df", stunting_layer_df),
    ("underweight_layer_df", underweight_layer_df),
    ("wasting_layer_df", wasting_layer_df),
    ("wasting_severe_layer_df", wasting_severe_layer_df),
):
    print(layer_label)
    pprint.pp(layer_frame)

overweight_layer_df
                            country code        lat         lon  prevalence
0                       Afghanistan  AFG  33.000000   65.000000    4.984954
1                           Albania  ALB  41.000000   20.000000   21.083658
2                           Algeria  DZA  28.000000    3.000000   13.257227
3                            Angola  AGO -12.500000   18.500000    2.827835
4                         Argentina  ARG -34.000000  -64.000000   10.927755
..                              ...  ...        ...         ...         ...
146                        Viet Nam  VNM  16.166667  107.833333    3.383203
147                           Yemen  YEM  15.000000   48.000000    4.246249
148                          Zambia  ZMB -15.000000   30.000000    7.511327
149                        Zimbabwe  ZWE -20.000000   30.000000    6.979456
150  occupied Palestinian territory  PSE  31.900000   35.200000    7.626907

[151 rows x 5 columns]
stunting_layer_df
                          

In [16]:
# Convert every layer frame into a FeatureCollection that carries the
# "country" and "prevalence" columns as feature properties.
(
    overweight_layer_geojson,
    stunting_layer_geojson,
    underweight_layer_geojson,
    wasting_layer_geojson,
    wasting_severe_layer_geojson,
) = (
    df_to_geojson(layer_frame, ["country", "prevalence"])
    for layer_frame in (
        overweight_layer_df,
        stunting_layer_df,
        underweight_layer_df,
        wasting_layer_df,
        wasting_severe_layer_df,
    )
)

In [17]:
# Write each FeatureCollection to its own file under ./data/.
for layer_path, layer_collection in (
    ("./data/overweight_layer.geojson", overweight_layer_geojson),
    ("./data/stunting_layer.geojson", stunting_layer_geojson),
    ("./data/underweight_layer.geojson", underweight_layer_geojson),
    ("./data/wasting_layer.geojson", wasting_layer_geojson),
    ("./data/wasting_severe_layer.geojson", wasting_severe_layer_geojson),
):
    with open(layer_path, 'w') as f:
        geojson.dump(layer_collection, f)

In [18]:
import geopandas as gpd

# One shapely Point per row of overweight_layer_df, built from (lon, lat) pairs.
geom = list(map(Point, zip(overweight_layer_df['lon'], overweight_layer_df['lat'])))

In [22]:
def _export_layer_gdf(layer_df, output_path):
    """Wrap a layer frame in a GeoDataFrame using ``geom`` and write it as GeoJSON.

    Args:
        layer_df: Layer DataFrame to wrap.
        output_path: Destination file path for the GeoJSON output.

    Returns:
        The GeoDataFrame that was written.
    """
    layer_gdf = gpd.GeoDataFrame(layer_df, geometry=geom)
    layer_gdf.to_file(output_path, driver='GeoJSON')
    return layer_gdf


# Every layer reuses the same point list (geom), which was built from
# overweight_layer_df.
overweight_gdf = _export_layer_gdf(overweight_layer_df, "./data/overweight_gdf.geojson")
stunting_gdf = _export_layer_gdf(stunting_layer_df, "./data/stunting_gdf.geojson")
underweight_gdf = _export_layer_gdf(underweight_layer_df, "./data/underweight_gdf.geojson")
wasting_gdf = _export_layer_gdf(wasting_layer_df, "./data/wasting_gdf.geojson")
wasting_severe_gdf = _export_layer_gdf(wasting_severe_layer_df, "./data/wasting_severe_gdf.geojson")
