# Goal:
1. This notebook uses the WHO's DALY data to re-interpolate the road-injury-induced DALYs.
2. It also interpolates the road injury data from the WRS.

In [76]:
import pandas as pd
import os
import numpy as np
import gspread
# --- Configuration -----------------------------------------------------------
# Folder holding the raw road-injury / DALY inputs. The original cluster path
# remains the default; set ROAD_INJURY_ROOT to run the notebook elsewhere
# without editing this cell.
ROOTFOLDER = os.environ.get(
    "ROAD_INJURY_ROOT",
    "/lustre1/g/geog_pyloo/05_timemachine/_raw/road_injury",
)
# Workbook file name; joined with ROOTFOLDER when it is read.
DALY_FILE = "DALYs_country.xlsx"
# INJURY_FILE = ""
# Path to the Google service-account key file. The original relative path
# remains the default; set GSPREAD_SERVICE_ACCOUNT to point somewhere else.
# Keep the key file itself out of version control.
serviceaccount = os.environ.get(
    "GSPREAD_SERVICE_ACCOUNT",
    "../../google_drive_personal.json",
)
# Authenticated gspread client used by every sheet read/write below.
# NOTE(review): the name `gc` shadows the stdlib `gc` module if that is ever imported.
gc = gspread.service_account(filename=serviceaccount)
# Spreadsheet that holds the tabs read and written by this notebook.
GC_URL = "https://docs.google.com/spreadsheets/d/1o5gFmZPUoDwrrbfE6M26uJF3HnEZll02ivnOxP6K6Xw/edit?usp=sharing"


In [77]:
def read_url(url, SHEET_NAME):
    """Load one tab of a Google spreadsheet into a DataFrame.

    The spreadsheet key is taken as the sixth ``/``-separated piece of
    ``url`` (index 5), i.e. the part that follows ``/d/`` in a link shaped
    like ``https://docs.google.com/spreadsheets/d/<key>/edit``. The
    spreadsheet is opened through the module-level gspread client ``gc``.

    Parameters
    ----------
    url : str
        Full link to the spreadsheet.
    SHEET_NAME : str
        Title of the tab to open.

    Returns
    -------
    tuple
        ``(df_spread, worksheet)``: a DataFrame built from the tab's
        ``get_all_records()`` result, and the worksheet handle itself so
        the caller can write back to the same tab.

    Raises
    ------
    IndexError
        If ``url`` has fewer than six ``/``-separated pieces.
    """
    url_parts = url.split("/")
    sheet_key = url_parts[5]
    tab = gc.open_by_key(sheet_key).worksheet(SHEET_NAME)
    frame = pd.DataFrame(tab.get_all_records())
    return frame, tab

In [78]:
# Country labels from the DALY workbook that must be rewritten before the
# country-level table is joined to the city sheet (workbook label -> label used
# for the join).
namemapping = {
    "Bolivia (Plurinational State of)": "Bolivia",
}

# Free-text notes about the DALY table, stored as plain strings.
# NOTE(review): "population": "1000" presumably means the population column is
# reported in thousands -- confirm against the workbook.
metadata = {
    "population": "1000",
    "other variables": "disability-adjusted life year (DALY)",
}

In [43]:
# Read the city classifier tab, keep only the rows that name a city, and add a
# `country_clean` column used as the join key against the country-level table.
SHEETNAME = "select_city_classifier"
city_meta, other_worksheet = read_url(GC_URL, SHEETNAME)

has_city = city_meta["City"] != ""
city_meta = city_meta.loc[has_city].reset_index(drop=True)

# Both US spellings collapse to a single label; every other country keeps
# the value already in `Country`.
is_usa = city_meta["Country"].isin(["USA", "United States"])
city_meta["country_clean"] = city_meta["Country"].mask(is_usa, "United States of America")

In [48]:
# Load the country-level DALY workbook and reshape it to one row per country.
daly_path = os.path.join(ROOTFOLDER, DALY_FILE)
daly_raw = pd.read_excel(daly_path, header=0)

# The first data row is dropped and the rows are indexed by the `variables`
# column; transposing then makes each original column (a country) a row.
# NOTE(review): the reason the first row is skipped is not visible here -- confirm.
df = daly_raw[1:].set_index("variables").T.reset_index()
df = df.rename(columns={"index": "country", "population": "pop_country"})

# Rewrite the country labels listed in `namemapping`; all others pass through.
df["country"] = df["country"].map(lambda label: namemapping.get(label, label))
df

variables,country,pop_country,Diabetes mellitus,Mental and substance use disorders,Cardiovascular diseases,Road injury
0,Afghanistan,38042,,,,
1,Albania,2881,16.262246,54.53774,259.322365,22.878482
2,Algeria,43053,335.935792,943.083512,1743.355613,513.965265
3,Angola,31825,197.491208,570.29089,811.981057,583.038095
4,Antigua and Barbuda,97,1.894489,2.282226,4.756404,0.109076
...,...,...,...,...,...,...
178,Venezuela (Bolivarian Republic of),28516,420.296464,567.550488,933.880224,599.477747
179,Viet Nam,96462,1071.672022,1601.455637,5648.363479,1516.380188
180,Yemen,29162,109.243416,668.527203,1502.470454,549.858864
181,Zambia,17861,98.933445,287.359268,491.241726,223.792295


In [51]:
# Load the urban-extent population tab and align its column names with the
# ones used in the joins below.
pop_sheet = "urban_expansion"
pop_meta, pop_worksheet = read_url(GC_URL, pop_sheet)
pop_meta = pop_meta.rename(
    columns={"City Name": "city", "Urban Extent Population": "urban_pop"}
)

# Step 1: attach the country-level DALY columns to every city row.
with_daly = city_meta.merge(
    df, left_on=["country_clean"], right_on="country", how="left"
)
# Step 2: attach the city's urban population, matched on city name only.
# NOTE(review): a city name that appears more than once in `pop_meta` would
# duplicate rows here -- confirm the names are unique.
with_pop = with_daly.merge(
    pop_meta[["city", "urban_pop"]], left_on=["City"], right_on=["city"], how="left"
)

# Drop the helper join keys and blank out missing values before the table is
# written back to the sheet.
# NOTE(review): if the sheet already contains these merged columns (i.e. this
# cell is re-run after the write-back), the merges will presumably produce
# suffixed duplicate columns -- verify before re-running.
test = with_pop.drop(columns=["city", "country", "country_clean"]).fillna("")

In [62]:
# Write the merged table back to the classifier tab: one header row followed
# by the data rows. The call's return value is left as the cell output.
header_row = test.columns.values.tolist()
data_rows = test.values.tolist()
other_worksheet.update([header_row] + data_rows)

{'spreadsheetId': '1o5gFmZPUoDwrrbfE6M26uJF3HnEZll02ivnOxP6K6Xw',
 'updatedRange': 'select_city_classifier!A1:S128',
 'updatedRows': 128,
 'updatedColumns': 19,
 'updatedCells': 2432}

## Re-read the sheet updated above

In [80]:
# Re-read the city classifier tab, keep only the rows that name a city, and
# rebuild the `country_clean` column.
SHEETNAME = "select_city_classifier"
city_meta, other_worksheet = read_url(GC_URL, SHEETNAME)

named_rows = city_meta["City"] != ""
city_meta = city_meta.loc[named_rows].reset_index(drop=True)

# Both US spellings collapse to a single label; every other country keeps
# the value already in `Country`.
us_rows = city_meta["Country"].isin(["USA", "United States"])
city_meta["country_clean"] = city_meta["Country"].mask(us_rows, "United States of America")

In [73]:
# Country-level DALY columns that get a city-level counterpart.
country_level = [
    "Diabetes mellitus",
    "Mental and substance use disorders",
    "Cardiovascular diseases",
    "Road injury",
]

# Current definition of DALYs: DALYs are a combination of the sum of the years
# of potential life lost due to premature mortality and years of productive
# life lost due to a disability per 100 000 population.
#
# City-level DALYs: the original note in this cell described the method as
# "a regression between DALYS ~ country population, then predict with city
# population". The outputs recorded in this notebook are reproduced by a
# proportional scaling,
#     city_value = country_value * urban_pop / (pop_country * 1000),
# so that is what is computed below.
# NOTE(review): the saved cell left `ally` empty and contained no code that
# created the `*_city` columns, so the next cell could not be reproduced from
# a fresh kernel. The computation here is reconstructed from the recorded
# outputs -- confirm it matches the intended method.

# `pop_country` is expressed in thousands of people, `urban_pop` in people.
POP_COUNTRY_UNIT = 1000

# Cells that are blank in the sheet do not parse as numbers; coerce them to
# NaN so the arithmetic yields NaN instead of raising.
urban_pop_num = pd.to_numeric(city_meta["urban_pop"], errors="coerce")
country_pop_num = pd.to_numeric(city_meta["pop_country"], errors="coerce") * POP_COUNTRY_UNIT
city_share = urban_pop_num / country_pop_num

# Names of the city-level columns, in the same order as `country_level`.
ally = []
for cause in country_level:
    city_col = cause.lower().replace(" ", "_") + "_city"
    city_meta[city_col] = pd.to_numeric(city_meta[cause], errors="coerce") * city_share
    ally.append(city_col)

city_meta


Unnamed: 0,City,Country,center_lat,center_lng,label,num_panoid,GSV Downloaded,Road Injury literature,Road injury data,left,...,Mental and substance use disorders,Cardiovascular diseases,Road injury,urban_pop,pop source,country_clean,diabetes_mellitus_city,mental_and_substance_use_disorders_city,cardiovascular_diseases_city,road_injury_city
0,Buenos Aires,Argentina,-34.599589,-58.380564,18,606938,2365132,,,-58.531449,...,1018.869452,1824.364079,379.257170,10568200,,Argentina,83.398899,240.450551,430.545197,89.503710
1,Sydney,Australia,-33.870453,151.208755,112,3411953,1413083,,,150.264995,...,882.731132,695.293632,79.588929,2871961,,Australia,22.280393,100.589985,79.230893,9.069408
2,Vienna,Austria,48.208166,16.371864,122,1166126,319435,,,16.182084,...,257.412530,467.253286,26.668338,1551706,,Austria,14.936479,44.603972,80.964794,4.621041
3,Saidpur,Bangladesh,25.778031,88.897626,102,4442,12721,https://journals-sagepub-com.eproxy.lib.hku.hk...,,88.851607,...,3231.301718,6247.642763,1732.543632,104935,,Bangladesh,0.704860,2.079638,4.020929,1.115050
4,Rajshahi,Bangladesh,24.374650,88.600367,95,240371,186022,https://journals-sagepub-com.eproxy.lib.hku.hk...,,88.017099,...,3231.301718,6247.642763,1732.543632,26943,,Bangladesh,0.180979,0.533966,1.032409,0.286299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,New York,USA,40.748459,-73.988593,83,2449048,757884,,,-74.255283,...,15385.702740,16900.440260,2376.142369,16235289,,United States of America,210.922085,759.094192,833.827760,117.233246
123,Los Angeles,USA,34.052235,-118.243683,65,2312065,940535,,,-118.668150,...,15385.702740,16900.440260,2376.142369,12355295,,United States of America,160.514826,577.681905,634.555255,89.216234
124,Houston,USA,29.760400,-95.369800,43,4574777,2261113,,,-95.784863,...,15385.702740,16900.440260,2376.142369,2739736,,United States of America,35.593504,128.098594,140.710026,19.783334
125,Philadelphia,USA,39.952583,-75.165222,91,754285,2997840,,,-75.279793,...,15385.702740,16900.440260,2376.142369,4760536,,United States of America,61.846893,222.582747,244.496237,34.375310


In [75]:
# City identifiers, urban population and the city-level columns listed in
# `ally`, ordered by ascending urban population.
summary_cols = ["City", "Country", "urban_pop"] + ally
y_clean = city_meta[summary_cols].sort_values("urban_pop")
y_clean

Unnamed: 0,City,Country,urban_pop,diabetes_mellitus_city,mental_and_substance_use_disorders_city,cardiovascular_diseases_city,road_injury_city
12,Palmas,Brazil,6441,0.074931,0.199086,0.265327,0.059110
4,Rajshahi,Bangladesh,26943,0.180979,0.533966,1.032409,0.286299
37,Hindupur,India,56003,0.533460,1.179150,2.730550,0.645025
51,Parepare,Indonesia,70610,1.033251,1.048498,4.500087,0.512942
74,Zwolle,Netherlands,79950,0.613488,2.248419,2.687179,0.179434
...,...,...,...,...,...,...,...
17,Sao Paulo,Brazil,13654119,158.843593,422.037781,562.459552,125.306171
122,New York,USA,16235289,210.922085,759.094192,833.827760,117.233246
96,Seoul,Republic of Korea,17106932,144.862723,350.732651,420.228582,65.447411
62,Tokyo,Japan,29181161,199.935536,476.600043,1294.540493,60.630396
