In [1]:
import json
import os

import geopandas as gpd
import pandas as pd

In [2]:
# Load the six synthetic time-series tables, one CSV per tracked quantity
# (arrivals, quarantined, isolated, infected, recovered, deaths).
arr_df, qua_df, iso_df, inf_df, rec_df, dea_df = map(pd.read_csv, (
    'data_output/synthetic_arrivals.csv',
    'data_output/synthetic_quarantined.csv',
    'data_output/synthetic_isolated.csv',
    'data_output/synthetic_infected.csv',
    'data_output/synthetic_recovered.csv',
    'data_output/synthetic_death.csv',
))

### Let's read a shapefile to figure out which province each district belongs to

In [3]:
# District-level shapefile; FIRST_STAT (state code) and FIRST_DIST (upper-case
# district name) are the columns the following cells rely on.
DISTRICTS_SHAPEFILE = 'nepal-map-governance/NEPAL_DISTRICTS_WGS.shp'

dists = gpd.read_file(DISTRICTS_SHAPEFILE)
dists.head(20)

Unnamed: 0,DDGN,FIRST_DCOD,FIRST_DIST,FIRST_GN_C,FIRST_STAT,SHAPE_LENG,SHAPE_AREA,Area,Centroid_X,Centroid_Y,Geom,geometry
0,72001,72,KANCHANPUR,1.0,7,61398.506242,159916000.0,0.0,80.321796,28.861824,,"POLYGON ((80.18686 29.12730, 80.18682 29.12667..."
1,71001,71,KAILALI,1.0,7,53523.793758,77258990.0,0.0,80.879225,28.741642,,"POLYGON ((80.56604 29.06221, 80.56632 29.06219..."
2,75001,75,DARCHULA,1.0,7,136857.950678,613946800.0,0.0,80.791893,29.89195,,"POLYGON ((80.98957 30.24626, 80.99008 30.24612..."
3,70001,70,DOTI,1.0,7,70563.556809,128470300.0,0.0,80.89457,29.169642,,"POLYGON ((80.92900 29.42084, 80.92974 29.42047..."
4,73001,73,DADELDHURA,1.0,7,75657.458052,148903700.0,0.0,80.489069,29.223122,,"POLYGON ((80.68917 29.42236, 80.68937 29.42229..."
5,68001,68,BAJHANG,1.0,7,56003.179479,86147790.0,0.0,81.178718,29.708245,,"POLYGON ((81.09006 30.05419, 81.09033 30.05418..."
6,67099,67,BAJURA,99.0,7,16729.812963,7561569.0,0.0,81.563163,29.561322,,"POLYGON ((81.69810 29.94108, 81.69822 29.94061..."
7,74001,74,BAITADI,1.0,7,72884.037404,135153200.0,0.0,80.571517,29.507541,,"POLYGON ((80.75845 29.70453, 80.75864 29.70430..."
8,69001,69,ACHHAM,1.0,7,38661.135483,58262640.0,0.0,81.297112,29.112568,,"POLYGON ((81.17123 29.38651, 81.17158 29.38636..."
9,54001,54,RUKUM_W,1.0,6,145223.675668,560341400.0,0.0,82.460452,28.738838,,"POLYGON ((82.58349 28.98759, 82.58429 28.98714..."


In [4]:
# State codes 1-6 and the province label used in the exported ids.
_PROVINCE_BY_STATE_CODE = {
    1: 'Province1',
    2: 'Province2',
    3: 'Bagmati',
    4: 'Gandaki',
    5: 'Province5',
    6: 'Karnali',
}


def province_mapper(x):
    """Translate a FIRST_STAT state code into its province label.

    NOTE(review): every value that is not one of the codes 1-6 (code 7, but
    also missing or unexpected values) is labelled 'Sudurpaschim'.
    """
    return _PROVINCE_BY_STATE_CODE.get(x, 'Sudurpaschim')


dists['PROVINCE'] = dists.FIRST_STAT.map(province_mapper)

In [5]:
# The shapefile marks the split districts with an _E / _W suffix, whereas
# the rest of our data spells the direction out. Translate those names and
# then title-case every district name so that both sources agree.

_SPLIT_DISTRICT_NAMES = {
    'RUKUM_E': 'Rukum east',
    'RUKUM_W': 'Rukum west',
    'NAWALPARASI_E': 'Nawalparasi east',
    'NAWALPARASI_W': 'Nawalparasi west',
}


def district_e_w_mapper(x):
    """Return the spelled-out name of a split district; any other value unchanged."""
    return _SPLIT_DISTRICT_NAMES.get(x, x)


cleaned_dists = dists['FIRST_DIST'].map(district_e_w_mapper).str.title()
dists['CLEAN_DIST'] = cleaned_dists

dists.head(20)

Unnamed: 0,DDGN,FIRST_DCOD,FIRST_DIST,FIRST_GN_C,FIRST_STAT,SHAPE_LENG,SHAPE_AREA,Area,Centroid_X,Centroid_Y,Geom,geometry,PROVINCE,CLEAN_DIST
0,72001,72,KANCHANPUR,1.0,7,61398.506242,159916000.0,0.0,80.321796,28.861824,,"POLYGON ((80.18686 29.12730, 80.18682 29.12667...",Sudurpaschim,Kanchanpur
1,71001,71,KAILALI,1.0,7,53523.793758,77258990.0,0.0,80.879225,28.741642,,"POLYGON ((80.56604 29.06221, 80.56632 29.06219...",Sudurpaschim,Kailali
2,75001,75,DARCHULA,1.0,7,136857.950678,613946800.0,0.0,80.791893,29.89195,,"POLYGON ((80.98957 30.24626, 80.99008 30.24612...",Sudurpaschim,Darchula
3,70001,70,DOTI,1.0,7,70563.556809,128470300.0,0.0,80.89457,29.169642,,"POLYGON ((80.92900 29.42084, 80.92974 29.42047...",Sudurpaschim,Doti
4,73001,73,DADELDHURA,1.0,7,75657.458052,148903700.0,0.0,80.489069,29.223122,,"POLYGON ((80.68917 29.42236, 80.68937 29.42229...",Sudurpaschim,Dadeldhura
5,68001,68,BAJHANG,1.0,7,56003.179479,86147790.0,0.0,81.178718,29.708245,,"POLYGON ((81.09006 30.05419, 81.09033 30.05418...",Sudurpaschim,Bajhang
6,67099,67,BAJURA,99.0,7,16729.812963,7561569.0,0.0,81.563163,29.561322,,"POLYGON ((81.69810 29.94108, 81.69822 29.94061...",Sudurpaschim,Bajura
7,74001,74,BAITADI,1.0,7,72884.037404,135153200.0,0.0,80.571517,29.507541,,"POLYGON ((80.75845 29.70453, 80.75864 29.70430...",Sudurpaschim,Baitadi
8,69001,69,ACHHAM,1.0,7,38661.135483,58262640.0,0.0,81.297112,29.112568,,"POLYGON ((81.17123 29.38651, 81.17158 29.38636...",Sudurpaschim,Achham
9,54001,54,RUKUM_W,1.0,6,145223.675668,560341400.0,0.0,82.460452,28.738838,,"POLYGON ((82.58349 28.98759, 82.58429 28.98714...",Karnali,Rukum West


In [6]:
# Sanity check: the cleaned shapefile names must be exactly the district
# columns of the arrivals table (every column after the first one).

shp_dists = set(dists.CLEAN_DIST)
our_dists = set(arr_df.columns[1:])

shp_dists == our_dists

True

#### **GOOD!**

In [7]:
import json

In [8]:
# Preview the arrivals table: a Date column followed by one column per district.
arr_df

Unnamed: 0,Date,Panchthar,Ilam,Jhapa,Morang,Sunsari,Terhathum,Bhojpur,Sankhuwasabha,Solukhumbu,...,Jajarkot,Bardiya,Kailali,Achham,Bajhang,Darchula,Dadeldhura,Nawalparasi East,Nawalparasi West,Rukum West
0,2020-03-30,0,3.0,46.0,0,0,309.0,0,2.0,210.0,...,0,398.0,85.0,2.0,0,3.0,0,16.0,119.0,600.0
1,2020-03-31,0,0.0,251.0,0,24,229.0,0,1.0,119.0,...,0,0.0,87.0,33.0,0,36.0,462,265.0,39.0,199.0
2,2020-04-01,0,5.0,0.0,0,0,312.0,0,0.0,388.0,...,290,1260.0,57.0,0.0,25,41.0,95,763.0,221.0,83.0
3,2020-04-02,0,0.0,0.0,0,40,300.0,0,0.0,401.0,...,316,440.0,41.0,2.0,3,74.0,581,811.0,179.0,0.0
4,2020-04-03,0,4.0,0.0,0,0,292.0,0,0.0,1227.0,...,23,950.0,79.0,9.0,0,61.0,75,337.0,87.0,337.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,2020-08-31,0,0.0,0.0,0,29,156.0,0,0.0,53.0,...,2,1573.0,107.0,23.0,0,0.0,40,0.0,0.0,413.0
155,2020-09-01,0,0.0,100.0,0,36,285.0,0,0.0,358.0,...,77,762.0,26.0,0.0,19,66.0,0,0.0,369.0,0.0
156,2020-09-02,0,6.0,0.0,0,10,223.0,0,21.0,0.0,...,0,1370.0,40.0,8.0,0,143.0,502,584.0,37.0,0.0
157,2020-09-03,0,6.0,0.0,0,0,302.0,0,0.0,0.0,...,323,787.0,72.0,9.0,0,122.0,34,247.0,1.0,370.0


In [9]:
def approximate_and_save_district(dataset, pschema, path, district_frame=None):
    """Write one JSON document per district containing that district's series.

    For every row of the district table an id of the form
    ``province:<PROVINCE>:district:<CLEAN_DIST>`` is built and the file
    ``<path><CLEAN_DIST>.json`` is written with the keys ``_id``, ``_rev``
    (always ``None``), ``pschema`` and ``data``. ``data`` holds one row per
    record of ``dataset``, in dataset order::

        [_id, province, district, value_as_string, None]

    Despite the name, nothing is approximated here: the values of the
    district's column are copied and only converted to strings.

    NOTE(review): the date of each record is not written into the rows; a
    consumer can only recover it from the row position. Confirm that this is
    what the target schema expects.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table with one column per district name (as in ``CLEAN_DIST``).
    pschema : str
        Stored verbatim under the ``pschema`` key.
    path : str
        Prefix prepended to the file name, normally a directory ending in
        ``/``. Its directory part is created if it does not exist yet.
    district_frame : pandas.DataFrame, optional
        Table providing the ``PROVINCE`` and ``CLEAN_DIST`` columns. Defaults
        to the notebook-level ``dists`` frame.

    Raises
    ------
    KeyError
        If ``dataset`` has no column for one of the districts.
    """
    if district_frame is None:
        district_frame = dists

    # Make the export runnable on a fresh checkout where the output folders
    # have not been created yet.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for province, district in zip(district_frame.PROVINCE, district_frame.CLEAN_DIST):
        _id = 'province:' + province + ':district:' + district

        # Read the column positionally so a dataset whose index is not
        # 0..n-1 (filtered, re-indexed by date, ...) is handled as well.
        values = [str(value) for value in dataset[district].tolist()]

        document = {
            "_id": _id,
            "_rev": None,
            "pschema": pschema,
            "data": [[_id, province, district, value, None] for value in values],
        }

        with open(path + district + '.json', 'w') as outfile:
            json.dump(document, outfile)

In [10]:
# Export every dataset district by district: (table, schema tag, output folder).
for frame, schema, out_dir in (
    (qua_df, "quarantined/v8", "districtwise_jsons/quarantined/"),
    (arr_df, "arrivals/v8", "districtwise_jsons/arrivals/"),
    (iso_df, "isolated/v8", "districtwise_jsons/isolated/"),
    (inf_df, "infected/v8", "districtwise_jsons/infected/"),
    (rec_df, "recovered/v8", "districtwise_jsons/recovered/"),
    (dea_df, "dead/v8", "districtwise_jsons/dead/"),
):
    approximate_and_save_district(frame, schema, out_dir)

#### **GREAT!**

In [11]:
# Ward-level shapefile covering all districts; the cells below filter it on
# the DISTRICT column.
WARDS_SHAPEFILE = 'nepal-map-governance/NEPAL_WARDS_WGS.shp'

wards = gpd.read_file(WARDS_SHAPEFILE)
wards

Unnamed: 0,OBJECTID,DCODE,DISTRICT,DAN,DAS,GaPa_NaPa,Type_GN,GN_CODE,NEW_WARD_N,DDGNWW,CENTER,STATE_CODE,DDGN,Area_SQKM,Shape_Leng,Shape_Area,geometry
0,1,51,ARGHAKHANCHI,,0,Sandhikharka,Nagarpalika,5.0,9,510509.0,Sandhikharka,5,51005,11.173221,18511.444524,1.117322e+07,"POLYGON ((83.12000 28.02279, 83.11993 28.02282..."
1,2,51,ARGHAKHANCHI,,0,Sandhikharka,Nagarpalika,5.0,10,510510.0,Sandhikharka,5,51005,8.840185,16383.537556,8.840185e+06,"POLYGON ((83.13196 28.01999, 83.13172 28.01987..."
2,3,51,ARGHAKHANCHI,,0,Malarani,Gaunpalika,3.0,1,510301.0,Khandaha Gufa,5,51003,5.797200,13644.745047,5.797200e+06,"POLYGON ((83.17366 28.02966, 83.17362 28.02976..."
3,4,51,ARGHAKHANCHI,,0,Chhatradev,Gaunpalika,2.0,8,510208.0,Chhatragunj,5,51002,14.180764,28896.050509,1.418076e+07,"POLYGON ((83.20093 28.07265, 83.20096 28.07263..."
4,5,51,ARGHAKHANCHI,,0,Malarani,Gaunpalika,3.0,2,510302.0,Khandaha Gufa,5,51003,8.470847,17679.412852,8.470847e+06,"POLYGON ((83.18839 28.06681, 83.18842 28.06664..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6798,6799,33,BARA,,0,Jitpur Simara,Upamahanagarpalika,5.0,18,330518.0,Jitpur,2,33005,7.888778,16309.015899,7.888778e+06,"POLYGON ((85.06679 27.14009, 85.06679 27.13918..."
6799,6800,33,BARA,,0,Kolhabi,Nagarpalika,8.0,11,330811.0,Kolhabi Bazar,2,33008,7.203428,19989.499583,7.203428e+06,"POLYGON ((85.15612 27.07047, 85.15654 27.07044..."
6800,6801,33,BARA,,0,Kolhabi,Nagarpalika,8.0,5,330805.0,Kolhabi Bazar,2,33008,20.554515,32475.975107,2.055451e+07,"MULTIPOLYGON (((85.13311 27.08342, 85.13347 27..."
6801,6802,33,BARA,,0,Jitpur Simara,Upamahanagarpalika,5.0,9,330509.0,Jitpur,2,33005,3.366166,10497.037372,3.366166e+06,"POLYGON ((84.95056 27.11354, 84.95032 27.11285..."


In [12]:
# Keep only the wards of Kathmandu district. The explicit .copy() makes
# ktm_wards an independent frame, so adding columns to it later neither
# touches `wards` nor triggers pandas' SettingWithCopyWarning.
ktm_wards = wards.loc[wards['DISTRICT'] == 'KATHMANDU'].copy()
ktm_wards

Unnamed: 0,OBJECTID,DCODE,DISTRICT,DAN,DAS,GaPa_NaPa,Type_GN,GN_CODE,NEW_WARD_N,DDGNWW,CENTER,STATE_CODE,DDGN,Area_SQKM,Shape_Leng,Shape_Area,geometry
3706,3707,27,KATHMANDU,,0,Gokarneshwor,Nagarpalika,4.0,3,270403.0,Gokarneshwor Municipality Office,3,27004,7.012783,13079.304062,7.012783e+06,"POLYGON ((85.40332 27.77693, 85.40328 27.77686..."
3707,3708,27,KATHMANDU,,0,Gokarneshwor,Nagarpalika,4.0,4,270404.0,Gokarneshwor Municipality Office,3,27004,4.691887,14361.326371,4.691887e+06,"POLYGON ((85.40531 27.73721, 85.40515 27.73690..."
3708,3709,27,KATHMANDU,,0,Gokarneshwor,Nagarpalika,4.0,5,270405.0,Gokarneshwor Municipality Office,3,27004,1.075972,4769.744766,1.075972e+06,"POLYGON ((85.37878 27.71702, 85.37845 27.71750..."
3709,3710,27,KATHMANDU,,0,Gokarneshwor,Nagarpalika,4.0,6,270406.0,Gokarneshwor Municipality Office,3,27004,0.559092,3503.426307,5.590917e+05,"POLYGON ((85.38245 27.72446, 85.38249 27.72432..."
3710,3711,27,KATHMANDU,,0,Gokarneshwor,Nagarpalika,4.0,8,270408.0,Gokarneshwor Municipality Office,3,27004,1.309237,5344.986086,1.309237e+06,"POLYGON ((85.38586 27.73292, 85.38596 27.73283..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3839,3840,27,KATHMANDU,,0,Kageshwori Manahora,Nagarpalika,5.0,1,270501.0,Kageshwori Manahora Nagarpalika Office,3,27005,10.764350,17681.728743,1.076435e+07,"POLYGON ((85.46283 27.77520, 85.46290 27.77512..."
3840,3841,27,KATHMANDU,,0,Kageshwori Manahora,Nagarpalika,5.0,2,270502.0,Kageshwori Manahora Nagarpalika Office,3,27005,1.470396,6235.762132,1.470396e+06,"POLYGON ((85.42474 27.75353, 85.42511 27.75317..."
3841,3842,27,KATHMANDU,,0,Kageshwori Manahora,Nagarpalika,5.0,3,270503.0,Kageshwori Manahora Nagarpalika Office,3,27005,1.670991,8327.919889,1.670991e+06,"POLYGON ((85.42281 27.74778, 85.42265 27.74765..."
3842,3843,27,KATHMANDU,,0,Kageshwori Manahora,Nagarpalika,5.0,6,270506.0,Kageshwori Manahora Nagarpalika Office,3,27005,3.021066,8602.488688,3.021066e+06,"POLYGON ((85.40998 27.71681, 85.40999 27.71605..."


In [13]:
# Share of the district's total area covered by each ward; the ward export
# uses it to split district-level counts across wards.
total_area = ktm_wards.Shape_Area.sum()

# assign() returns a new frame instead of writing a column into what pandas
# considers a slice of `wards`, which avoids the SettingWithCopyWarning.
ktm_wards = ktm_wards.assign(Area_Proportion=ktm_wards.Shape_Area / total_area)

# The proportions should add up to 1.
ktm_wards.Area_Proportion.sum()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


1.0

In [14]:
def approximate_and_save(dataset, pschema, path, ward_frame=None):
    """Split the Kathmandu series across wards by area and write one JSON per ward.

    For every row of the ward table an id of the form
    ``province:Bagmati:district:Kathmandu:<type_gn lower-cased>:<GaPa_NaPa>:ward:<NEW_WARD_N>``
    is built. The ward's values are the ``Kathmandu`` column of ``dataset``
    multiplied by the ward's ``Area_Proportion`` and rounded to a whole
    number (ties go to the even neighbour). The document is written to
    ``<path><GaPa_NaPa><NEW_WARD_N>.json`` with the keys ``_id``, ``_rev``
    (always ``None``), ``pschema`` and ``data``; ``data`` holds one row per
    record of ``dataset``, in dataset order::

        [_id, 'Bagmati', 'Kathmandu', GaPa_NaPa, ward_number, value_as_string, None]

    Values are serialised as float strings such as ``'4.0'``, which is what
    calling ``.astype('str')`` on a rounded NumPy float produces.

    NOTE(review): the date of each record is not written into the rows; a
    consumer can only recover it from the row position. Confirm that this is
    what the target schema expects.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table with a numeric ``Kathmandu`` column.
    pschema : str
        Stored verbatim under the ``pschema`` key.
    path : str
        Prefix prepended to the file name, normally a directory ending in
        ``/``. Its directory part is created if it does not exist yet.
    ward_frame : pandas.DataFrame, optional
        Table providing ``Type_GN``, ``GaPa_NaPa``, ``NEW_WARD_N`` and
        ``Area_Proportion``. Defaults to the notebook-level ``ktm_wards``.
    """
    if ward_frame is None:
        ward_frame = ktm_wards

    # Make the export runnable on a fresh checkout where the output folders
    # have not been created yet.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    district_counts = dataset['Kathmandu'].astype(float)

    ward_rows = zip(
        ward_frame.Type_GN.str.lower(),
        ward_frame.GaPa_NaPa,
        ward_frame.NEW_WARD_N.astype('str'),
        ward_frame.Area_Proportion,
    )

    for type_gn, municipality, ward_no, proportion in ward_rows:
        # Id parts are used directly instead of re-splitting the id on ':',
        # which would mis-index if a municipality name contained a colon.
        _id = ('province:Bagmati:' + 'district:Kathmandu:' +
               type_gn + ':' + municipality + ':ward:' + ward_no)

        # Round with pandas rather than built-in round(): when round() hands
        # back a plain Python int for a NumPy float, a following
        # .astype('str') raises AttributeError.
        ward_counts = (district_counts * proportion).round().tolist()

        document = {
            "_id": _id,
            "_rev": None,
            "pschema": pschema,
            "data": [
                [_id, 'Bagmati', 'Kathmandu', municipality, ward_no, str(count), None]
                for count in ward_counts
            ],
        }

        with open(path + municipality + ward_no + '.json', 'w') as outfile:
            json.dump(document, outfile)

In [15]:
# Export every dataset ward by ward: (table, schema tag, output folder).
for frame, schema, out_dir in (
    (qua_df, "quarantined/v8", "ktm_wardwise_jsons/quarantined/"),
    (arr_df, "arrivals/v8", "ktm_wardwise_jsons/arrivals/"),
    (iso_df, "isolated/v8", "ktm_wardwise_jsons/isolated/"),
    (inf_df, "infected/v8", "ktm_wardwise_jsons/infected/"),
    (rec_df, "recovered/v8", "ktm_wardwise_jsons/recovered/"),
    (dea_df, "dead/v8", "ktm_wardwise_jsons/dead/"),
):
    approximate_and_save(frame, schema, out_dir)