In [1]:
from math import radians, cos, sin, asin, sqrt
import pandas as pd 
from tqdm import tqdm
import folium
import numpy as np
import requests

pd.set_option('max_rows',500)
pd.set_option('max_columns',500)

# url for map and corona data

In [2]:
# idn_json_url ="https://raw.githubusercontent.com/superpikar/indonesia-geojson/master/indonesia-province-simple.json"
idn_json_url = "https://raw.githubusercontent.com/adamaulia/indonesia-geojson/master/indonesia-province-simple.json"
corona_url = "https://api.kawalcorona.com/indonesia/provinsi"

In [3]:
# get map data 
# df_json = pd.DataFrame(requests.get(idn_json_url).json())
json_map = requests.get(idn_json_url).json()

In [4]:
# get map data 
df_json = pd.DataFrame(requests.get(idn_json_url).json())

# get corona data
json_corona = requests.get(corona_url).json()

# some clean up 
df_corona = pd.DataFrame(list(map(lambda n: n['attributes'], json_corona)))

# get uniq provinsi from both corona and map data
# df_json -> key = kode, feateures ->properties -> kode
# df_corona -> key = Kode_Provi
# df_corona province id must be on the left side 

#simpyfy dataframe
df_corona = df_corona[['Kode_Provi', 'Kasus_Posi', 'Kasus_Semb','Kasus_Meni','FID','Provinsi']]


# df_corona['positif_sum'] = df_corona['Kasus_Posi'].sum()

# df_corona['ratio'] = df_corona['Kasus_Posi']/df_corona['positif_sum']

df_corona['log_positif'] = df_corona['Kasus_Posi'].apply(lambda x:np.log(x))

# uniq province id from map
df_json_prov = pd.DataFrame(list(map(lambda n: {'prov_id' : n['properties']['kode'], 'prov_name' : n['properties']['Propinsi']}, df_json['features'])))

In [5]:
# audit data to check mapping between map data and corona data
df_join = pd.merge(df_corona,df_json_prov,left_on='Kode_Provi', right_on='prov_id',how='outer')

# 1st map visualization

In [6]:
m = folium.Map(location=[-0.4471383, 117.1655734], zoom_start=3)
#  ‘BuGn’, ‘BuPu’, ‘GnBu’, ‘OrRd’, ‘PuBu’, ‘PuBuGn’, ‘PuRd’, ‘RdPu’, ‘YlGn’, ‘YlGnBu’, ‘YlOrBr’, and ‘YlOrRd

folium.Choropleth(
    geo_data=idn_json_url,
    name='choropleth',
    data=df_corona,
    columns=['Kode_Provi','log_positif','Kasus_Posi'],
    key_on='feature.properties.kode',
    fill_color='YlOrRd',
    fill_opacity=0.9,
    line_opacity=0.2,
    legend_name='Kasus_Positif logaritmic scale'
).add_to(m)

m

In [7]:
# a = df_json['features'][0]['geometry']['coordinates']

# add pin point location each province

In [8]:
# extract lat, long and province name 
province_list , coord_list, province_id = [], [],[]
for item in df_json['features']:
#     province_list.append(df_json['properties']['Propinsi'])
    province_list.append(item['properties']['Propinsi'])
    province_id.append(item['properties']['kode'])
    tmp = item['geometry']['coordinates']
    flat_list = []
    for item in tmp :
        for subitem in item:
            for subsubitem in subitem:
                flat_list.append(subsubitem)
    coord_list.append(flat_list)

In [9]:
# put into dataframe
df_tmp = pd.DataFrame({'province':province_list,'coord_tmp':coord_list,'ID':province_id})
df_tmp['length'] = df_tmp['coord_tmp'].apply(len)

In [10]:
spc_city = ['GORONTALO', "DKI JAKARTA", "JAWA BARAT", "KALIMANTAN TENGAH", "SUMATERA SELATAN", "JAMBI", "LAMPUNG","DAERAH ISTIMEWA YOGYAKARTA"]

In [11]:
def get_lat_long(list_input):
    lat_long = []
    lat = []
    long = []
    for i in list_input:
        if i > 50:
            long.append(i)
        else :
            lat.append(i)
    if len(lat) == len(long):
        for i in range(len(lat)):
            lat_long.append([long[i],lat[i]])
    return lat_long

In [12]:
# city with different coord structure 
df_tmp1_a = df_tmp[df_tmp['province'].isin(spc_city)]

# city with normal coord structure
df_tmp1_b = df_tmp[~df_tmp['province'].isin(spc_city)]

In [13]:
# grouping lat and long 
df_tmp1_a['coord_tmp2'] = df_tmp1_a['coord_tmp'].apply(get_lat_long)

df_tmp1_a.drop('coord_tmp', inplace=True,axis=1)

df_tmp1_a = df_tmp1_a[['province','coord_tmp2','length','ID']]

df_tmp1_a.columns = ['province','coord_tmp','length','ID']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [14]:
# explode lat long
df_tmp2 = pd.concat([df_tmp1_a,df_tmp1_b])
df_tmp2 = df_tmp2.explode('coord_tmp')

# extract lat and long into their column
df_tmp2['lat'] = df_tmp2['coord_tmp'].str[1]
df_tmp2['long'] = df_tmp2['coord_tmp'].str[0]

# avg lat and long per province 
df_tmp2_gb = df_tmp2.groupby(['province','ID']).agg( {'lat':'mean','long':'mean'})

df_tmp2_gb = df_tmp2_gb.dropna()
df_tmp2_gb = df_tmp2_gb.reset_index()

#join with corona data 
df_province_point = pd.merge(df_tmp2_gb,df_corona,left_on='ID',right_on='Kode_Provi',how='inner')

In [15]:
df_province_point.head()

Unnamed: 0,province,ID,lat,long,Kode_Provi,Kasus_Posi,Kasus_Semb,Kasus_Meni,FID,Provinsi,log_positif
0,BALI,51,-8.480445,115.162647,51,374,280,4,17,Bali,5.924256
1,BANGKA BELITUNG,19,-2.518896,106.638688,19,36,24,1,8,Kepulauan Bangka Belitung,3.583519
2,BENGKULU,17,-3.863151,102.648215,17,69,3,2,7,Bengkulu,4.234107
3,DAERAH ISTIMEWA YOGYAKARTA,34,-7.887106,110.448192,34,215,105,8,14,Daerah Istimewa Yogyakarta,5.370638
4,DI. ACEH,11,3.681516,96.833209,11,18,15,1,1,Aceh,2.890372


In [16]:
for row in df_province_point.itertuples():    
    folium.Marker(
        location=[row.lat, row.long],
        icon=folium.Icon(color='blue', icon='info-sign'),
        popup = row.province +' '+str(row.Kasus_Posi) 
    ).add_to(m)

In [17]:
m