In [1]:
import json

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
from os.path import expanduser, join, abspath, exists

from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
import datetime

# Spark SQL warehouse directory, resolved against the current working directory.
warehouse_location = abspath('spark-warehouse')

# Create (or reuse) a Spark session with Hive support enabled.
spark = (
    SparkSession.builder
    .appName("Forex processing")
    .config("spark.sql.warehouse.dir", warehouse_location)
    .enableHiveSupport()
    .getOrCreate()
)

# NOTE(review): despite its name, `today` holds the previous calendar day and
# `yesterday` the day before that. Neither value is used below, because the
# snapshot paths are pinned to fixed dates.
one_day = datetime.timedelta(days=1)
today = datetime.date.today() - one_day
yesterday = today - one_day

# Daily snapshots: the current one and the one from the day before.
today_path = '/home/bayusamudra/Project/covid/covid_kawal_2020-12-19.json'
yesterday_path = '/home/bayusamudra/Project/covid/covid_kawal_2020-12-18.json'

# Cumulative counters for which a day-over-day difference is derived.
case_columns = ["positif", "sembuh", "meninggal"]

df_today = spark.read.json(today_path)

if exists(yesterday_path):
    # A previous snapshot is available: pair each province with its previous
    # counters (suffixed "_2") and subtract them from the current ones.
    df_yesterday = spark.read.json(yesterday_path)
    previous = df_yesterday.select(
        *[col(metric).alias(metric + "_2") for metric in case_columns],
        col("provinsi"),
    )
    update_covid1 = (
        df_today
        .select("tanggal", "FID", "provinsi", *case_columns)
        .join(previous, "provinsi")
    )
    for metric in case_columns:
        update_covid1 = update_covid1.withColumn(
            "pertambahan_" + metric, col(metric) - col(metric + "_2")
        )
    update_covid1 = update_covid1.drop(*[metric + "_2" for metric in case_columns])
else:
    # No previous snapshot: the increase is taken to be the current total.
    update_covid1 = df_today
    for metric in case_columns:
        update_covid1 = update_covid1.withColumn("pertambahan_" + metric, col(metric))

# Fix the final column set and order regardless of which branch ran.
update_covid1 = update_covid1.select(
    "tanggal",
    "FID",
    "provinsi",
    *case_columns,
    *["pertambahan_" + metric for metric in case_columns],
)

In [5]:
# Collect the Spark result to the driver as a pandas DataFrame; this is the
# frame that is plotted further down.
covid = update_covid1.toPandas()

In [6]:
# Convert the FID column to str values.
covid['FID'] = covid["FID"].astype(str)

In [7]:
# Read the Indonesia ADM1 boundary shapefile into a GeoDataFrame.
indonesia = gpd.read_file('/home/bayusamudra/Downloads/idn_admbnda_adm1_bps_20200401/idn_admbnda_adm1_bps_20200401.shp')

In [8]:
# Keep only the digits of each province code (the exported feature shows e.g. '11').
# regex=True must be explicit: since pandas 2.0, Series.str.replace treats the
# pattern as a literal string by default, which would leave the codes untouched.
# The raw string avoids the invalid '\D' escape-sequence warning.
indonesia['ADM1_PCODE'] = indonesia['ADM1_PCODE'].str.replace(r'\D', '', regex=True).astype(str)

# Overwrite the English name of the row labelled 5 with 'DKI Jakarta'.
# Presumably this aligns the spelling with the `provinsi` values of the COVID
# data, because the map matches on ADM1_EN -- verify against the data.
# NOTE(review): this relies on row label 5 being Jakarta in this shapefile.
indonesia.loc[5,'ADM1_EN'] = 'DKI Jakarta'

# Export the cleaned boundaries as GeoJSON.
indonesia.to_file('/home/bayusamudra/Project/indonesia.json', driver='GeoJSON')

In [9]:
# Read the exported GeoJSON back as a plain Python dict; it is later passed to
# plotly through the `geojson=` argument.
# The encoding is explicit because GeoJSON is UTF-8 by specification, whereas
# open() would otherwise fall back to the platform's default encoding.
with open('/home/bayusamudra/Project/indonesia.json', encoding='utf-8') as rec:
    ina = json.load(rec)

In [10]:
# Inspect the first GeoJSON feature to check its property names and values.
ina["features"][0]

{'type': 'Feature',
 'properties': {'Shape_Leng': 27.3673635506,
  'Shape_Area': 4.62543727674,
  'ADM1_EN': 'Aceh',
  'ADM1_PCODE': '11',
  'ADM1_REF': None,
  'ADM1ALT1EN': None,
  'ADM1ALT2EN': None,
  'ADM0_EN': 'Indonesia',
  'ADM0_PCODE': 'ID',
  'date': '2019-12-20',
  'validOn': '2020-04-01',
  'validTo': '1899-11-30'},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[97.113005, 2.113689],
     [97.129534, 2.107722],
     [97.153591, 2.086227],
     [97.154088, 2.079963],
     [97.162686, 2.071394],
     [97.153, 2.062363],
     [97.153662, 2.0554],
     [97.135632, 2.045116],
     [97.125385, 2.045669],
     [97.118481, 2.034669],
     [97.128345, 2.012816],
     [97.126644, 2.007757],
     [97.114026, 2.004251],
     [97.112372, 2.016149],
     [97.097797, 2.032759],
     [97.091728, 2.053044],
     [97.085411, 2.060576],
     [97.077491, 2.076348],
     [97.078692, 2.086232],
     [97.08651, 2.089639],
     [97.093422, 2.087594],
     [97.098622, 2.093202],
     [9

In [58]:
import plotly.express as px

# Column with the increase in positive cases computed earlier; it drives the
# colouring, and its maximum is the top of the colour scale.
new_cases_col = 'pertambahan_positif'
colour_ceiling = covid[new_cases_col].max()

# Provinces are matched between the data (`provinsi`) and the GeoJSON features
# (`properties.ADM1_EN`) by name.
fig = px.choropleth(
    covid,
    geojson=ina,
    locations='provinsi',
    featureidkey="properties.ADM1_EN",
    color=new_cases_col,
    color_continuous_scale="viridis",
    range_color=(0, colour_ceiling),
    title="Peta Pertambahan Kasus Positif",
    labels={new_cases_col: 'pertambahan kasus'},
)

# Remove the outer margins, zoom to the plotted provinces and hide the base map.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()