# Aggregate Case Information by Date and Municiaplity

## Preliminaries

Import dependencies

In [32]:
import pandas as pd
import re
import numpy as np

Set Province to be used

In [3]:
province = "CATANDUANES"

Read from CSV File

In [4]:
# read from csv file
df = pd.read_csv(f"filtered_cases/{province}_case_information.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,CaseCode,Age,AgeGroup,Sex,DateSpecimen,DateResultRelease,DateRepConf,DateDied,DateRecover,...,ProvRes,CityMunRes,CityMuniPSGC,BarangayRes,BarangayPSGC,HealthStatus,Quarantined,DateOnset,Pregnanttab,ValidationStatus
0,19761,C781369,50.0,50 to 54,MALE,2020-05-30,,2020-06-06,,2020-07-07,...,CATANDUANES,SAN MIGUEL,PH052009000,,,RECOVERED,YES,,,Age or Birthdate is Invalid\nCase has Admittin...
1,29313,C636837,47.0,45 to 49,MALE,,,2020-06-22,,2020-07-08,...,CATANDUANES,BAGAMANOC,PH052001000,,,RECOVERED,YES,,,Age or Birthdate is Invalid
2,43311,C951557,1.0,0 to 4,MALE,2020-07-02,2020-07-04,2020-07-06,,2020-07-23,...,CATANDUANES,VIRAC (CAPITAL),PH052011000,,,RECOVERED,NO,2020-06-28,,Age or Birthdate is Invalid
3,45859,C639154,21.0,20 to 24,MALE,2020-06-30,2020-07-05,2020-07-08,,,...,CATANDUANES,CARAMORAN,PH052004000,,,RECOVERED,NO,2020-06-30,,"Removal Type is ""Recovered"", but no Recovered ..."
4,49871,C619176,22.0,20 to 24,FEMALE,2020-07-06,2020-07-07,2020-07-10,,2020-07-23,...,CATANDUANES,VIRAC (CAPITAL),PH052011000,,,RECOVERED,YES,,NO,Age or Birthdate is Invalid


## Aggregating Data

### List down columns
- Date : date recorded
- Municipality : municipality of case
- NewCases : number of new cases
- Deaths : number of deaths
- Recoveries : number of recoveries

Get List of Municipalities

In [36]:
mun_list = df["CityMunRes"].unique()

# remove nan
mun_list = mun_list[~pd.isnull(mun_list)]

# remove parenthesis
mun_list = [re.sub(' \(.*\)', '', x) for x in mun_list]

mun_list

['SAN MIGUEL',
 'BAGAMANOC',
 'VIRAC',
 'CARAMORAN',
 'GIGMOTO',
 'BARAS',
 'PANGANIBAN',
 'PANDAN',
 'BATO',
 'SAN ANDRES',
 'VIGA']

Parse Data by Municipality

In [59]:
# load municipality location data
locations = pd.read_csv(f"location_data/filtered_data/{province}.csv")

# set municipality name to uppercase
locations["Municipality/City"] = locations["Municipality/City"].apply(lambda x: x.upper())

locations = locations.set_index("Municipality/City")

locations

Unnamed: 0_level_0,Unnamed: 0.1,Unnamed: 0,Province,Latitude,Longitude
Municipality/City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BAGAMANOC,401,401,Catanduanes,13.939637,124.28783
BARAS,402,402,Catanduanes,13.659104,124.370468
BATO,403,403,Catanduanes,13.608036,124.297787
CARAMORAN,404,404,Catanduanes,13.983835,124.133872
GIGMOTO,405,405,Catanduanes,13.778669,124.391173
PANDAN,406,406,Catanduanes,14.049227,124.170211
PANGANIBAN,407,407,Catanduanes,13.908167,124.30082
SAN ANDRES,408,408,Catanduanes,13.598915,124.096969
SAN MIGUEL,409,409,Catanduanes,13.640329,124.303109
VIGA,410,410,Catanduanes,13.871531,124.30873


Create DataFrame for aggregated data

Use DateRepRem instead for consistency

In [70]:
df_aggregated = pd.DataFrame()
df_aggregated

for mun in mun_list:
    #print(mun)

    # for each unique dates, count the number of new cases
    cases = df.groupby("DateRepConf").size()

    # count number of deaths
    deaths = df[df["RemovalType"] == "DIED"].groupby("DateRepRem").size()

    # count number of recoveries
    recoveries = df[df["RemovalType"] == "RECOVERED"].groupby("DateRepRem").size()

    # create new DataFrame for data
    new_df = pd.DataFrame()
    new_df.index.name = "Date"

    new_df["NewCases"] = cases
    new_df["Deaths"] = deaths
    new_df["Recoveries"] = recoveries
    new_df["Municipality"] = mun
    new_df["Latitude"] = locations.loc[mun]["Latitude"]
    new_df["Longitude"] = locations.loc[mun]["Longitude"]

    new_df = new_df.fillna(0)

    # append to aggregated dataframe
    df_aggregated = pd.concat([df_aggregated, new_df], ignore_index=True)

    #mun_dict[mun] = new_df

    #print(new_df)

df_aggregated


Unnamed: 0,NewCases,Deaths,Recoveries,Municipality,Latitude,Longitude
0,1,0.0,0.0,SAN MIGUEL,13.640329,124.303109
1,1,0.0,0.0,SAN MIGUEL,13.640329,124.303109
2,2,0.0,0.0,SAN MIGUEL,13.640329,124.303109
3,2,0.0,0.0,SAN MIGUEL,13.640329,124.303109
4,1,0.0,0.0,SAN MIGUEL,13.640329,124.303109
...,...,...,...,...,...,...
6375,2,0.0,0.0,VIGA,13.871531,124.308730
6376,6,0.0,0.0,VIGA,13.871531,124.308730
6377,1,0.0,0.0,VIGA,13.871531,124.308730
6378,1,0.0,6.0,VIGA,13.871531,124.308730


Output aggregated data to CSV files

In [71]:
#for mun in mun_dict:
#    mun_dict[mun].to_csv(f"aggregated_data/{province}/{mun}.csv")

df_aggregated.to_csv(f"aggregated_data/{province}.csv")