In [22]:
import pandas as pd
from pandas.io import gbq

In [23]:
# Import & Clean from Flat Files

# ISO codes of the countries kept from every indicator file.
indochina_codes = ["KHM", "LAO", "MMR", "THA", "VNM"]


def _latest_row_per_country(csv_path):
    """Read an indicator CSV and keep one row per selected country.

    Rows are limited to the codes in ``indochina_codes``; sorting by "Year"
    in descending order before dropping duplicate codes leaves the row with
    the highest "Year" for each country.
    """
    indicator = pd.read_csv(csv_path)
    indicator = indicator[indicator["Code"].isin(indochina_codes)]
    return indicator.sort_values("Year", ascending=False).drop_duplicates(["Code"])


# Population: 2021 rows only, restricted to the selected countries.
pop_raw = pd.read_csv("./flat_files/world_population.csv")
pop_data = pop_raw[(pop_raw["Year"] == 2021) & pop_raw["Code"].isin(indochina_codes)]

# Unemployment Rate
unempl_data = _latest_row_per_country("./flat_files/unemployment_rate.csv")

# Hospital Beds
hos_data = _latest_row_per_country("./flat_files/hospitals_beds_counts.csv")

# GDP
gdp_data = _latest_row_per_country("./flat_files/world_gdp.csv")

# Covid IndoChina (daily records, loaded without filtering)
cov_data = pd.read_csv("./flat_files/indochina_covid_data_daily.csv")

In [24]:
# Merge tables into a data warehouse
#
# Every lookup table (population, unemployment, hospital beds, GDP) carries its
# own Entity / Code / Year columns. Merging them one after another and relying
# on the automatic "_x" / "_y" suffixes makes the fourth merge try to create
# Entity_x / Code_x / Year_x a second time, which newer pandas releases reject
# with a MergeError. To avoid any suffixing, each lookup is reduced to its join
# key ("Code") plus its indicator column before it is merged, and the key is
# dropped again right after the merge.

# Friendly names for the indicator column contributed by each lookup table.
indicator_names = {
    'Population (historical estimates and future projections)': 'population',
    'Unemployment, total (% of total labor force) (modeled ILO estimate)': 'unemployment_rate',
    'Hospital beds (total number)': 'hospital_beds',
    'GDP': 'gdp',
}

merge_data = cov_data
for lookup in (pop_data, unempl_data, hos_data, gdp_data):
    # Entity and Year describe the lookup row only; they are not wanted in the
    # daily covid table (the original code dropped them after merging).
    lookup_slim = lookup.drop(columns=['Entity', 'Year'])
    merge_data = merge_data.merge(
        lookup_slim,
        how='left',
        left_on='iso_code',
        right_on='Code',
        # Each country must appear at most once in a lookup; otherwise the
        # daily rows would be silently duplicated by the join.
        validate='many_to_one',
    ).drop(columns='Code')

# rename returns a new frame; assigning it keeps the cell free of in-place mutation.
merge_data = merge_data.rename(columns=indicator_names)

# Preview the result (up to 900 rows requested; long frames are truncated on display).
merge_data.head(900)

Unnamed: 0,iso_code,location,date,total_cases,new_cases,total_deaths,new_deaths,new_tests,total_tests,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,new_vaccinations,population,unemployment_rate,hospital_beds,gdp
0,KHM,Cambodia,6/1/2021,30710,616,220,6,4686.0,1162111.0,0.102,9.8,tests performed,4671201.0,2648859.0,2022342.0,57639.0,16946446,0.31,12417,5.954185e+10
1,KHM,Cambodia,6/2/2021,31460,750,230,10,5517.0,1167628.0,0.102,9.8,tests performed,4723944.0,2665822.0,2058122.0,52743.0,16946446,0.31,12417,5.954185e+10
2,KHM,Cambodia,6/3/2021,32189,729,236,6,5785.0,1173413.0,0.104,9.6,tests performed,4782587.0,2688014.0,2094573.0,58643.0,16946446,0.31,12417,5.954185e+10
3,KHM,Cambodia,6/4/2021,33075,886,242,6,4654.0,1178067.0,0.109,9.2,tests performed,4839260.0,2710359.0,2128901.0,56673.0,16946446,0.31,12417,5.954185e+10
4,KHM,Cambodia,6/5/2021,33613,538,252,10,5394.0,1183461.0,0.137,7.3,tests performed,4894315.0,2735199.0,2159116.0,55055.0,16946446,0.31,12417,5.954185e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
845,VNM,Vietnam,11/13/2021,1018346,8467,23018,88,,,,,,98930571.0,64322087.0,34608484.0,1098813.0,98168829,2.27,238458,6.614880e+11
846,VNM,Vietnam,11/14/2021,1026522,8176,23082,64,,,,,,99751224.0,64467940.0,35283284.0,820653.0,98168829,2.27,238458,6.614880e+11
847,VNM,Vietnam,11/15/2021,1035138,8616,23183,101,,,,,,100862898.0,64767521.0,36095377.0,1111674.0,98168829,2.27,238458,6.614880e+11
848,VNM,Vietnam,11/16/2021,1045397,10259,23270,87,,,,,,102030576.0,65222953.0,36807623.0,1167678.0,98168829,2.27,238458,6.614880e+11


In [26]:
# Load the data warehouse onto Big Query (as table covid_indochina)
gbq_project = "cis-4400-baruch-hung-tran"
gbq_table = "cis-4400-baruch-hung-tran.project_cis4400.covid_indochina"

# if_exists="replace": an existing table with this name is replaced by this upload.
merge_data.to_gbq(
    destination_table=gbq_table,
    project_id=gbq_project,
    if_exists="replace",
)
print("Data Loaded Successfully")

1it [00:04,  4.01s/it]


Data Loaded Successfully
