In [126]:
import pandas as pd
from pandas.io import gbq

In [127]:
# Import & Clean from Flat Files

# ISO codes of the five countries kept from every flat file.
COUNTRY_CODES = ["KHM", "LAO", "MMR", "THA", "VNM"]


def load_country_indicator(csv_path, source_column, target_column, year=None):
    """Load one flat file and keep only the selected countries.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file; it must contain "Code" and "Year" columns.
    source_column : str
        Name of the value column as it appears in the file.
    target_column : str
        Short name the value column is renamed to.
    year : int, optional
        When given, only rows whose "Year" equals this value are kept.
        When omitted, the rows are sorted by "Year" (newest first) and the
        first row for each country code is kept.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with the value column renamed and "Year" removed.
    """
    frame = pd.read_csv(csv_path)
    frame = frame[frame["Code"].isin(COUNTRY_CODES)]
    if year is None:
        # Newest year first, so drop_duplicates keeps the latest row per code.
        frame = frame.sort_values("Year", ascending=False).drop_duplicates(["Code"])
    else:
        frame = frame[frame["Year"] == year]
    # rename/drop return new frames, so the filtered slice is never mutated
    # in place (in-place edits on a slice can raise SettingWithCopyWarning).
    return frame.rename(columns={source_column: target_column}).drop(columns=["Year"])


# Population (fixed to the 2021 rows)
pop_data = load_country_indicator(
    "./flat_files/world_population.csv",
    'Population (historical estimates and future projections)',
    'population',
    year=2021,
)

# Unemployment Rate (latest available year per country)
unempl_data = load_country_indicator(
    "./flat_files/unemployment_rate.csv",
    'Unemployment, total (% of total labor force) (modeled ILO estimate)',
    'unemployment_rate',
)

# Hospital Beds (latest available year per country)
hos_data = load_country_indicator(
    "./flat_files/hospitals_beds_counts.csv",
    'Hospital beds (total number)',
    'hospital_beds',
)

# GDP (latest available year per country)
gdp_data = load_country_indicator(
    "./flat_files/world_gdp.csv",
    'GDP',
    'gdp',
)

# Merge pop + unemployment + hospital beds + GDP
# No `on=` is passed, so each merge is an inner join on every column name the
# two frames have in common.
# NOTE(review): the shared columns are presumably just the country
# identifiers (e.g. "Code") - confirm against the CSV headers.
merge_data_1 = pd.merge(pop_data, unempl_data)
merge_data_2 = pd.merge(merge_data_1, hos_data)
merge_data_3 = pd.merge(merge_data_2, gdp_data)

In [128]:
# Covid IndoChina
# Read the daily COVID file and normalise it in a single chain:
#   1. every missing value becomes 0,
#   2. in tests_units the 0 placeholder is swapped for the text "no info",
#   3. the identifier columns are renamed to Code / Entity.
cov_data = (
    pd.read_csv("./flat_files/indochina_covid_data_daily.csv")
    .fillna(0)
    .assign(tests_units=lambda frame: frame["tests_units"].replace(0, "no info"))
    .rename(columns={"iso_code": "Code", "location": "Entity"})
)

# Final Merge
# Inner join of the daily COVID rows with the per-country indicator table.
# NOTE(review): no `on=` is given, so pandas joins on every column name the
# two frames share - confirm that this is only the renamed identifier columns.
merge_data = cov_data.merge(merge_data_3)

In [129]:
# Load dataframes onto Big Query
# All uploads target the same project; a table that already exists is replaced.
gbq_project_id = "cis-4400-baruch-hung-tran"

# (frame, fully qualified destination table), uploaded in this order.
gbq_uploads = [
    (pop_data, "cis-4400-baruch-hung-tran.project_cis4400.pop_data"),
    (unempl_data, "cis-4400-baruch-hung-tran.project_cis4400.unempl_data"),
    (hos_data, "cis-4400-baruch-hung-tran.project_cis4400.hos_data"),
    (gdp_data, "cis-4400-baruch-hung-tran.project_cis4400.gdp_data"),
    (cov_data, "cis-4400-baruch-hung-tran.project_cis4400.cov_data"),
    (merge_data, "cis-4400-baruch-hung-tran.project_cis4400.all_info"),
]

for frame, destination in gbq_uploads:
    frame.to_gbq(
        destination_table=destination,
        project_id=gbq_project_id,
        if_exists="replace",
    )

print("Data Loaded Successfully")



1it [00:06,  6.99s/it]


1it [01:17, 77.56s/it]


1it [00:03,  3.74s/it]


1it [00:06,  6.39s/it]


1it [00:06,  6.55s/it]


1it [00:05,  5.02s/it]


Data Loaded Successfully
