In [362]:
import pandas as pd
import numpy as np
import math
from datetime import datetime

### Load data
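Load the dimension tables. Only the mobility, special measures and weather dimensions are loaded for now; the remaining ones are still commented out.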

In [363]:
# Dimensions that are read from CSV right now. Paths are relative to the
# directory this notebook lives in.
weather_dimension = pd.read_csv(filepath_or_buffer="../Weather/weather_dimension.csv")
special_measures_dimension = pd.read_csv(filepath_or_buffer="../Special_Measures/Restrictions.csv")
mobility_dimension = pd.read_csv(filepath_or_buffer="../Mobility/mobility_dimension.csv")

# Placeholders for dimensions that are not loaded yet (no file path filled in):
# onset_date_dimension = pd.read_csv("")
# reported_date_dimension = pd.read_csv("")
# test_date_dimension = pd.read_csv("")
# specimen_date_dimension = pd.read_csv("")
# patient_dimension = pd.read_csv("")
# phu_dimension = pd.read_csv("")

In [364]:
# Empty fact-table skeleton: one foreign-key column per dimension in use.
# A wider, currently disabled column list is kept here for reference:
#   Onset_date_key, Reported_date_key, Test_date_key, Specimen_date_key,
#   Patient_key, PHU_key, Mobility_key, Weather_key, Special_measures_key,
#   Resolved, Unresolved, Fatal
fact_table = pd.DataFrame(
    columns=[
        "Mobility_key",
        "Weather_key",
        "Special_measures_key",
    ]
)

### Rename columns
Rename the generic `surrogate_key` columns so that each dimension's key has a distinct name once the tables are merged.

In [365]:
# Give each dimension's generic "surrogate_key" column its own name so the
# keys stay distinguishable after the dimensions are merged.
special_measures_dimension = special_measures_dimension.rename(
    mapper={"surrogate_key": "Special_measures_key"},
    axis="columns",
)
weather_dimension = weather_dimension.rename(
    mapper={"surrogate_key": "Weather_key"},
    axis="columns",
)

### Normalize Location attributes
Replace location names with just "Ottawa" or "Toronto". For example, "Ottawa Division" is replaced with "Ottawa".

In [366]:
# Collapse the mobility subregion labels to the bare city name.
mobility_dimension["Subregion"] = mobility_dimension["Subregion"].replace(
    {
        "Ottawa Division": "Ottawa",
        "Toronto Division": "Toronto",
    }
)

# Collapse every weather station label to the same bare city names so both
# dimensions share one location vocabulary.
weather_dimension["Station Name"] = weather_dimension["Station Name"].replace(
    {
        "OTTAWA INTL A": "Ottawa",
        "OTTAWA CDA RCS": "Ottawa",
        "TORONTO CITY": "Toronto",
        "TORONTO INTL A": "Toronto",
    }
)

### Map PHU, Mobility, Special Measures and Weather
Inner-join these dimensions so that rows are matched on both date and location.

In [367]:
# Pair each mobility row with the weather row for the same date and location.
# The inner join keeps only the date/location combinations present in both.
fact_table_data = mobility_dimension.merge(
    weather_dimension,
    how="inner",
    left_on=["Date", "Subregion"],
    right_on=["Date/Time", "Station Name"],
)

In [368]:
# Map mobility and weather to special measures by location and date range.
#
# Each merged row receives the Special_measures_key of the measure whose
# [start_date, end_date) window contains the row's Date; rows that fall in no
# window (or belong to another location) keep the empty string "".
DATE_FORMAT = "%Y-%m-%d"


def _parse_measure_windows(measures):
    """Turn a slice of the special-measures dimension into date windows.

    Args:
        measures: DataFrame with ``start_date`` and ``end_date`` columns holding
            ``YYYY-MM-DD`` strings and a ``Special_measures_key`` column.

    Returns:
        A list of ``(start, end, key)`` tuples in row order, with ``start`` and
        ``end`` parsed to ``datetime``.

    Raises:
        ValueError: if a date string does not match ``DATE_FORMAT``.
    """
    return [
        (
            datetime.strptime(measure["start_date"], DATE_FORMAT),
            datetime.strptime(measure["end_date"], DATE_FORMAT),
            measure["Special_measures_key"],
        )
        for _, measure in measures.iterrows()
    ]


def _special_measures_key_for(date_text, windows):
    """Return the key of the last window with ``start <= date < end``, or ""."""
    date = datetime.strptime(date_text, DATE_FORMAT)
    matched_key = ""
    for start, end, key in windows:
        if start <= date < end:
            # No break: when windows overlap, the later row wins.
            matched_key = key
    return matched_key


# The measure windows are parsed once per location instead of once per fact row.
# NOTE(review): the first 5 rows of the special-measures table are treated as
# Ottawa's and the last 6 as Toronto's -- confirm this matches the row order
# of Restrictions.csv.
windows_by_location = {
    "Ottawa": _parse_measure_windows(special_measures_dimension.head(5)),
    "Toronto": _parse_measure_windows(special_measures_dimension.tail(6)),
}

special_measures_keys = []
for date_text, location in zip(fact_table_data["Date"], fact_table_data["Station Name"]):
    windows = windows_by_location.get(location)
    special_measures_keys.append(
        "" if windows is None else _special_measures_key_for(date_text, windows)
    )

# dtype=object keeps the mix of "" placeholders and integer keys intact.
fact_table_data["Special_measures_key"] = pd.Series(
    special_measures_keys, index=fact_table_data.index, dtype=object
)

In [369]:
# Drop columns not needed for fact table creation
# Remove the descriptive attributes and the weather-side join columns; the
# surrogate keys, Date and Subregion remain.
fact_table_data = fact_table_data.drop(
    columns=[
        "Province",
        "Grocery_and_phramacy",
        "Parks",
        "Transit_stations",
        "Workplaces",
        "Residential",
        "Retail_and_recreation",
        "Mean Temp (°C)",
        "Min Temp (°C)",
        "Max Temp (°C)",
        "Total Precip (mm)",
        "Station Name",
        "Date/Time",
    ]
)

In [370]:
# Build the fact table in one step from the three key columns.
# Selecting the columns directly replaces the row-by-row `.loc[len(...)]`
# appends, which re-allocated the frame on every row and appended duplicate
# rows whenever this cell was executed more than once.
fact_table = fact_table_data[
    ["Mobility_key", "Weather_key", "Special_measures_key"]
].reset_index(drop=True)

In [371]:
# Write the fact table next to the notebook; the positional index is omitted
# because the key columns already identify each row's dimensions.
fact_table.to_csv(path_or_buf="fact_table.csv", index=False)

In [372]:
# Display the merged mapping table as a visual check of the key assignments;
# the rendered output elides the middle rows of the frame.
fact_table_data

Unnamed: 0,Mobility_key,Date,Subregion,Weather_key,Special_measures_key
0,0,2020-10-27,Ottawa,0,0
1,1,2020-10-28,Ottawa,1,0
2,2,2020-10-29,Ottawa,2,0
3,3,2020-10-30,Ottawa,3,0
4,4,2020-10-31,Ottawa,4,0
...,...,...,...,...,...
241,242,2021-02-22,Toronto,241,10
242,243,2021-02-23,Toronto,242,10
243,244,2021-02-24,Toronto,243,10
244,245,2021-02-25,Toronto,244,10
