In [None]:
import sys
from pathlib import Path

# Extend the import path before the `shared.*` imports below are executed.
# NOTE(review): presumably '/src' is where the `shared` package is mounted — confirm.
sys.path.insert(0, '/src')
import pandas as pd

from shared.load_raw_data import fetch_cache_data
from shared.utils import get_client_class
from shared.constants import CLIENT, END_DATE,START_DATE
from eliot import  to_file
from shared.data_manager import DataManager
# Register stdout as an Eliot log destination so log messages show up in the
# notebook output.
to_file(sys.stdout)

In [None]:
# Notebook display configuration: lift every pandas truncation limit so that
# frames are rendered in full (all rows, all columns, unlimited line width).
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# `None` is the supported way to disable column-content truncation. The former
# sentinel `-1` was deprecated in pandas 1.0 and raises ValueError in pandas 2.0.
pd.set_option('display.max_colwidth', None)

In [None]:
# Load the data from local directory cache

# Run configuration used by the cells below.
facilityid = 1
S3_BUCKET = 'saiva-dev-data-bucket'

# Directory that receives the parquet files written later in this notebook;
# created up front (including parents) so the writes cannot fail on a missing path.
processed_path = Path('/data/processed')
processed_path.mkdir(parents=True, exist_ok=True)

clientClass = get_client_class(client=CLIENT)

# Fetch the cached raw data and report the shape of every entry as a quick
# sanity check that everything was loaded.
result_dict = fetch_cache_data(client=CLIENT, generic=True)
for key, value in result_dict.items():
    print(f'{key} : {value.shape}')

In [None]:
%%time

# Build the DataManager that drives the rest of the notebook. It receives the
# cached raw data (`result_dict`), the facility id, the client and date window
# from `shared.constants`, and the S3 bucket name configured earlier.
dm = DataManager(
    result_dict=result_dict,
    facilityid=facilityid,
    client=CLIENT,
    start_date=START_DATE,
    end_date=END_DATE,
    s3_bucket=S3_BUCKET,
)

In [None]:
%%time

# Compute the four feature tables, persist each one under `processed_path`,
# then print their shapes and preview the rehospitalisation table.
alerts_df, admissions_df, diagnosis_df, rehosp_df = dm.get_features()

# (frame, output file name) pairs, kept in the order they are written/printed.
feature_outputs = (
    (alerts_df, 'alerts_df.parquet'),
    (admissions_df, 'admissions_df.parquet'),
    (diagnosis_df, 'diagnosis_df.parquet'),
    (rehosp_df, 'rehosp_df.parquet'),
)

# Write everything first ...
for frame, filename in feature_outputs:
    frame.to_parquet(processed_path / filename)

# ... then report the shapes, one per line.
for frame, _filename in feature_outputs:
    print(frame.shape)

rehosp_df.head(3)

In [None]:
%%time

# Combine the four feature tables into a single frame via the DataManager and
# preview the first rows of the merged result.
final_df = dm.merge_features(
    alerts_df,
    admissions_df,
    diagnosis_df,
    rehosp_df
)
final_df.head()

## Ranking

In [None]:
# Two-step ranking: derive a total score from the merged features, then turn
# the scores into ranks. Both intermediate frames stay available for inspection.
score_df = dm.generate_total_score(final_df)
result_df = dm.generate_ranks(score_df)

# Preview the first 20 rows of the ranked output.
result_df.head(20)

In [None]:
# Persist the ranked output. NOTE(review): the file is named 'final_df.parquet'
# but it stores `result_df` (the ranked frame), not the merged `final_df`
# variable — confirm that downstream readers expect the ranked data here.
result_df.to_parquet(processed_path/'final_df.parquet')

In [None]:
# Spot-check: show up to 25 ranked rows for a single, hard-coded census date.
# NOTE(review): the filter compares `censusdate` against a string literal; this
# assumes the column is a string or datetime64 column — TODO confirm, since an
# object column of `datetime.date` values would match nothing.
result_df[result_df['censusdate'] == '2020-12-02'].head(25)