In [None]:
import boto3
import os
from pathlib import Path
import pandas as pd
import json
from saiva.model.shared.utils import get_client_class, get_memory_usage
from saiva.model.shared.constants import MODEL_TYPE

## Load config

In [None]:
from saiva.model.shared.constants import LOCAL_TRAINING_CONFIG_PATH
from saiva.training.utils import load_config

# Load the training configuration from the local config path. The cells below
# read the organization list and the experiment dates from `training_config`.
config = load_config(LOCAL_TRAINING_CONFIG_PATH)
training_config = config.training_config

### These datacards need to be run for every client we are interested in. The next cell lists the available (client, datasource_id) pairs.

In [None]:
# Show every (organization_id, datasource id) pair available in the training config.
[
    (org.organization_id, org.datasource.id)
    for org in training_config.organization_configs
]

In [None]:
# Default to the first organization in the training config. To run the datacards
# for another client, change the index (or assign the values directly).
CLIENT = training_config.organization_configs[0].organization_id
# Taken from the same organization entry as CLIENT; override it here if the
# datasource id to use differs from that entry's.
datasource_id = training_config.organization_configs[0].datasource.id

In [None]:
# Experiment window taken from the training metadata.
EXPERIMENT_DATES = training_config.training_metadata.experiment_dates
TRAIN_START_DATE, TEST_END_DATE = (
    EXPERIMENT_DATES[key] for key in ('train_start_date', 'test_end_date')
)

# "<train start>-TO-<test end>" label, used as a path component in S3 locations.
date_range = '{}-TO-{}'.format(TRAIN_START_DATE, TEST_END_DATE)

# Destination bucket and key prefix for the datacard input files.
bucket = 'saiva-dev-data-bucket'
s3_file_folder = "training_data/saiva-3-day-hosp-v6/{}/{}/datacard_data".format(CLIENT, date_range)

In [None]:
# Full S3 URI of the datacard data folder. It is derived from `bucket` and
# `s3_file_folder` (rather than repeating the literal path) so that it cannot
# drift from the location the upload cells actually write to.
s3_folder_path = f's3://{bucket}/{s3_file_folder}'

s3_folder_path

In [None]:
# Local directory that the first upload loop reads its files from.
# NOTE(review): hard-coded absolute path — adjust if the processed data lives elsewhere.
folder_path = "/data/processed/"

In [None]:
# The lower-cased model type is interpolated into the pickle file names below.
MODEL_TYPE = MODEL_TYPE.lower()

# For each split (train, valid, test), in order: feature matrix, 3-day target
# and identifiers; the shared feature-name list comes last.
files = [
    f'final-{split}_{artefact}_{MODEL_TYPE}.pickle'
    for split in ('train', 'valid', 'test')
    for artefact in ('x', 'target_3_day', 'idens')
]
files.append('feature_names.pickle')

In [None]:
def upload_to_s3(file_path, bucket, s3_file_path, s3_client=None):
    """Upload a single local file to S3.

    Args:
        file_path: Local path of the file to upload (``str`` or path-like).
        bucket: Name of the destination S3 bucket.
        s3_file_path: Destination object key inside ``bucket``.
        s3_client: Optional S3 client to reuse across calls. When omitted, a
            new ``boto3`` S3 client is created for this call, as before.

    Returns:
        None.
    """
    if s3_client is None:
        s3_client = boto3.client('s3')
    # os.fspath leaves str unchanged and turns pathlib.Path objects into str.
    s3_client.upload_file(os.fspath(file_path), bucket, s3_file_path)

In [None]:
# Upload every file listed in `files` from the local folder to the datacard prefix.
for file in files:
    file_path = os.path.join(folder_path, file)
    # S3 object keys always use "/" as the separator, so build the key explicitly
    # instead of with os.path.join (which would insert "\" on Windows).
    upload_to_s3(file_path, bucket, f"{s3_file_folder}/{file}")

In [None]:
# Reset the local folder. With an empty prefix, os.path.join(folder_path, name)
# returns the bare file name, so the next upload loop reads its files relative
# to the current working directory.
folder_path = ""

In [None]:
# Load the model configuration from the current working directory. JSON text is
# UTF-8, so the encoding is set explicitly instead of relying on the locale default.
with open('model_config.json', 'r', encoding='utf-8') as config_file:
    model_config = json.load(config_file)

In [None]:
# Display the loaded model configuration for inspection.
model_config

In [None]:
# Second batch of uploads: config/stat files, performance reports, the pickled
# model (named after the `modelid` entry of the model config) and the trial data.
files = [
    'feature_drop_stats.json',
    'model_config.json',
    'performance_valid_base.csv',
    'performance_train_base.csv',
    'performance_test_base.csv',
    'duplicate_rows_performance_TEST_base.csv',
    'duplicate_rows_performance_TRAIN_base.csv',
    'duplicate_rows_performance_VALID_base.csv',
    f"{model_config['modelid']}.pickle",
    'trial_data.csv',
]

In [None]:
# Upload every file listed in `files` from the local folder to the datacard prefix.
for file in files:
    file_path = os.path.join(folder_path, file)
    # S3 object keys always use "/" as the separator, so build the key explicitly
    # instead of with os.path.join (which would insert "\" on Windows).
    upload_to_s3(file_path, bucket, f"{s3_file_folder}/{file}")

In [None]:
# Print one ready-to-copy `run_datacard.py` command per datacard type. Each
# print call emits the heading and the command on separate lines.
print(
    "Decisions Datacard",
    "python run_datacard.py decisions run"
    f" --outfile=s3://saiva-datacards/datacards/dev/{CLIENT}/{date_range}/"
    f" --s3-folder-path={s3_folder_path}"
    " --skip-client-name-in-files",
    sep="\n",
)

print(
    "\nXAEDY Datacard",
    "python run_datacard.py xaedy run"
    f" --model-type {MODEL_TYPE}"
    f" --client={CLIENT}"
    f" --datasource_id={datasource_id}"
    f" --outfile=s3://saiva-datacards/datacards/dev/{CLIENT}/{date_range}/"
    f" --s3-folder-path={s3_folder_path}"
    " --skip-client-name-in-files",
    sep="\n",
)

print(
    "\nTrained Model Datacard",
    "python run_datacard.py trained_model run"
    f" --s3-folder-path={s3_folder_path}"
    f" --outfile=s3://saiva-datacards/datacards/dev/{CLIENT}/{date_range}/",
    sep="\n",
)

print(
    "\nProbability Datacard",
    "python run_datacard.py prediction_probability run"
    f" --model-type {MODEL_TYPE}"
    f" --client={CLIENT}"
    f" --s3-folder-path={s3_folder_path}"
    f" --outfile=s3://saiva-datacards/datacards/dev/{CLIENT}/{date_range}/",
    sep="\n",
)

# NOTE(review): this command spells the flag `--model_type` (underscore) while the
# others use `--model-type` — confirm which spelling run_datacard.py accepts.
print(
    "\nSHAP Values Datacard",
    "python run_datacard.py shap_values run"
    f" --model_type {MODEL_TYPE}"
    f" --s3-folder-path={s3_folder_path}"
    f" --outfile=s3://saiva-datacards/datacards/dev/{CLIENT}/{date_range}/",
    sep="\n",
)

In [None]:
# Clean up the datacard_data folder: delete every object stored under it.
s3 = boto3.client('s3')

# The trailing "/" restricts the match to keys inside this folder; without it,
# sibling prefixes that merely start with the same text would be deleted as well.
cleanup_prefix = s3_file_folder.rstrip('/') + '/'

# list_objects_v2 returns at most 1000 keys per call and delete_objects accepts
# at most 1000 keys per request, so list page by page and delete each page.
failed_deletes = []
for page in s3.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=cleanup_prefix):
    objects_to_delete = [{'Key': obj['Key']} for obj in page.get('Contents', [])]
    if objects_to_delete:
        response = s3.delete_objects(Bucket=bucket, Delete={'Objects': objects_to_delete})
        # Per-key failures are reported in the response body rather than raised.
        failed_deletes.extend(response.get('Errors', []))

if failed_deletes:
    raise RuntimeError(f"Failed to delete {len(failed_deletes)} object(s) from s3://{bucket}/{cleanup_prefix}: {failed_deletes}")