In [None]:
import pandas as pd
import json

import boto3
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy import text

In [None]:
class DbEngine(object):
    """
    Factory for SQLAlchemy engines whose credentials are stored in
    AWS Secrets Manager.

    Constructing an instance creates a boto3 session and a Secrets Manager
    client for the requested region. The ``get_*_engine`` methods read a
    secret, assemble a database URL from its fields and pass that URL to
    ``create_engine``.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region of the Secrets Manager endpoint
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    @staticmethod
    def _build_url(**components):
        """
        Build a SQLAlchemy URL from keyword components.

        SQLAlchemy 1.4+ exposes ``URL.create()`` as the public constructor and
        deprecates calling ``URL(...)`` directly; older releases only have the
        plain constructor. Use ``create`` when it exists, otherwise fall back.

        :param components: drivername / username / password / host / port /
                           database / query
        :return: URL object accepted by ``create_engine``
        """
        url_factory = getattr(URL, 'create', URL)
        return url_factory(**components)

    def get_secrets(self, secret_name):
        """
        Read one secret from AWS Secrets Manager and decode it as JSON.

        :param secret_name: id of the secret to fetch
        :return: dict parsed from the secret's ``SecretString`` payload
        :raises KeyError: if the response carries no ``SecretString`` entry
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        response = self.secrets_client.get_secret_value(SecretId=secret_name)
        return json.loads(response['SecretString'])

    def get_postgresdb_engine(self):
        """
        Create an engine for the Saiva Postgres database using the
        ``prod-saivadb`` secret.

        :return: Saivadb Postgres engine
        """
        log_message(message_type='info', action_type='connect_to_postgresdb', client='SaivaDB')
        # Fetch credentials from AWS Secrets Manager
        postgresdb_info = self.get_secrets(secret_name='prod-saivadb')
        # Create DB URL
        saivadb_url = self._build_url(
            drivername='postgresql',
            username=postgresdb_info['username'],
            password=postgresdb_info['password'],
            host=postgresdb_info['host'],
            port=postgresdb_info['port'],
            database=postgresdb_info['dbname'],
        )
        # Return Postgres Engine
        return create_engine(saivadb_url, echo=False)

    def get_sqldb_engine(self, clientdb_name):
        """
        Create an engine for a client SQL Server database.

        ``'avante'`` is read from its own ``avantedb`` secret (including the
        database name). Every other client uses the shared ``prod-sqlserver``
        secret, with ``clientdb_name`` substituted as the database name.

        :param clientdb_name: client database name
        :return: Client SQL engine
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name='prod-sqlserver')
            # The shared secret does not pin a database; select the client's.
            sqldb_info['dbname'] = clientdb_name

        # Create DB URL
        client_sqldb_url = self._build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` on ``engine`` to prove that a connection can be
        opened and a statement executed.

        The probe runs on an explicit connection that is closed on exit, and
        the statement is wrapped in ``text()``, so it does not depend on the
        legacy ``Engine.execute`` shortcut. Any failure surfaces as the
        exception raised by SQLAlchemy or the driver.

        :param engine: SQLAlchemy engine to probe
        :return: None
        """
        with engine.connect() as connection:
            connection.execute(text('select 1')).fetchall()

# `engine` is the DbEngine factory (boto3 session + Secrets Manager client),
# not a SQLAlchemy engine itself.
engine = DbEngine()
# `client_engine` is the SQLAlchemy engine for the Saiva Postgres database;
# the queries in the following cells run against it.
client_engine = engine.get_postgresdb_engine()


In [None]:
# SQL text listing unplanned hospital transfers together with the daily
# predictions made for the same patient shortly before each transfer.
#
# CTE `rh`:
#   * starts from public.hospital_transfers and LEFT JOINs
#     public.facility_patient (client / facilityid / patientid),
#     daily_predictions (client / facilityid / masterpatientid) and
#     facility (client / facilityid);
#   * the three date conditions on the daily_predictions join together keep
#     only predictions whose census date is 1, 2 or 3 days before the
#     transfer date (difference <= 3, difference != 0, census <= transfer);
#   * the WHERE clause keeps transfers dated 2020-01-01 or later with
#     planned = 'No' at facilities where is_active is true, and drops
#     'ED Visit Only' outcomes and payer descriptions containing 'hospice'.
#
# Outer SELECT: groups by every non-aggregated output column (including
# censusdate and predictionrank, exposed as rank_cutoff). num_predictions is
# 0 when every show_in_report in the group is NULL, otherwise the number of
# rows in the group.
#
# NOTE(review): the dp.published / dp.experiment_group filters sit in WHERE,
# not in the join's ON clause. A transfer whose only matching predictions are
# unpublished or outside the experiment group is therefore dropped entirely
# rather than kept with NULL prediction columns -- confirm this is intended.
# NOTE(review): the f-prefix is inert; the literal has no replacement fields.
query = f"""
with rh as (
    select ht.*,
    fa.facilityname,
    fp.masterpatientid,
    dp.modelid,
    dp.predictionrank,
    dp.censusdate,
    dp.show_in_report,
    fp.patientmrn,
    fp.firstname,
    fp.lastname
    from public.hospital_transfers ht
        left join public.facility_patient fp
        on ht.client = fp.client
        and ht.facilityid = fp.facilityid
        and ht.patientid = fp.patientid
            left join daily_predictions dp
            on ht.client = dp.client
            and ht.facilityid = dp.facilityid
            and (date(ht.dateoftransfer) - date(dp.censusdate)) <= 3
            and (date(ht.dateoftransfer) - date(dp.censusdate)) != 0
            and date(dp.censusdate) <= date(ht.dateoftransfer)
            and fp.masterpatientid = dp.masterpatientid
            left join facility fa
            on fa.facilityid = ht.facilityid
            and fa.client = ht.client
    where (dp.published = True or dp.published is null)
      and ht.dateoftransfer >= '2020-01-01 00:00:00'
      and (dp.experiment_group = True or dp.experiment_group is null)
      and fa.is_active=true
      and ht.planned='No'
      and (ht.outcome !='ED Visit Only' or ht.outcome is null)
      and (lower(ht.payerdescription) NOT LIKE '%hospice%' 
      or ht.payerdescription is null)

)
    SELECT rh.client,
           rh.facilityid,
           rh.facilityname,
           rh.modelid,
           rh.patientid,
           rh.masterpatientid,
           rh.patientmrn,
           rh.lastname,
           rh.firstname,
           rh.censusdate,
           rh.dateoftransfer,
           rh.lastadmissiondate,
           rh.planned,
           rh.transferreason,
           rh.otherreasonfortransfer,
           rh.outcome,
           rh.transferredto,
           rh.lengthofstay,
           rh.payertype,
           rh.payerdescription,
           rh.predictionrank as rank_cutoff,
           bool_or(rh.show_in_report) as show_in_report,
           (CASE
                WHEN bool_or(rh.show_in_report) IS NULL
                    THEN 0
                ELSE count(*)
               END
               ) as num_predictions
    FROM rh
    GROUP BY rh.client, rh.facilityid, rh.facilityname, rh.modelid,
             rh.patientid, rh.masterpatientid, rh.patientmrn, rh.lastname, rh.firstname, rh.censusdate, rh.dateoftransfer, 
             rh.lastadmissiondate, rh.planned, rh.transferreason, rh.otherreasonfortransfer, rh.outcome,
             rh.transferredto, rh.lengthofstay, rh.payertype, rh.payerdescription,rh.predictionrank
"""



In [None]:
# Every row returned by the transfer query describes a resident who was
# transferred, so the whole frame is flagged with resident_transferred = 1
# before it is ordered and written out.
base_df = (
    pd.read_sql(text(query), con=client_engine)
    .assign(resident_transferred=1)
    .sort_values(
        by=[
            'client',
            'facilityid',
            'facilityname',
            'masterpatientid',
            'censusdate',
            'dateoftransfer',
            'rank_cutoff',
        ]
    )
)
# Persist the sorted result next to the notebook.
base_df.to_csv('RTH_data.csv', index=False)
base_df.head()

In [None]:
# Count daily_predictions rows per calendar census date, client and facility.
#
# The GROUP BY repeats the date(censusdate) expression on purpose: a bare
# `censusdate` there is resolved by PostgreSQL to the raw input column, not
# to the SELECT alias of the same name, which would split one calendar day
# into several groups whenever censusdate carries a time component.
query = """
select date(censusdate) as censusdate, client,facilityid ,count(*) as ranked_d from daily_predictions dp
group by date(censusdate),client,facilityid

"""
# Wrap the statement in text(), matching how the transfer query is executed.
ranked_d = pd.read_sql(text(query), con=client_engine)
ranked_d.to_csv('ranked_d.csv', index=False)
ranked_d.head()