In [None]:
import json
import time

import boto3
import numpy as np
import pandas as pd
from eliot import log_message
from sqlalchemy import create_engine, text
from sqlalchemy.engine.url import URL

In [None]:
# database class.

class DbEngine(object):
    """
    Build SQLAlchemy engines from credentials kept in AWS Secrets Manager.

    The constructor creates one boto3 session and one Secrets Manager client;
    every secret lookup made through this object reuses that client.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region passed to the Secrets Manager client
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    def get_secrets(self, secret_name):
        """
        Fetch one secret and decode its ``SecretString`` payload as JSON.

        :param secret_name: id of the secret to read
        :return: the decoded JSON payload; ``get_sqldb_engine`` reads the keys
                 ``username``, ``password``, ``host``, ``port`` and ``dbname``
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        response = self.secrets_client.get_secret_value(SecretId=secret_name)
        return json.loads(response['SecretString'])

    def get_sqldb_engine(self, clientdb_name):
        """
        Create a SQL Server engine for the requested database.

        ``'avante'`` uses its own secret (``avantedb``), including the database
        name stored in it; per the original author that database sits behind
        the client VPN and therefore has separate credentials. Any other name
        uses the ``dev-sqlserver`` secret with ``dbname`` overridden by
        ``clientdb_name``.

        :param clientdb_name: ``'avante'`` or the name of a database on the
                              dev SQL Server
        :return: SQLAlchemy engine (``mssql+pyodbc``, fast_executemany enabled)
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name='dev-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # SQLAlchemy 1.4+ expects URLs to be built with URL.create(); calling
        # URL(...) directly is the legacy form. Older releases have no
        # URL.create, so fall back to the constructor there.
        build_url = getattr(URL, 'create', URL)
        client_sqldb_url = build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        return create_engine(client_sqldb_url, echo=False, fast_executemany=True)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` against ``engine``; any connection or query error
        propagates to the caller.

        The query is executed as a plain statement rather than inside an
        ``assert`` so it still runs when Python is started with ``-O``, and it
        goes through an explicit connection so the check does not depend on
        ``Engine.execute`` (absent from SQLAlchemy 2.x).

        :param engine: SQLAlchemy engine to probe
        :return: None
        """
        with engine.connect() as connection:
            connection.execute(text('select 1')).fetchall()

In [None]:
def get_new_query_dict():
    """
    Return the table-name -> SELECT-statement mapping for the current run.

    At the moment the mapping holds a single entry, ``view_ods_room``.

    :return: dict keyed by table name with the query text as value
    """
    room_query = '''
SELECT *
from view_ods_room       
        '''
    return {'view_ods_room': room_query}


In [None]:
# One DbEngine (and so one Secrets Manager client) serves both lookups: the
# source engine is built first, then the destination engine.
engine = DbEngine()
avante_engine, dest_engine = [
    engine.get_sqldb_engine(clientdb_name=db_name)
    for db_name in ('avante', 'avante_data_from_july_2020')
]

In [None]:
# SQL Server accepts at most 2100 bound parameters per statement; 2097 leaves
# a little headroom when sizing multi-row INSERT chunks.
MAX_SQL_PARAMS = 2097
# A single VALUES table-value constructor may list at most 1000 rows, which is
# what pandas emits per chunk with method='multi'.
MAX_ROWS_PER_INSERT = 1000
# Tables written with chunked multi-row INSERTs; every other table goes
# through the default to_sql path.
CHUNKED_TABLES = {'view_ods_Patient_weights_vitals', 'view_ods_physician_order_list_med'}

query_dict = get_new_query_dict()
for table_name, query in query_dict.items():
    print('------------')
    print(f'about to read {table_name} from {avante_engine.url.database}...')
    print(query)

    # Read the whole result set into memory, then replace the destination table.
    query_df = pd.read_sql(query, con=avante_engine)

    print(f'about to write {table_name} of shape {query_df.shape} to {dest_engine.url.database}...')
    # NOTE(review): the original comment reported that an older library version
    # ("23.0", presumably pandas 0.23 - confirm) could not use fast_executemany
    # and hit the 2100-parameter limit, and suggested chunksize as the slower
    # workaround.
    # rows per chunk = parameter budget // columns, capped at the 1000-row
    # VALUES limit and never below 1 (also avoids dividing by zero columns).
    column_count = max(len(query_df.columns), 1)
    tsql_chunksize = max(1, min(MAX_SQL_PARAMS // column_count, MAX_ROWS_PER_INSERT))
    print(f'chunksize = {tsql_chunksize}')

    t0 = time.time()
    if table_name in CHUNKED_TABLES:
        print('writing using chunks')
        query_df.to_sql(
            table_name,
            con=dest_engine,
            if_exists='replace',
            index=False,
            chunksize=tsql_chunksize,
            method='multi',
        )
    else:
        print('writing WITHOUT chunks')
        query_df.to_sql(table_name, con=dest_engine, if_exists='replace', index=False)
    t1 = time.time()
    time_taken = t1 - t0

    print(f'------------write time taken: {time_taken/60:.2f} minutes')
    

In [None]:
# All queries should be included below (to be used when we need to rewrite all the tables again)
def get_all_query_dict(start_date='2020-07-01 00:00:00.000'):
    """
    Return every table-name -> SELECT-statement pair used for a full rewrite.

    Queries with a date filter select rows from ``start_date`` onwards; the
    remaining tables are read in full. With the default ``start_date`` the
    result is the same as before the parameter existed.

    :param start_date: lower bound placed inside the quoted T-SQL date literal
                       of every date-filtered query (converted with ``str``)
    :return: dict keyed by table name with the query text as value
    :raises ValueError: if ``start_date`` contains a single quote, which would
                        terminate the SQL string literal it is embedded in
    """
    start_date = str(start_date)
    if "'" in start_date:
        raise ValueError(f'start_date must not contain a single quote: {start_date!r}')

    query_dict = {
        'view_ods_facility' : '''
SELECT *
FROM dbo.view_ods_facility
        ''',
        'view_ods_facility_patient' : f'''
SELECT * 
FROM view_ods_facility_patient
WHERE patientid in
  (SELECT clientid
   FROM view_ods_daily_census_v2
   WHERE censusdate BETWEEN '{start_date}' AND DATEADD(d, +1, CURRENT_TIMESTAMP)
  );
        ''',
        'view_ods_master_patient' : f'''
SELECT *
FROM view_ods_master_patient
WHERE masterpatientid in
  (SELECT masterpatientid 
   FROM view_ods_daily_census_v2 a
   left join view_ods_facility_patient b 
   on a.clientid = b.patientid and a.facilityid = b.facilityid
   WHERE censusdate BETWEEN '{start_date}' AND DATEADD(d, +1, CURRENT_TIMESTAMP)
  );
''',
        'view_ods_census_codes' : '''
SELECT *
from view_ods_census_codes
        ''',
        'view_ods_daily_census_v2' : f'''
SELECT *
FROM view_ods_daily_census_v2
WHERE censusdate BETWEEN '{start_date}' AND DATEADD(d, +1, CURRENT_TIMESTAMP)
        ''',
        'view_ods_hospital_transfers_transfer_log_v2' : f'''
SELECT *
FROM view_ods_hospital_transfers_transfer_log_v2
WHERE dateoftransfer BETWEEN '{start_date}' AND CURRENT_TIMESTAMP        
        ''',
        'view_ods_hospital_transfers_admission_log' : f'''
SELECT *
FROM view_ods_hospital_transfers_admission_log
WHERE DateOfAdmission BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_patient_diagnosis' : f'''
SELECT *
FROM view_ods_patient_diagnosis
WHERE revisiondate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_Patient_weights_vitals' : f'''
SELECT *
FROM view_ods_Patient_weights_vitals
WHERE date BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_physician_order_list_med' : f'''
SELECT *
FROM view_ods_physician_order_list_med
WHERE PhysiciansOrderID in 
  (SELECT PhysiciansOrderID 
   FROM view_ods_physician_order_list_v2 
   WHERE OrderRevisionDate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
  );
        ''',
        'view_ods_physician_order_list_v2' : f'''
SELECT *
FROM view_ods_physician_order_list_v2
WHERE OrderRevisionDate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_cr_alert_triggered_item_type' : '''
SELECT *
FROM view_ods_cr_alert_triggered_item_type;
        ''',
        'view_ods_cr_alert' : f'''
SELECT *
FROM view_ods_cr_alert 
WHERE RevisionDate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_progress_note' : f'''
SELECT *
FROM view_ods_progress_note
WHERE createddate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_result_lab_test_abnormality' : '''
SELECT *
FROM view_ods_result_lab_test_abnormality
        ''',
        'view_ods_result_lab_report_severity' : '''
SELECT * 
FROM view_ods_result_lab_report_severity;
        ''',
        'view_ods_result_order_source' : f'''
SELECT * 
FROM view_ods_result_order_source 
WHERE resultsourcedate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP
        ''',
        'view_ods_result_lab_report' : f'''
SELECT * 
FROM view_ods_result_lab_report 
WHERE reporteddate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP;
        ''',
        'view_ods_result_lab_report_detail' : f'''
SELECT * 
FROM view_ods_result_lab_report_detail 
WHERE labreportid in
  (SELECT labreportid 
   FROM view_ods_result_lab_report 
   WHERE reporteddate BETWEEN '{start_date}' AND CURRENT_TIMESTAMP);
        ''',
        'view_ods_std_assessment_v2' : '''
SELECT *
FROM dbo.view_ods_std_assessment_v2
        ''',
        'view_ods_std_pick_list_with_effective_dates' : '''
SELECT *
FROM dbo.view_ods_std_pick_list_with_effective_dates
        ''',
        'view_ods_bed' : '''
SELECT *
from view_ods_bed        
        ''',
        'view_ods_room' : '''
SELECT *
from view_ods_room       
        ''',
        'view_ods_unit' : '''
SELECT *
from view_ods_unit
        ''',
        'view_ods_payer' : '''
SELECT *
from view_ods_payer
        ''',
        'view_ods_floor' : '''
SELECT *
from view_ods_floor
''',
        'view_ods_provider' : '''
SELECT *
from view_ods_provider
''',
        'view_ods_medical_professional' : '''
SELECT *
from view_ods_medical_professional        
        ''',
        'view_ods_patient_provider' : '''
SELECT *
from view_ods_patient_provider
''',
    }
    return query_dict
