In [5]:
import pandas as pd
import numpy as np
import pyodbc 
import time
from sqlalchemy import create_engine
from datetime import datetime
import sqlite3

In [6]:
import sys
import logging
path = r'..\..\Logs\clinical_log.log'
logging.basicConfig(filename=path,
                    filemode='a',
                    format='%(asctime)s,%(msecs)d,%(name)s,%(levelname)s,%(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.DEBUG)
logger = logging.getLogger("Primary Care Provider Tracker")
# logger.info("testing log")

Warning: If there is no ptCurrent_Provider_Tracker table that is because it may not have been created yet. there is code in this Script that will create the table. Step 1: comment out the script that selects from the ptCurrent_Provider_Tracker table, uncomment useable = True int he next script, and run every cell. after that there will be the ptCurrent_Provider_Tracker table and you can uncomment the script and comment usable = True again.


In [7]:
# conn = sqlite3.connect('../../InSyncConnection/Database/InSyncClinical.db')
conn = create_engine(r'mssql+pyodbc://@PYTHON\SQLEXPRESS/InSync?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes', fast_executemany=True)

# cursor = conn.cursor()

# # printing all table names  
# sql_query = """SELECT name FROM sqlite_master
#     WHERE type='table';"""

# cursor.execute(sql_query)
# print(cursor.fetchall())

## Read in Data 

### Get new data from sql

In [8]:
# Get current provider data from sql 
try:
    sql='''
    SELECT 
        PatientID,
        PatientPrimaryProviderID AS 'ProviderID'
    FROM 
        tblPatientPrimaryProviders
    WHERE
       IsDefault LIKE 'True'
    '''
    new_provider_df = pd.read_sql(sql, conn)    
    logger.info(f"Successfully queried tblPatientPrimaryProviders.")            
except Exception as e:
    logger.error(f"Failed to query tblPatientPrimaryProviders.", exc_info=True) 
    print(e)


(pyodbc.OperationalError) ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: Error Locating Server/Instance Specified [xFFFFFFFF].  (-1) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (-1)')
(Background on this error at: https://sqlalche.me/e/20/e3q8)


In [9]:
# Get patient data
try:
    sql='''
    SELECT 
        PatientId AS PatientID,
        FirstName,
        LastName,
        DOB
    FROM 
        emr_PatientDetails
    '''
    patient_details_df = pd.read_sql(sql, conn)
    if not patient_details_df['FirstName'].isnull().all():
        patient_details_df['FirstName'] = patient_details_df['FirstName'].apply(lambda name: name.capitalize() if pd.notna(name) else name)
    
    if not patient_details_df['LastName'].isnull().all():
        patient_details_df['LastName'] = patient_details_df['LastName'].apply(lambda name: name.capitalize() if pd.notna(name) else name)
    
    logger.info(f"Successfully queried emr_PatientDetails.")            
except Exception as e:
    logger.error(f"Failed to query emr_PatientDetails.", exc_info=True) 
    print(e)


(pyodbc.OperationalError) ('08001', '[08001] [Microsoft][ODBC Driver 17 for SQL Server]SQL Server Network Interfaces: Error Locating Server/Instance Specified [xFFFFFFFF].  (-1) (SQLDriverConnect); [08001] [Microsoft][ODBC Driver 17 for SQL Server]Login timeout expired (0); [08001] [Microsoft][ODBC Driver 17 for SQL Server]A network-related or instance-specific error has occurred while establishing a connection to SQL Server. Server is not found or not accessible. Check if instance name is correct and if SQL Server is configured to allow remote connections. For more information see SQL Server Books Online. (-1)')
(Background on this error at: https://sqlalche.me/e/20/e3q8)


In [6]:
# sql = """
# select *
# FROM INFORMATION_SCHEMA.TABLES
# WHERE (TABLE_NAME LIKE 'pt%')
# """
# tables = pd.read_sql(sql, conn)
# tables

In [7]:
# Get previous provider info
useBackup = False
try:
    sql = '''SELECT * FROM ptCurrent_Provider_Tracker'''
    old_provider_df = pd.read_sql(sql, conn)
    logger.info(f"Successfully queried ptCurrent_Provider_Tracker.")
except Exception as e:
    useBackup = True
    logger.error(f"Failed to query ptCurrent_Provider_Tracker.", exc_info=True)
    print(e)

# get previous changes info
try:
    sql = '''SELECT * FROM ptPrimaryProvider_ChangeLog'''
    prev_changes_df = pd.read_sql(sql, conn)
    logger.info(f"Successfully queried ptOld_Provider_Tracker.")            
except Exception as e:
    useBackup = True
    logger.error(f"Failed to query ptOld_Provider_Tracker.", exc_info=True) 
    print(e)

### Get backed up data from excel
    Use if issue with the db

In [8]:
# useBackup = True
if useBackup == True:
    try:
        old_provider_df = pd.read_excel(r"../../../TestEnvironment/data/Provider Tracker.xlsx",
                                         sheet_name = "Old Provider")
        prev_changes_df = pd.read_excel(r"../../../TestEnvironment/data/Provider Tracker.xlsx",
                                         sheet_name = "Changes")
        logger.info("Successfully read in old data from excel backup")
    except Exception as e:
        logger.error("Failed to read in old data from excel backup")
prev_changes_df

Unnamed: 0,PatientID,ProviderID,DateofChange
0,620378,1858.0,07/09/2024
1,620715,1769.0,07/09/2024
2,622921,1527.0,07/09/2024
3,626682,1537.0,07/09/2024
4,628206,1537.0,07/09/2024
...,...,...,...
10750,622419,1599.0,06/14/2023
10751,622423,1599.0,06/14/2023
10752,622465,1599.0,06/14/2023
10753,622475,1599.0,06/14/2023


## Compare new data to old data 

In [9]:
try:    
    #merge frames
    consistant_providers = old_provider_df.merge(new_provider_df,how='outer',on='PatientID', indicator=False)
    
    #filter out consistant data
    inconsistant_mask = consistant_providers['ProviderID_x'] != consistant_providers['ProviderID_y']
    inconsistantProviders = consistant_providers[inconsistant_mask].copy()
    
    # reformat frame
    inconsistantProviders.drop('ProviderID_x', axis = 1, inplace=True)
    inconsistantProviders.rename(columns={'ProviderID_y': 'ProviderID'}, inplace=True)
    
    # add date of change
    today = datetime.today().strftime('%m/%d/%Y')
    inconsistantProviders['Date Of Change'] = today
    
    # concat prev changes
    all_changes = pd.concat([inconsistantProviders,prev_changes_df])
    logger.info(f"Successfully compared old data to new data.")            
except Exception as e:
    logger.error(f"Failed to compare old data to new data.", exc_info=True) 
    print(e)


## Push data to DB

In [10]:
# ", ".join([item + " " + str(new_provider_df[item].dtype) for item in new_provider_df.columns])

In [11]:
new_provider_df.columns.tolist()

['PatientID', 'ProviderID']

In [12]:
# changes ptCurrent_Provider_Tracker to today's current provider
table_name = "ptCurrent_Provider_Tracker"
try:
    new_provider_df.to_sql(table_name, conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")
    new_provider_df.to_csv()
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)

In [13]:
# ", ".join([item + " " + str(all_changes[item].dtype) for item in all_changes.columns])

In [14]:
all_changes.columns.tolist()

['PatientID', 'ProviderID', 'Date Of Change', 'DateofChange']

In [15]:
all_changes = all_changes.rename(columns={"Date Of Change": "DateofChange"})

all_changes = all_changes[['PatientID',
                           'ProviderID',
                           'DateofChange']]

# table_name = "ptPrimaryProvider_ChangeLog"
try:
    all_changes.to_sql("ptPrimaryProvider_ChangeLog", conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")            
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)

A column with name 'DateofChange' is already present in table 'ptPrimaryProvider_ChangeLog'.


In [16]:
conn.dispose()

## Write backup to excel 

In [17]:
# write data to excel as a backup in case something happens to the db
try:
    with pd.ExcelWriter('../../../TestEnvironment/data/Provider Tracker.xlsx') as writer:  
        new_provider_df.to_excel(writer, sheet_name='Old Provider', index = False)
        all_changes.to_excel(writer, sheet_name='Changes', index = False)
    logger.info("Successfully backed up changes to excel.")
except Exception as e:
    logger.error("Failed to back up changes to excel.")
    print(e)