In [212]:
import pandas as pd
import numpy as np
# import pyodbc 
import time
from sqlalchemy import create_engine
import os

import datetime
import sqlite3
from pandas.tseries.offsets import DateOffset

In [213]:
import sys
import logging
path = r"..\..\InSyncConnection\Code\clinical_log.txt"
logging.basicConfig(filename=path,
                    filemode='a',
                    format='%(asctime)s,%(msecs)d,%(name)s,%(levelname)s,%(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.DEBUG)
logger = logging.getLogger("NOMS-Survey Completion")

## DB Query

In [214]:
# conn = sqlite3.connect('../../InSyncConnection/Database/InSyncClinical.db')
conn = create_engine(r'mssql+pyodbc://@PYTHONSERVER\SQLEXPRESS/InSync?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes', fast_executemany=True)


# # # printing all table names
# sql_query = """SELECT name FROM sqlite_master
#      WHERE type='table';"""

# cursor.execute(sql_query)
# print(cursor.fetchall())

### Encounter Query 

In [215]:
try:
    # Select all patients from EcounterSummary Table
    testPatients = '''
    SELECT
        PatientId
    FROM 
        emr_PatientDetails
    WHERE 
        LOWER(FirstName) LIKE '%test%'
        OR LOWER(LastName) LIKE '%test%'
        OR LOWER(FirstName) LIKE '%patient%'
        OR LOWER(LastName) LIKE '%patient%'
        or CAST(MRNNumber AS INTEGER) < 55
    '''
    sql=f'''
    SELECT 
        PatientId as PatientID,
        VisitDateTime as EncounterDate
    FROM 
        tblEncounterSummary
        LEFT JOIN tblENcounterType ON (tblEncounterSummary.EncounterTypeId=tblENcounterType.EncounterTypeId)
    WHERE 
        IsBillable = 'TRUE'
        AND PatientID NOT IN ({testPatients})
        AND VISITDATETIME >= '2023-03-01'
    ORDER BY 
        PatientID, EncounterDate
    '''
    encounter_df = pd.read_sql(sql, conn)
    encounter_df['EncounterDate'] = pd.to_datetime(encounter_df['EncounterDate'])

    
    #keep the first encounter date
    encounter_df = encounter_df.drop_duplicates(subset="PatientID", keep='first')
   
    
    # get encounter time by hour
    encounter_df['Hour'] = encounter_df["EncounterDate"].dt.floor('h')
    
    # needed for filtering out transfer patients
    encounter_dictionary = dict(zip(encounter_df.PatientID, encounter_df.EncounterDate))
    
    logger.info(f"Successfully queried tblEncounterSummary.")
except Exception as e:
    logger.error(f"Failed to query tblEncounterSummary.", exc_info=True) 
    print(e)
    sys.exit(1)
encounter_df

Unnamed: 0,PatientID,EncounterDate,Hour
0,620285,2024-03-20 19:15:00,2024-03-20 19:00:00
2,620288,2023-10-08 12:00:00,2023-10-08 12:00:00
9,620292,2023-07-05 17:00:00,2023-07-05 17:00:00
52,620293,2023-10-10 18:15:00,2023-10-10 18:00:00
64,620295,2023-03-05 13:00:00,2023-03-05 13:00:00
...,...,...,...
144334,628834,2024-07-03 16:30:00,2024-07-03 16:00:00
144335,628843,2024-07-03 19:00:00,2024-07-03 19:00:00
144336,628848,2024-07-03 19:00:00,2024-07-03 19:00:00
144337,628850,2024-07-03 15:00:00,2024-07-03 15:00:00


### PatientDetails Query

In [216]:
try:
    # Select all patients
    sql= f'''
    SELECT 
        DISTINCT (PatientID) AS PatientID,
        FirstName,
        LastName,
        DOB,
        MRNNumber,
        PhoneNo AS Phone
    FROM 
        emr_PatientDetails
    WHERE
        PatientID NOT IN ({testPatients})
        AND LastName != 'Test'
        AND FirstName != 'Test'
        And MRNNumber LIKE '%2'
    ORDER BY 
        PatientID
    '''
    patient_details_df = pd.read_sql(sql, conn)

    
    logger.info(f"Successfully queried emr_PatientDetails.")
except Exception as e:
    logger.error(f"Failed to query emr_PatientDetails.", exc_info=True) 
    print(e)
    sys.exit(1)


patient_details_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone
0,620291,Bruchy,.HALBERSTAM,2011-01-08,0000000062,718-858-1766
1,620311,Sarah,FRIEDRICH,1954-01-19,0000000082,718-387-1790
2,620321,Alexandra,Pluscarr,1980-08-30,0000000092,516-382-4053
3,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831
4,620361,Frimet,Lowinger,2005-05-06,0000000132,718-384-7431
...,...,...,...,...,...,...
837,628857,Gittel,Spitzer,2004-11-21,0000008622,917-497-8317
838,628867,Kaila,Brownstein,2004-06-08,0000008632,917-428-1520
839,628878,Yosef,Werde,2016-03-24,0000008642,718-781-2831
840,628888,Henna,Katz,2014-04-07,0000008652,718-302-9356


In [217]:
try:
    # Select all patients
    sql= f'''
    SELECT 
        DISTINCT (PatientID) AS PatientID,
        FirstName,
        LastName,
        DOB,
        MRNNumber,
        PhoneNo AS Phone
    FROM 
        emr_PatientDetails
    WHERE
        PatientID NOT IN ({testPatients})
        AND LastName != 'Test'
        AND FirstName != 'Test'
        And MRNNumber LIKE '%2'
    ORDER BY 
        PatientID
    '''
    patient_details_df = pd.read_sql(sql, conn)

    
    logger.info(f"Successfully queried emr_PatientDetails.")
except Exception as e:
    logger.error(f"Failed to query emr_PatientDetails.", exc_info=True) 
    print(e)
    sys.exit(1)


patient_details_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone
0,620291,Bruchy,.HALBERSTAM,2011-01-08,0000000062,718-858-1766
1,620311,Sarah,FRIEDRICH,1954-01-19,0000000082,718-387-1790
2,620321,Alexandra,Pluscarr,1980-08-30,0000000092,516-382-4053
3,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831
4,620361,Frimet,Lowinger,2005-05-06,0000000132,718-384-7431
...,...,...,...,...,...,...
837,628857,Gittel,Spitzer,2004-11-21,0000008622,917-497-8317
838,628867,Kaila,Brownstein,2004-06-08,0000008632,917-428-1520
839,628878,Yosef,Werde,2016-03-24,0000008642,718-781-2831
840,628888,Henna,Katz,2014-04-07,0000008652,718-302-9356


### Discharged Query 

In [218]:
try:
    # Select all patients
    sql = f'''
    SELECT 
        PatientID,
        finalEncounterDate AS 'Final Encounter',
        'Discharged' As 'Status'
    FROM 
        ptPatient_Activity
    WHERE
        PatientID NOT IN ({testPatients})
        AND is_ActiveInInSync = 0
        OR is_lastEncounterDischarge = 1
       
    '''
    discharged_patients_df = pd.read_sql(sql, conn)
    
    
    
    
   
    
    logger.info(f"Successfully queried ptPatient_Activity.")
except Exception as e:
    logger.error(f"Failed to query ptPatient_Activity.", exc_info=True) 
    print(e)
    sys.exit(1)
discharged_patients_df

Unnamed: 0,PatientID,Final Encounter,Status
0,620290,2023-02-06 12:53:00,Discharged
1,620291,2022-07-07 12:27:00,Discharged
2,620296,2022-05-23 13:33:00,Discharged
3,620299,2022-03-14 23:09:00,Discharged
4,620300,2024-04-01 11:54:00,Discharged
...,...,...,...
2923,628341,2024-06-05 17:00:00,Discharged
2924,628345,2024-04-08 17:45:00,Discharged
2925,628363,2024-05-14 15:00:00,Discharged
2926,628413,2024-05-21 13:43:00,Discharged


### NOMS Query 

#### Baseline NOMS Taken

In [219]:
try:
    sql=f'''
    SELECT 
        PatientID,
        CreatedOn,
        StatusDesc AS 'Baseline NOMS',
        CASE 
            WHEN [StatusDesc] = 'Entered Into SPARS' THEN 'True'
            ELSE 'False'
        END AS [StatusDesc_Transformed]
    FROM 
        tblNOMS_AllAssessments
    WHERE 
        AssessmentKey LIKE '%Baseline%'
        AND (StatusDesc LIKE '%Entered%'
        OR StatusDesc LIKE '%Completed%')
        AND
        PatientID NOT IN ({testPatients})
        AND
        CreatedOn >= '2023-03-01'
    ORDER BY
        PatientID
    '''
    baseline_df = pd.read_sql(sql, conn)
    
   
    
   
    
    logger.info(f"Successfully queried tblNOMS_AllAssessments for Baselines.")
except Exception as e:
    logger.error(f"Failed to query tblNOMS_AllAssessments for Baselines.", exc_info=True) 
    print(e)
    sys.exit(1)

baseline_df 

Unnamed: 0,PatientID,CreatedOn,Baseline NOMS,StatusDesc_Transformed
0,620331,2024-04-25 14:27:51,Entered Into SPARS,True
1,620381,2023-07-31 16:35:55,Entered Into SPARS,True
2,620421,2023-05-03 14:34:24,Entered Into SPARS,True
3,620431,2023-04-25 18:02:03,Entered Into SPARS,True
4,620461,2023-05-11 15:13:43,Entered Into SPARS,True
...,...,...,...,...
413,628657,2024-06-04 09:21:54,Entered Into SPARS,True
414,628727,2024-06-24 11:27:53,Entered Into SPARS,True
415,628737,2024-07-01 17:24:05,Completed,False
416,628747,2024-06-24 11:34:24,Entered Into SPARS,True


#### Reassessment NOMS

In [220]:
try:
    sql=f'''
    SELECT 
        PatientID, 'True' As '6 Month Reassessment NOMS' 
        
    FROM 
        tblNOMS_AllAssessments
    WHERE 
        AssessmentKey LIKE 'Reassessment%'
        AND (StatusDesc LIKE '%Entered%'
        OR StatusDesc LIKE '%Completed%')
        AND
        PatientID NOT IN ({testPatients})
        ANd CreatedOn >= '2023-03-01'
    ORDER BY
        PatientID
    '''
    reassessment_df = pd.read_sql(sql, conn)
    
    
    # filter after march
    # march_mask = datetime.date(2023,3,1)
    # reassessment_df = df[df['CreatedOn'] >= march_mask]
    
#     # find transfer patients
#     transfer_patients=[]
#     for patient in reassessment_df['PatientID']:
#         if False != encounter_dictionary.get(patient, False):
#             transfer_patients.append(patient)
            
#     # remove transfers if reassessment is before 6 months
#     earliest_reassessment_date = datetime.date(2023,8,1)
#     for patient in transfer_patients:
#         first_encounter = encounter_dictionary.get(patient)
#     #     incremented_first_encounter = first_encounter + np.timedelta64(6, 'M')
#         reassmessnent_date = df[df['PatientID'] == patient]['CreatedOn']
#         print(reassmessnent_date)
#         if (reassmessnent_date.values > earliest_reassessment_date) == False:
#             reassessment_df = reassessment_df[reassessment_df['PatientID'] != patient].copy()
    
   
    
    logger.info(f"Successfully queried tblNOMS_AllAssessments for Reassessments.")
except Exception as e:
    logger.error(f"Failed to query tblNOMS_AllAssessments for Reassessments.", exc_info=True) 
    print(e)
    sys.exit(1)

reassessment_df

Unnamed: 0,PatientID,6 Month Reassessment NOMS
0,620381,True
1,620421,True
2,620431,True
3,620461,True
4,620501,True
...,...,...
229,627487,True
230,627567,True
231,627677,True
232,627687,True


#### Discharge NOMS

In [221]:
try:
    sql=f'''
    SELECT 
        DISTINCT (PatientID), 'Yes' as 'Discharge NOMS'
    FROM 
        tblNOMS_AllAssessments
    WHERE 
        AssessmentKey LIKE '%Discharge%'
        AND (StatusDesc LIKE '%Entered%'
        OR StatusDesc LIKE '%Completed%')
        AND
        PatientID NOT IN ({testPatients})
        AND CreatedOn >= '2023-03-01'
    ORDER BY
        PatientID
    '''
    df = pd.read_sql(sql, conn)

    discharge_df = df.drop_duplicates()
    

    logger.info(f"Successfully queried tblNOMS_AllAssessments for Discharges.")
except Exception as e:
    logger.error(f"Failed to query tblNOMS_AllAssessments for Discharges.", exc_info=True) 
    print(e)
    sys.exit(1)
    
discharge_df

Unnamed: 0,PatientID,Discharge NOMS
0,620541,Yes
1,620671,Yes
2,620681,Yes
3,620721,Yes
4,620731,Yes
...,...,...
123,627777,Yes
124,627827,Yes
125,627937,Yes
126,627977,Yes


#### NOMS Refusals

In [222]:
try:
    sql=f'''
    SELECT 
        PatientID, 'No' AS Consent,
       CASE 
            WHEN InterviewConductedNoID = '' THEN 'None Given'
            ELSE InterviewConductedNoID 
        END AS Reason
    FROM 
        tblNOMS_AllAssessments
    WHERE 
        AssessmentKey LIKE 'Base%'      AND
        InterviewConductedID LIKE '%0%' AND
        PatientID NOT IN ({testPatients})
    '''
    refused_df = pd.read_sql(sql, conn)

    logger.info(f"Successfully queried tblNOMS_AllAssessments for Baseline Refusals.")
except Exception as e:
    logger.error(f"Failed to query tblNOMS_AllAssessments for Baseline Refusals.", exc_info=True) 
    print(e)
    sys.exit(1)
refused_df

Unnamed: 0,PatientID,Consent,Reason
0,621243,No,3 - Client Refused This Interview
1,622991,No,3 - Client Refused This Interview
2,621451,No,4 - Client Was Not Reached For Interview
3,621171,No,2 - Client Was Impaired Or Unable To Provide C...
4,622681,No,3 - Client Refused This Interview
...,...,...,...
386,628597,No,4 - Client Was Not Reached For Interview
387,628727,No,3 - Client Refused This Interview
388,626005,No,3 - Client Refused This Interview
389,621521,No,3 - Client Refused This Interview


#### NOMS Consents

In [223]:
try:
    sql=f'''
    SELECT  
        PatientID, 'Yes' as Consent
    FROM 
        tblNOMS_AllAssessments
    WHERE 
        AssessmentKey LIKE 'Base%'      AND
        InterviewConductedID LIKE '%1%' AND
        PatientID NOT IN ({testPatients})
    '''
    agreed_df = pd.read_sql(sql, conn)
    
    
    logger.info(f"Successfully queried tblNOMS_AllAssessments for Baseline Consents.")
except Exception as e:
    logger.error(f"Failed to query tblNOMS_AllAssessments for Baseline Consents.", exc_info=True) 
    print(e)
    sys.exit(1)
agreed_df

Unnamed: 0,PatientID,Consent
0,625535,Yes
1,622240,Yes
2,626485,Yes
3,625111,Yes
4,621127,Yes
...,...,...
337,627707,Yes
338,620966,Yes
339,626135,Yes
340,622248,Yes


#### Append Consent dfs 

In [224]:
consent_df = pd.concat([refused_df, agreed_df], ignore_index=True)

### Entered into Spars

#### Baselines

In [225]:
try:
    sql=f'''
    SELECT 
        PatientID,
        StatusDesc AS 'Baseline SPARS'
    FROM 
        tblNOMSAssessmentDetails
    WHERE
        PatientID NOT IN ({testPatients})
    ORDER BY
        PatientID
    '''
    baseline_SPARS = pd.read_sql(sql,conn)
        
    logger.info(f"Successfully queried tblNOMSAssessmentDetails for Baselines entered to SPARS.")
except Exception as e:
    logger.error(f"Failed to query tblNOMSAssessmentDetails for Baselines entered to SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)

baseline_SPARS

Unnamed: 0,PatientID,Baseline SPARS
0,620295,
1,620308,
2,620331,Entered Into SPARS
3,620333,
4,620373,
...,...,...
728,628657,Entered Into SPARS
729,628727,Entered Into SPARS
730,628737,Completed
731,628747,Entered Into SPARS


#### Reassessments

In [226]:
try:
    sql=f'''
    SELECT 
        PatientID,
        StatusDesc AS 'Reassessment SPARS'
    FROM 
        tblNOMSReAssessmentDetails
    WHERE
        PatientID NOT IN ({testPatients})
    ORDER BY
        PatientID
    '''
    reassessment_SPARS = pd.read_sql(sql,conn)
    
    logger.info(f"Successfully queried tblNOMSAssessmentDetails for Reassessments entered to SPARS.")
except Exception as e:
    logger.error(f"Failed to query tblNOMSAssessmentDetails for Reassessments entered to SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)

reassessment_SPARS

Unnamed: 0,PatientID,Reassessment SPARS
0,620295,
1,620333,
2,620373,
3,620380,
4,620381,Entered Into SPARS
...,...,...
454,627487,Entered Into SPARS
455,627567,Entered Into SPARS
456,627677,Entered Into SPARS
457,627687,Entered Into SPARS


#### Discharges 

In [227]:
try:
    sql=f'''
    SELECT 
        PatientID,
        StatusDesc AS 'Discharge SPARS'
    FROM 
        tblNOMSDischargeDetails
    WHERE
        PatientID NOT IN ({testPatients})
    ORDER BY
        PatientID
    '''
    discharge_SPARS = pd.read_sql(sql,conn)
    discharge_SPARS =  discharge_SPARS.drop_duplicates()
    
    logger.info(f"Successfully queried tblNOMSAssessmentDetails for Discharges entered to SPARS.")
except Exception as e:
    logger.error(f"Failed to query tblNOMSAssessmentDetails for Discharges entered to SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)

discharge_SPARS
    

Unnamed: 0,PatientID,Discharge SPARS
0,620541,Entered Into SPARS
1,620588,
2,620671,Entered Into SPARS
3,620681,Entered Into SPARS
4,620721,Entered Into SPARS
...,...,...
234,627977,Completed
235,628147,Completed
236,628207,First Attempt
237,628257,First Attempt


## Merges

### Merge patient_details and encounters 

In [228]:
try:
    full_data_collection_df = patient_details_df.merge(encounter_df,
                                                       on='PatientID',
                                                       how='left')
    # drop patients without encounters
    full_data_collection_df = full_data_collection_df[full_data_collection_df['EncounterDate'].notna()]
    
    logger.info(f"Successfully merged patient deatils with encounters.")
except Exception as e:
    logger.error(f"Failed to merge patient deatils with encounters.", exc_info=True) 
    print(e)
    sys.exit(1)

full_data_collection_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone,EncounterDate,Hour
3,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831,2024-04-15 15:15:00,2024-04-15 15:00:00
6,620381,Sarah,Masri,2017-10-15,0000000152,347-350-3776,2023-07-09 19:30:00,2023-07-09 19:00:00
10,620421,Mattanya,Ginsburg,1984-08-09,0000000192,718-755-1077,2023-03-03 11:15:00,2023-03-03 11:00:00
11,620431,SHOLOM,Raitport,2002-09-26,0000000202,718-310-7547,2023-03-02 18:45:00,2023-03-02 18:00:00
13,620461,Sarah,Fischer,1976-08-05,0000000232,347-930-9550,2023-03-05 11:30:00,2023-03-05 11:00:00
...,...,...,...,...,...,...,...,...
816,628647,Zev,Lein,2015-01-13,0000008412,646-410-3049,2024-05-28 10:00:00,2024-05-28 10:00:00
817,628657,Simcha,Teitelbaum,2010-08-05,0000008422,347-799-4524,2024-05-30 17:00:00,2024-05-30 17:00:00
824,628727,Rivka,Leitner,1991-06-16,0000008492,347-998-4601,2024-06-19 17:00:00,2024-06-19 17:00:00
825,628737,Tilly,Jacobowitz,2017-08-14,0000008502,718-384-1495,2024-06-26 16:00:00,2024-06-26 16:00:00


### Merge in Statuses

In [229]:
discharged_patients_df.columns.tolist()


['PatientID', 'Final Encounter', 'Status']

In [230]:
try:
    full_data_collection_df = full_data_collection_df.merge(discharged_patients_df,
                                                            on = 'PatientID',
                                                            how = 'left')
    # fill nan status
    full_data_collection_df['Status']= full_data_collection_df['Status'].fillna('Active')
    
    logger.info(f"Successfully merged in discharged patients.")
except Exception as e:
    logger.error(f"Failed to merge in discharged patients.", exc_info=True) 
    print(e)
    sys.exit(1)   

### Merge in Baseline NOMS

In [231]:
try:
    full_data_collection_df = full_data_collection_df.merge(baseline_df,
                                                            on='PatientID',
                                                            how='left')
    full_data_collection_df['Baseline NOMS'] = full_data_collection_df['Baseline NOMS'].fillna(False)

    logger.info(f"Successfully merged in Baseline NOMS.")
except Exception as e:
    logger.error(f"Failed to merge in Baseline NOMS.", exc_info=True) 
    print(e)
    sys.exit(1)   

In [232]:
full_data_collection_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone,EncounterDate,Hour,Final Encounter,Status,CreatedOn,Baseline NOMS,StatusDesc_Transformed
0,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831,2024-04-15 15:15:00,2024-04-15 15:00:00,NaT,Active,2024-04-25 14:27:51,Entered Into SPARS,True
1,620381,Sarah,Masri,2017-10-15,0000000152,347-350-3776,2023-07-09 19:30:00,2023-07-09 19:00:00,NaT,Active,2023-07-31 16:35:55,Entered Into SPARS,True
2,620421,Mattanya,Ginsburg,1984-08-09,0000000192,718-755-1077,2023-03-03 11:15:00,2023-03-03 11:00:00,NaT,Active,2023-05-03 14:34:24,Entered Into SPARS,True
3,620431,SHOLOM,Raitport,2002-09-26,0000000202,718-310-7547,2023-03-02 18:45:00,2023-03-02 18:00:00,NaT,Active,2023-04-25 18:02:03,Entered Into SPARS,True
4,620461,Sarah,Fischer,1976-08-05,0000000232,347-930-9550,2023-03-05 11:30:00,2023-03-05 11:00:00,NaT,Active,2023-05-11 15:13:43,Entered Into SPARS,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,628647,Zev,Lein,2015-01-13,0000008412,646-410-3049,2024-05-28 10:00:00,2024-05-28 10:00:00,NaT,Active,2024-05-29 11:28:47,Entered Into SPARS,True
406,628657,Simcha,Teitelbaum,2010-08-05,0000008422,347-799-4524,2024-05-30 17:00:00,2024-05-30 17:00:00,NaT,Active,2024-06-04 09:21:54,Entered Into SPARS,True
407,628727,Rivka,Leitner,1991-06-16,0000008492,347-998-4601,2024-06-19 17:00:00,2024-06-19 17:00:00,NaT,Active,2024-06-24 11:27:53,Entered Into SPARS,True
408,628737,Tilly,Jacobowitz,2017-08-14,0000008502,718-384-1495,2024-06-26 16:00:00,2024-06-26 16:00:00,NaT,Active,2024-07-01 17:24:05,Completed,False


### Merge in Reassessment NOMS

In [233]:
try:
    full_data_collection_df = full_data_collection_df.merge(reassessment_df,
                                                            on='PatientID',
                                                            how='left')
    full_data_collection_df['6 Month Reassessment NOMS'] = full_data_collection_df['6 Month Reassessment NOMS'].fillna(False)

    logger.info(f"Successfully merged in Reassessment NOMS.")
except Exception as e:
    logger.error(f"Failed to merge in Reassessment NOMS.", exc_info=True) 
    print(e)
    sys.exit(1) 

In [234]:
full_data_collection_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone,EncounterDate,Hour,Final Encounter,Status,CreatedOn,Baseline NOMS,StatusDesc_Transformed,6 Month Reassessment NOMS
0,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831,2024-04-15 15:15:00,2024-04-15 15:00:00,NaT,Active,2024-04-25 14:27:51,Entered Into SPARS,True,False
1,620381,Sarah,Masri,2017-10-15,0000000152,347-350-3776,2023-07-09 19:30:00,2023-07-09 19:00:00,NaT,Active,2023-07-31 16:35:55,Entered Into SPARS,True,True
2,620421,Mattanya,Ginsburg,1984-08-09,0000000192,718-755-1077,2023-03-03 11:15:00,2023-03-03 11:00:00,NaT,Active,2023-05-03 14:34:24,Entered Into SPARS,True,True
3,620431,SHOLOM,Raitport,2002-09-26,0000000202,718-310-7547,2023-03-02 18:45:00,2023-03-02 18:00:00,NaT,Active,2023-04-25 18:02:03,Entered Into SPARS,True,True
4,620461,Sarah,Fischer,1976-08-05,0000000232,347-930-9550,2023-03-05 11:30:00,2023-03-05 11:00:00,NaT,Active,2023-05-11 15:13:43,Entered Into SPARS,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,628647,Zev,Lein,2015-01-13,0000008412,646-410-3049,2024-05-28 10:00:00,2024-05-28 10:00:00,NaT,Active,2024-05-29 11:28:47,Entered Into SPARS,True,False
409,628657,Simcha,Teitelbaum,2010-08-05,0000008422,347-799-4524,2024-05-30 17:00:00,2024-05-30 17:00:00,NaT,Active,2024-06-04 09:21:54,Entered Into SPARS,True,False
410,628727,Rivka,Leitner,1991-06-16,0000008492,347-998-4601,2024-06-19 17:00:00,2024-06-19 17:00:00,NaT,Active,2024-06-24 11:27:53,Entered Into SPARS,True,False
411,628737,Tilly,Jacobowitz,2017-08-14,0000008502,718-384-1495,2024-06-26 16:00:00,2024-06-26 16:00:00,NaT,Active,2024-07-01 17:24:05,Completed,False,False


### Merge in Discharge NOMS 

In [235]:
try:
    full_data_collection_df = full_data_collection_df.merge(discharge_df,
                                                            on='PatientID',
                                                            how='left')
    full_data_collection_df['Discharge NOMS'] = full_data_collection_df['Discharge NOMS'].fillna(False)

    logger.info(f"Successfully merged in Discharge NOMS.")
except Exception as e:
    logger.error(f"Failed to merge in Discharge NOMS.", exc_info=True) 
    print(e)
    sys.exit(1) 

In [236]:
full_data_collection_df

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone,EncounterDate,Hour,Final Encounter,Status,CreatedOn,Baseline NOMS,StatusDesc_Transformed,6 Month Reassessment NOMS,Discharge NOMS
0,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831,2024-04-15 15:15:00,2024-04-15 15:00:00,NaT,Active,2024-04-25 14:27:51,Entered Into SPARS,True,False,False
1,620381,Sarah,Masri,2017-10-15,0000000152,347-350-3776,2023-07-09 19:30:00,2023-07-09 19:00:00,NaT,Active,2023-07-31 16:35:55,Entered Into SPARS,True,True,False
2,620421,Mattanya,Ginsburg,1984-08-09,0000000192,718-755-1077,2023-03-03 11:15:00,2023-03-03 11:00:00,NaT,Active,2023-05-03 14:34:24,Entered Into SPARS,True,True,False
3,620431,SHOLOM,Raitport,2002-09-26,0000000202,718-310-7547,2023-03-02 18:45:00,2023-03-02 18:00:00,NaT,Active,2023-04-25 18:02:03,Entered Into SPARS,True,True,False
4,620461,Sarah,Fischer,1976-08-05,0000000232,347-930-9550,2023-03-05 11:30:00,2023-03-05 11:00:00,NaT,Active,2023-05-11 15:13:43,Entered Into SPARS,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408,628647,Zev,Lein,2015-01-13,0000008412,646-410-3049,2024-05-28 10:00:00,2024-05-28 10:00:00,NaT,Active,2024-05-29 11:28:47,Entered Into SPARS,True,False,False
409,628657,Simcha,Teitelbaum,2010-08-05,0000008422,347-799-4524,2024-05-30 17:00:00,2024-05-30 17:00:00,NaT,Active,2024-06-04 09:21:54,Entered Into SPARS,True,False,False
410,628727,Rivka,Leitner,1991-06-16,0000008492,347-998-4601,2024-06-19 17:00:00,2024-06-19 17:00:00,NaT,Active,2024-06-24 11:27:53,Entered Into SPARS,True,False,False
411,628737,Tilly,Jacobowitz,2017-08-14,0000008502,718-384-1495,2024-06-26 16:00:00,2024-06-26 16:00:00,NaT,Active,2024-07-01 17:24:05,Completed,False,False,False


### Merge in SPARS Status

In [237]:
try: # add the baseline SPARS
    full_data_collection_df = full_data_collection_df.merge(baseline_SPARS,
                                                            on='PatientID',
                                                            how='left')
    logger.info(f"Successfully merged in Baseline SPARS.")
except Exception as e:
    logger.error(f"Failed to merge in Baseline SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)
    

try: # add the reassessment SPARS
    full_data_collection_df = full_data_collection_df.merge(reassessment_SPARS,
                                                        on='PatientID',
                                                        how='left')
    logger.info(f"Successfully merged in Reassessment SPARS.")
except Exception as e:
    logger.error(f"Failed to merge in Reassessment SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)


try: # add the discharge SPARS
    full_data_collection_df = full_data_collection_df.merge(discharge_SPARS, 
                                                        on='PatientID',
                                                        how='left')
    logger.info(f"Successfully merged in Discharge SPARS.")
except Exception as e:
    logger.error(f"Failed to merge in Discharge SPARS.", exc_info=True) 
    print(e)
    sys.exit(1)

foo = full_data_collection_df.copy()

In [238]:
foo

Unnamed: 0,PatientID,FirstName,LastName,DOB,MRNNumber,Phone,EncounterDate,Hour,Final Encounter,Status,CreatedOn,Baseline NOMS,StatusDesc_Transformed,6 Month Reassessment NOMS,Discharge NOMS,Baseline SPARS,Reassessment SPARS,Discharge SPARS
0,620331,Chaya,Werde,2007-11-09,0000000102,718-781-2831,2024-04-15 15:15:00,2024-04-15 15:00:00,NaT,Active,2024-04-25 14:27:51,Entered Into SPARS,True,False,False,Entered Into SPARS,,
1,620381,Sarah,Masri,2017-10-15,0000000152,347-350-3776,2023-07-09 19:30:00,2023-07-09 19:00:00,NaT,Active,2023-07-31 16:35:55,Entered Into SPARS,True,True,False,Entered Into SPARS,Entered Into SPARS,
2,620421,Mattanya,Ginsburg,1984-08-09,0000000192,718-755-1077,2023-03-03 11:15:00,2023-03-03 11:00:00,NaT,Active,2023-05-03 14:34:24,Entered Into SPARS,True,True,False,Entered Into SPARS,Entered Into SPARS,
3,620431,SHOLOM,Raitport,2002-09-26,0000000202,718-310-7547,2023-03-02 18:45:00,2023-03-02 18:00:00,NaT,Active,2023-04-25 18:02:03,Entered Into SPARS,True,True,False,Entered Into SPARS,Entered Into SPARS,
4,620461,Sarah,Fischer,1976-08-05,0000000232,347-930-9550,2023-03-05 11:30:00,2023-03-05 11:00:00,NaT,Active,2023-05-11 15:13:43,Entered Into SPARS,True,True,False,Entered Into SPARS,Entered Into SPARS,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442,628647,Zev,Lein,2015-01-13,0000008412,646-410-3049,2024-05-28 10:00:00,2024-05-28 10:00:00,NaT,Active,2024-05-29 11:28:47,Entered Into SPARS,True,False,False,Entered Into SPARS,,
443,628657,Simcha,Teitelbaum,2010-08-05,0000008422,347-799-4524,2024-05-30 17:00:00,2024-05-30 17:00:00,NaT,Active,2024-06-04 09:21:54,Entered Into SPARS,True,False,False,Entered Into SPARS,,
444,628727,Rivka,Leitner,1991-06-16,0000008492,347-998-4601,2024-06-19 17:00:00,2024-06-19 17:00:00,NaT,Active,2024-06-24 11:27:53,Entered Into SPARS,True,False,False,Entered Into SPARS,,
445,628737,Tilly,Jacobowitz,2017-08-14,0000008502,718-384-1495,2024-06-26 16:00:00,2024-06-26 16:00:00,NaT,Active,2024-07-01 17:24:05,Completed,False,False,False,Completed,,


## Merge in New Notes

In [239]:
NoteSubTypeDict = {5: "First Attempt",
                    6: 'First Attempt',
                    7: "First Attempt"}
NoteSubTypeColumnDict = {5: 'Baseline SPARS',
                          6: "Reassessment SPARS",
                          7: "Discharge SPARS"}
try:
    sql=f'''
    SELECT 
        *
    FROM 
        tblPatientNotes
    WHERE
        NoteSubType IN (5, 6, 7)
    ORDER BY
        PatientID
    '''
    patientNotes = pd.read_sql(sql,conn)
    

    
    patientNotes['NoteAddedOn'] = pd.to_datetime(patientNotes['NoteAddedOn'])
    patientNotes.sort_values(by='NoteAddedOn', ascending=False)
    patientNotes['NoteSubTypeName'] = patientNotes['NoteSubType'].map(lambda subtypeID: NoteSubTypeDict[int(subtypeID)])
    patientNotes['NoteSubTypeColumn'] = patientNotes['NoteSubType'].map(lambda subtypeID: NoteSubTypeColumnDict[int(subtypeID)])
    patientNotesPivot = patientNotes.pivot(index=['PatientID'], columns = "NoteSubTypeColumn", values="NoteSubTypeName").reset_index()
    patientbaselineDict = patientNotesPivot.set_index('PatientID').to_dict()['Baseline SPARS']
    patientReassessmentDict = patientNotesPivot.set_index('PatientID').to_dict()['Reassessment SPARS']
    patientDischargeDict = patientNotesPivot.set_index('PatientID').to_dict()['Discharge SPARS']

    def checkforBaselineNotes(patientID, note):
        if note == "" or str(note) == 'nan':
            return patientbaselineDict.get(patientID, "")
        else:
            return note

    def checkforReassessmentNotes(patientID, note):
        if note == "" or str(note) == 'nan':
            return patientReassessmentDict.get(patientID, "")
        else:
            return note

    def checkforDischargeNotes(patientID, note):
        if note == "" or str(note) == 'nan':
            return patientDischargeDict.get(patientID, "")
        else:
            return note
    full_data_collection_df['Baseline SPARS'] = full_data_collection_df.apply(lambda row: checkforBaselineNotes(row['PatientID'], row['Baseline SPARS']), axis=1)
    full_data_collection_df['Reassessment SPARS'] = full_data_collection_df.apply(lambda row: checkforReassessmentNotes(row['PatientID'], row['Reassessment SPARS']), axis=1)
    full_data_collection_df['Discharge SPARS'] = full_data_collection_df.apply(lambda row: checkforDischargeNotes(row['PatientID'], row['Discharge SPARS']), axis=1)
#     logger.info(f"Successfully queried tblNotes and created columns.")
except Exception as e:
#     logger.error(f"Failed to query table notes and creat columns.", exc_info=True) 
    print(e)

## Prettify Data

In [240]:
full_data_collection_df = foo.copy()

try: # Format Names
    first_name = full_data_collection_df['FirstName'].map(lambda name: str(name).capitalize())
    last_name = full_data_collection_df['LastName'].map(lambda name: str(name).capitalize())
    full_data_collection_df['Name'] = last_name + ', ' + first_name
    full_data_collection_df.drop(columns=['FirstName','LastName'], inplace=True)
    
    logger.info(f"Successfully formated names.")
except Exception as e:
    logger.error(f"Failed to format names.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try: # Baseline Date
    full_data_collection_df['Hour'] = pd.to_datetime(full_data_collection_df['Hour'])
    full_data_collection_df['Baseline Due Date'] = full_data_collection_df['Hour'] + DateOffset(days=30)
    full_data_collection_df['Baseline Due Date'] = full_data_collection_df['Baseline Due Date'].map(lambda num: num.strftime('%m-%d-%Y'))

    logger.info(f"Successfully added Baseline Due Date.")
except Exception as e:
    logger.error(f"Failed to add Baseline Due Date.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try:    # Baseline Warning
    full_data_collection_df['Baseline Due Date'] = pd.to_datetime(full_data_collection_df['Baseline Due Date'])
    full_data_collection_df['Baseline Warning'] = (full_data_collection_df['Baseline Due Date'] <= (pd.Timestamp.today())) & (full_data_collection_df['Baseline NOMS'] == False)
    
    logger.info(f"Successfully added Baseline Warning.")
except Exception as e:
    logger.error(f"Failed to add Baseline Warning.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try:    # 6 Month Reassessment Date
    # we're going to set the default encounter date to the 15th of the month to match SPARS
    full_data_collection_df['Hour'] = full_data_collection_df['Hour'].apply(lambda dt: dt.replace(day=15))
    full_data_collection_df['6 Month Reassessment Date'] = full_data_collection_df['Hour'] + DateOffset(months=6)
    full_data_collection_df['6 Month Reassessment Date'] = full_data_collection_df['6 Month Reassessment Date'].map(lambda num: num.strftime('%m-%d-%Y'))

    logger.info(f"Successfully added Reassessment Date.")
except Exception as e:
    logger.error(f"Failed to add Reassessment Date.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try:    # 6 Month Warning
    full_data_collection_df['6 Month Reassessment Date'] = pd.to_datetime(full_data_collection_df['6 Month Reassessment Date'])
    full_data_collection_df['6 Month Reassessment Warning'] = full_data_collection_df['6 Month Reassessment Date'] - DateOffset(months=1) <= pd.Timestamp.today()

    logger.info(f"Successfully added Reassessment Warning.")
except Exception as e:
    logger.error(f"Failed to add Reassessment Warning.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try:    # add days since first encounter
    today = datetime.date.today()
    today = pd.to_datetime(today)
    def daysSinceFirstEncounter(row):
        date = row['EncounterDate']
        if type(date) == pd.Timestamp:
            return (today - date).days
        return np.nan
#     def daysTillBLineCompletion(row):
#         encounter_date = row['EncounterDate']
#         if isinstance(encounter_date, pd.Timestamp):
            
    def daysTillThirtyDaysAfter(row):
        encounter_date = row['EncounterDate']
        if isinstance(encounter_date, pd.Timestamp):
            # Calculate thirty days after the encounter date
            thirty_days_after = encounter_date + pd.DateOffset(days=30)
            days_till_thirty_days_after = (thirty_days_after - pd.Timestamp.today()).days
            return days_till_thirty_days_after
        return np.nan
    def daysTillSevenMonths(row):
        encounter_date = row['EncounterDate']
        if isinstance(encounter_date, pd.Timestamp):
            # Calculate seven months from the encounter date
            seven_months_later = encounter_date + pd.DateOffset(months=7)
            # Calculate the number of days between today and seven months later
            days_till_seven_months = (seven_months_later - pd.Timestamp.today()).days
            return days_till_seven_months
        return np.nan
    full_data_collection_df['Days Since Encounter'] = full_data_collection_df.apply(lambda row: daysSinceFirstEncounter(row), axis=1)  
    full_data_collection_df['Days Till Reassessment Date'] = full_data_collection_df.apply(lambda row: daysTillSevenMonths(row), axis=1)  
    full_data_collection_df['Days Till Baseline Date'] = full_data_collection_df.apply(lambda row: daysTillThirtyDaysAfter(row), axis=1)  
    logger.info(f"Successfully added Days Since First Encounter and days remaining till baseline/reassessment due dates.")
except Exception as e:
    logger.error(f"Failed to add Days Since First Encounter and days remaining till baseline/reassessment due dates.", exc_info=True) 
    print(e)
    sys.exit(1)

try:    # add days since final encounter
    def daysSinceDischarge(row):
        date = row['Final Encounter']
        if type(date) == pd.Timestamp:
            return (today - date).days
        return np.nan
    full_data_collection_df['Days Since Final Encounter'] = full_data_collection_df.apply(lambda row: daysSinceDischarge(row), axis=1)

    logger.info(f"Successfully added Days Since Final Encounter.")
except Exception as e:
    logger.error(f"Failed to add Days Since Final Encounter.", exc_info=True) 
    print(e)
    sys.exit(1)
    
try:     # find all discharged patients and reset Status to "Discharged" 
    discharge_mask_1 = full_data_collection_df['Status'] == "Discharged"
    discharge_mask_2 = full_data_collection_df['Discharge NOMS'] == True
    discharge_mask_3 = full_data_collection_df['Discharge SPARS'] == 'Entered Into SPARS'
    completed_discharge_mask = np.logical_or(discharge_mask_1, discharge_mask_2, discharge_mask_3)
    full_data_collection_df.loc[completed_discharge_mask, 'Status'] = 'Discharged'

    logger.info(f"Successfully updated discharged statuses.")
except Exception as e:
    logger.error(f"Failed to update discharged statuses.", exc_info=True) 
    print(e)
    sys.exit(1)

In [241]:
full_data_collection_df[full_data_collection_df['PatientID'].duplicated()]

Unnamed: 0,PatientID,DOB,MRNNumber,Phone,EncounterDate,Hour,Final Encounter,Status,CreatedOn,Baseline NOMS,...,Discharge SPARS,Name,Baseline Due Date,Baseline Warning,6 Month Reassessment Date,6 Month Reassessment Warning,Days Since Encounter,Days Till Reassessment Date,Days Till Baseline Date,Days Since Final Encounter
38,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,Entered Into SPARS,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
39,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
40,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,Entered Into SPARS,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
41,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
42,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,Entered Into SPARS,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
43,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
44,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,Entered Into SPARS,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
45,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
46,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,Entered Into SPARS,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0
47,621201,1980-05-20,972,718-541-4343,2023-03-12 17:30:00,2023-03-15 17:00:00,2023-03-13 10:44:00,Discharged,2023-07-20 11:24:10,Entered Into SPARS,...,,"Levin, Naomi",2023-04-11,False,2023-09-15,True,485,-272,-456,484.0


In [242]:
# Reorder Columns
full_data_collection_df = full_data_collection_df[['PatientID',
                                                   'Name',
                                                   'DOB',
                                                   'Phone',
                                                   'MRNNumber',
                                                   'Status',
                                                   'EncounterDate',
                                                   'Hour',
                                                   'Days Since Encounter',
                                                   'Days Till Reassessment Date',
                                                   'Days Till Baseline Date',
                                                   'Baseline NOMS',
                                                   'Baseline Due Date',
                                                   'Baseline Warning',
                                                   'Baseline SPARS',
                                                   '6 Month Reassessment NOMS',
                                                   '6 Month Reassessment Date',
                                                   '6 Month Reassessment Warning',
                                                   'Reassessment SPARS',
                                                   'Final Encounter',
                                                   'Days Since Final Encounter',
                                                   'Discharge NOMS',
                                                   'Discharge SPARS']]
full_data_collection_df = full_data_collection_df.drop_duplicates('PatientID')
full_data_collection_df

Unnamed: 0,PatientID,Name,DOB,Phone,MRNNumber,Status,EncounterDate,Hour,Days Since Encounter,Days Till Reassessment Date,...,Baseline Warning,Baseline SPARS,6 Month Reassessment NOMS,6 Month Reassessment Date,6 Month Reassessment Warning,Reassessment SPARS,Final Encounter,Days Since Final Encounter,Discharge NOMS,Discharge SPARS
0,620331,"Werde, Chaya",2007-11-09,718-781-2831,0000000102,Active,2024-04-15 15:15:00,2024-04-15 15:00:00,85,128,...,False,Entered Into SPARS,False,2024-10-15,False,,NaT,,False,
1,620381,"Masri, Sarah",2017-10-15,347-350-3776,0000000152,Active,2023-07-09 19:30:00,2023-07-15 19:00:00,366,-152,...,False,Entered Into SPARS,True,2024-01-15,True,Entered Into SPARS,NaT,,False,
2,620421,"Ginsburg, Mattanya",1984-08-09,718-755-1077,0000000192,Active,2023-03-03 11:15:00,2023-03-15 11:00:00,494,-282,...,False,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,
3,620431,"Raitport, Sholom",2002-09-26,718-310-7547,0000000202,Active,2023-03-02 18:45:00,2023-03-15 18:00:00,495,-282,...,False,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,
4,620461,"Fischer, Sarah",1976-08-05,347-930-9550,0000000232,Active,2023-03-05 11:30:00,2023-03-15 11:00:00,492,-280,...,False,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442,628647,"Lein, Zev",2015-01-13,646-410-3049,0000008412,Active,2024-05-28 10:00:00,2024-05-15 10:00:00,42,170,...,False,Entered Into SPARS,False,2024-11-15,False,,NaT,,False,
443,628657,"Teitelbaum, Simcha",2010-08-05,347-799-4524,0000008422,Active,2024-05-30 17:00:00,2024-05-15 17:00:00,40,173,...,False,Entered Into SPARS,False,2024-11-15,False,,NaT,,False,
444,628727,"Leitner, Rivka",1991-06-16,347-998-4601,0000008492,Active,2024-06-19 17:00:00,2024-06-15 17:00:00,20,193,...,False,Entered Into SPARS,False,2024-12-15,False,,NaT,,False,
445,628737,"Jacobowitz, Tilly",2017-08-14,718-384-1495,0000008502,Active,2024-06-26 16:00:00,2024-06-15 16:00:00,13,200,...,False,Completed,False,2024-12-15,False,,NaT,,False,


## Insync Anomalies

In [243]:
# looking for anomalies, NOMS == False but SPARS == True
try:    # Baseline anomalies
    baseline_SPARS = full_data_collection_df['Baseline SPARS'] == "Entered Into SPARS"
    baseline_NOMS = full_data_collection_df['Baseline NOMS'] == True
    Insync_baseline_anomalies = ~baseline_NOMS & baseline_SPARS
    Insync_baseline_anomalies = full_data_collection_df[Insync_baseline_anomalies]
    Insync_baseline_anomalies.insert(2,'Anomaly Description', "Baseline: Has SPARS, missing NOMS")

    logger.info(f"Successfully checked for baseline Anomalies.")
except Exception as e:
    logger.error(f"Failed to check for baseline Anomalies.", exc_info=True) 
    print(e)
    
try:        # reassessment anomalies
    reassessment_SPARS = full_data_collection_df['Reassessment SPARS'] == "Entered Into SPARS"
    reassessment_NOMS = full_data_collection_df['6 Month Reassessment NOMS'] == True
    Insync_reassessment_anomalies = ~reassessment_NOMS & reassessment_SPARS
    Insync_reassessment_anomalies = full_data_collection_df[Insync_reassessment_anomalies]
    Insync_reassessment_anomalies.insert(2,'Anomaly Description', "Reassessment: Has SPARS, missing NOMS")

    logger.info(f"Successfully checked for reassessment Anomalies.")
except Exception as e:
    logger.error(f"Failed to check for reassessment Anomalies.", exc_info=True) 
    print(e)
    
try:        # discharge anomalies
    discharge_SPARS = full_data_collection_df['Discharge SPARS'] == "Entered Into SPARS"
    discharge_NOMS = full_data_collection_df['Discharge NOMS'] == True
    Insync_discharge_anomalies = ~discharge_NOMS & discharge_SPARS
    Insync_discharge_anomalies = full_data_collection_df[Insync_discharge_anomalies]
    Insync_discharge_anomalies.insert(2,'Anomaly Description', "Discharge: Has SPARS, missing NOMS")

    logger.info(f"Successfully checked for discharge Anomalies.")
except Exception as e:
    logger.error(f"Failed to check for discharge Anomalies.", exc_info=True) 
    print(e)
    
try:        # concat anomalies into their own df
    anomaly_info = ['PatientID','Anomaly Description']
    Insync_anomalies = pd.concat([Insync_baseline_anomalies[anomaly_info],
                                  Insync_reassessment_anomalies[anomaly_info],
                                  Insync_discharge_anomalies[anomaly_info]])
    
    logger.info(f"Successfully created insync anomalies df.")
except Exception as e:
    logger.error(f"Failed to create insync anomalies df.", exc_info=True) 
    print(e)
    
Insync_anomalies

Unnamed: 0,PatientID,Anomaly Description
0,620331,"Baseline: Has SPARS, missing NOMS"
1,620381,"Baseline: Has SPARS, missing NOMS"
2,620421,"Baseline: Has SPARS, missing NOMS"
3,620431,"Baseline: Has SPARS, missing NOMS"
4,620461,"Baseline: Has SPARS, missing NOMS"
...,...,...
324,627097,"Discharge: Has SPARS, missing NOMS"
342,627327,"Discharge: Has SPARS, missing NOMS"
353,627467,"Discharge: Has SPARS, missing NOMS"
370,627657,"Discharge: Has SPARS, missing NOMS"


## Get Data from SPARS

In [244]:
try:     #read in SPARS excel
    SPARS_data_path = r"..\Data\SPARS Download 6-24-2024.xlsx" # does it download with date in name? will break if yes
    SPARS_data_df = pd.read_excel(SPARS_data_path)

    logger.info(f"Successfully read in SPARS excel.")
except Exception as e:
    logger.error(f"Failed to read in SPARS excel.", exc_info=True) 
    print(e)
# SPARS_data_df

## Filter down SPARS to match Insync 

In [245]:
try:     # filter down MRN numbers
    SPARS_data_df['ClientID'] = SPARS_data_df['ClientID'].str.replace("'","")
    MRN_mask  = SPARS_data_df['ClientID'].map(lambda value: True if value[-1] == "2" else False)
    SPARS_data_df = SPARS_data_df[MRN_mask]

    logger.info(f"Successfully filtered SPARS MRNs.")
except Exception as e:
    logger.error(f"Failed to filter SPARS MRNs.", exc_info=True) 
    print(e)

In [246]:
try:    # filter first encounters after march
    march_mask = SPARS_data_df['FirstReceivedServicesDate'] > '2023-03-01'
    SPARS_data_df = SPARS_data_df[march_mask]

    logger.info(f"Successfully filtered SPARS first encounters.")
except Exception as e:
    logger.error(f"Failed to filter SPARS first encounters.", exc_info=True) 
    print(e)

In [247]:
try:    # filter out other programs
    program_mask = SPARS_data_df['ClientID'].map(lambda value: True if value[0] == "0" else False)
    SPARS_data_df = SPARS_data_df[program_mask]
    
    logger.info(f"Successfully filtered SPARS other programs.")
except Exception as e:
    logger.error(f"Failed to filter SPARS other programs.", exc_info=True) 
    print(e)

## Select data

In [248]:
columns_wanted = ['ClientID','Assessment','CalculatedInterviewDate','ConductedInterview','WhyNotConducted']
assessment_dictionary = {
    "Baseline" : 600,
    "Reassessment": 601,
    "Discharge" : 699
}
interview_not_conducted_dictionary = {
    1 : "Not able to obtain consent from proxy",
    2 : "Client was impaired or unable to provide consent",
    3 : "Client refused this interview",
    4 : "Client was not reached for interview",
    5 : "Client refused all interviews",
    -1 : "Client was interviewed"
}

In [249]:
try:
    SPARS_assessments = pd.DataFrame(columns = columns_wanted) # create df for the desired data 
    for assessment in assessment_dictionary:
        assessment_df = SPARS_data_df[SPARS_data_df['Assessment'] == assessment_dictionary.get(assessment)] # copy in all data for current assessment type
        assessment_df = assessment_df.drop_duplicates(['ClientID']) # get rid of duplicates on name (shouldn't be needed but there was a duplicated baseline)
        assessment_df = assessment_df[columns_wanted] # remove all unneeded columns 
        assessment_df['Assessment'] = assessment # set assessment type values
        SPARS_assessments = pd.concat([SPARS_assessments[columns_wanted], # add current assessment data to all assessments' data
                                       assessment_df[columns_wanted]])
    SPARS_assessments.rename(columns={"ClientID": "MRN Number"},inplace=True)
    SPARS_assessments
    
    logger.info(f"Successfully selected useful SPARS data.")
except Exception as e:
    logger.error(f"Failed to select useful SPARS data.", exc_info=True) 
    print(e)

SPARS_assessments

  SPARS_assessments = pd.concat([SPARS_assessments[columns_wanted], # add current assessment data to all assessments' data


Unnamed: 0,MRN Number,Assessment,CalculatedInterviewDate,ConductedInterview,WhyNotConducted
0,0000001222,Baseline,2023-03-15,0,4
1,0000000942,Baseline,2023-03-15,0,2
2,0000002452,Baseline,2023-03-15,0,3
3,0000001922,Baseline,2023-03-15,0,2
4,0000000312,Baseline,2023-03-15,0,3
...,...,...,...,...,...
724,0000001732,Discharge,2024-02-15,1,-1
727,0000005912,Discharge,2024-01-15,1,-1
732,0000005532,Discharge,2024-03-15,1,-1
743,0000004642,Discharge,2024-04-15,1,-1


### Replace Values

In [250]:
try:    # replace values using dictionaries from SPARS
    for reason in interview_not_conducted_dictionary:
        SPARS_assessments['WhyNotConducted'] = SPARS_assessments['WhyNotConducted'].replace(reason, interview_not_conducted_dictionary.get(reason))

    logger.info(f"Successfully replaced values for SPARS not conducted.")
except Exception as e:
    logger.error(f"Failed to replace values for SPARS not conducted.", exc_info=True) 
    print(e)        

## Compare with Insync

In [251]:
try:    # which MRNs exist only Insync or SPARS but not both
    full_data_collection_df['SPARS MRN'] = full_data_collection_df['MRNNumber'].isin(SPARS_assessments['MRN Number'].drop_duplicates())
    SPARS_assessments['Insync MRN'] = SPARS_assessments['MRN Number'].isin(full_data_collection_df['MRNNumber'].drop_duplicates())
    
    logger.info(f"Successfully compared SPARS and Insync MRNs.")
except Exception as e:
    logger.error(f"Failed to compare SPARS and Insync MRNs.", exc_info=True) 
    print(e)

## Get download Date

In [252]:
try:    # add download date
    file_stat = os.stat(SPARS_data_path)

    # Check if st_birthtime attribute is available
    if hasattr(file_stat, 'st_birthtime'):
        # Use st_birthtime for the creation time
        creation_time = file_stat.st_birthtime
    else:
        # Fallback to st_mtime for the last modification time
        creation_time = file_stat.st_mtime
    
    # Convert the creation time to a datetime object
    creation_datetime = datetime.datetime.fromtimestamp(creation_time)
    # creation_datetime = datetime.fromtimestamp(creation_time)
    
    # Format the datetime object into MM/DD/YYYY
    creation_date_formatted = creation_datetime.strftime('%m/%d/%Y')
    
    SPARS_assessments['Download Date'] = creation_date_formatted

    logger.info(f"Successfully added SPARS download date.")
except Exception as e:
    logger.error(f"Failed to add SPARS download date.", exc_info=True) 
    print(e)

## Push to DB

In [253]:
# with pd.ExcelWriter(r"../data/NOMS Survey Completion.xlsx") as writer: 
#     full_data_collection_df.to_excel(writer, sheet_name="Patient Data",index = False)
#     consent_df.to_excel(writer, sheet_name="Consent Data",index = False)

In [254]:
# ", ".join([item + " " + str(full_data_collection_df[item].dtype) for item in full_data_collection_df.columns])

In [255]:
full_data_collection_df

Unnamed: 0,PatientID,Name,DOB,Phone,MRNNumber,Status,EncounterDate,Hour,Days Since Encounter,Days Till Reassessment Date,...,Baseline SPARS,6 Month Reassessment NOMS,6 Month Reassessment Date,6 Month Reassessment Warning,Reassessment SPARS,Final Encounter,Days Since Final Encounter,Discharge NOMS,Discharge SPARS,SPARS MRN
0,620331,"Werde, Chaya",2007-11-09,718-781-2831,0000000102,Active,2024-04-15 15:15:00,2024-04-15 15:00:00,85,128,...,Entered Into SPARS,False,2024-10-15,False,,NaT,,False,,True
1,620381,"Masri, Sarah",2017-10-15,347-350-3776,0000000152,Active,2023-07-09 19:30:00,2023-07-15 19:00:00,366,-152,...,Entered Into SPARS,True,2024-01-15,True,Entered Into SPARS,NaT,,False,,True
2,620421,"Ginsburg, Mattanya",1984-08-09,718-755-1077,0000000192,Active,2023-03-03 11:15:00,2023-03-15 11:00:00,494,-282,...,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,,True
3,620431,"Raitport, Sholom",2002-09-26,718-310-7547,0000000202,Active,2023-03-02 18:45:00,2023-03-15 18:00:00,495,-282,...,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,,True
4,620461,"Fischer, Sarah",1976-08-05,347-930-9550,0000000232,Active,2023-03-05 11:30:00,2023-03-15 11:00:00,492,-280,...,Entered Into SPARS,True,2023-09-15,True,Entered Into SPARS,NaT,,False,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442,628647,"Lein, Zev",2015-01-13,646-410-3049,0000008412,Active,2024-05-28 10:00:00,2024-05-15 10:00:00,42,170,...,Entered Into SPARS,False,2024-11-15,False,,NaT,,False,,True
443,628657,"Teitelbaum, Simcha",2010-08-05,347-799-4524,0000008422,Active,2024-05-30 17:00:00,2024-05-15 17:00:00,40,173,...,Entered Into SPARS,False,2024-11-15,False,,NaT,,False,,True
444,628727,"Leitner, Rivka",1991-06-16,347-998-4601,0000008492,Active,2024-06-19 17:00:00,2024-06-15 17:00:00,20,193,...,Entered Into SPARS,False,2024-12-15,False,,NaT,,False,,False
445,628737,"Jacobowitz, Tilly",2017-08-14,718-384-1495,0000008502,Active,2024-06-26 16:00:00,2024-06-15 16:00:00,13,200,...,Completed,False,2024-12-15,False,,NaT,,False,,False


### Push full_data_collection (Insync data)

In [256]:

rename_mapping = {
    'Baseline Warning': 'BaselineWarning',
    'Baseline Due Date': 'BaselineDueDate',
    'Baseline NOMS': 'BaselineNOMS',
    'Days Since Encounter': 'DaysSinceEncounter',
    'Days Till Reassessment Date': 'DaysTillReassessment',
    'Days Till Baseline Date': 'DaysTillBaseline',
    '6 Month Reassessment NOMS': '_6MonthReassessmentNOMS',
    '6 Month Reassessment Date': '_6MonthReassessmentDate',
    '6 Month Reassessment Warning': '_6MonthReassessmentWarning',
    'Reassessment SPARS': 'ReassessmentSPARS',
    'Final Encounter': 'FinalEncounter',
    'Days Since Final Encounter': 'DaysSinceFinalEncounter',
    'Discharge NOMS': 'DischargeNOMS',
    'Discharge SPARS': 'DischargeSPARS',
}

# Rename columns using the mapping dictionary
full_data_collection_df = full_data_collection_df.rename(columns=rename_mapping)


In [257]:
full_data_collection_df.columns.tolist()

['PatientID',
 'Name',
 'DOB',
 'Phone',
 'MRNNumber',
 'Status',
 'EncounterDate',
 'Hour',
 'DaysSinceEncounter',
 'DaysTillReassessment',
 'DaysTillBaseline',
 'BaselineNOMS',
 'BaselineDueDate',
 'Baseline SPARS',
 '_6MonthReassessmentNOMS',
 '_6MonthReassessmentDate',
 'ReassessmentSPARS',
 'FinalEncounter',
 'DaysSinceFinalEncounter',
 'DischargeNOMS',
 'DischargeSPARS',
 'SPARS MRN']

In [258]:

full_data_collection_df = full_data_collection_df[['PatientID',
                            'Name',
                            'DOB', 
                            'Phone', 
                            'MRNNumber', 
                            'Status', 
                            'EncounterDate', 
                            'Hour', 
                            'DaysSinceEncounter', 
                            'DaysTillBaseline', 
                            'DaysTillReassessment', 
                            'BaselineNOMS', 
                            'BaselineDueDate', 
                            'BaselineWarning', 
                            'Baseline SPARS',
                            '_6MonthReassessmentNOMS', 
                            '_6MonthReassessmentDate', 
                            '_6MonthReassessmentWarning', 
                            'ReassessmentSPARS',
                            'FinalEncounter', 
                            'DaysSinceFinalEncounter', 
                            'DischargeNOMS', 
                            'DischargeSPARS', 
                            'SPARS MRN']]

table_name = "ptNOMS_Completion"
try:
    full_data_collection_df.to_sql("ptNOMS_Completion", conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")            
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)


### Push consent_df  

In [259]:
# ", ".join([item + " " + str(consent_df[item].dtype) for item in consent_df.columns])

In [260]:
consent_df = consent_df[['PatientID',
                         'Consent',
                         'Reason']]

table_name = "ptNOMS_Consent"
try:
    consent_df.to_sql("ptNOMS_Consent", conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")            
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)

### Push Insync Anomalies

In [261]:
Insync_anomalies = Insync_anomalies[['PatientID',
                         'Anomaly Description']]

table_name = "ptNOMS_Insync_Anomalies"
try:
    Insync_anomalies.to_sql("ptNOMS_Insync_Anomalies", conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")            
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)

### Push SPARS Assessments


In [262]:
SPARS_assessments.columns.tolist()

['MRN Number',
 'Assessment',
 'CalculatedInterviewDate',
 'ConductedInterview',
 'WhyNotConducted',
 'Insync MRN',
 'Download Date']

In [263]:
rename_mapping = {
    'Assessment': 'Assessment Type',
    'ConductedInterview': 'Interview Conducted'
}

# Rename columns using the mapping dictionary
SPARS_assessments = SPARS_assessments.rename(columns=rename_mapping)

SPARS_assessments = SPARS_assessments[['MRN Number',
                                     'Assessment Type',
                                     'Interview Conducted',
                                     'WhyNotConducted',
                                     'Insync MRN',
                                     'Download Date']]

table_name = "ptNOMS_SPARS_Data"
try:
    SPARS_assessments.to_sql("ptNOMS_SPARS_Data", conn, if_exists='replace', index = False)
    logger.info(f"Successfully pushed {table_name} to database.")            
except Exception as e:
    logger.error(f"Failed to push {table_name} to database.", exc_info=True) 
    print(e)

In [264]:
conn.dispose()

In [265]:
# full_data_collection_df.to_clipboard()