## **Python Data Engineering Project**
##### *-- Anh Vi Pham --*

### **Setup Database**

#### Connect Server

In [188]:
import pyodbc
import json
import pandas as pd 
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)

# database credentials
server = 'localhost,1433' 
database = 'master' 
driver = '{ODBC Driver 18 for SQL Server}'
username = 'sa'
password = 'rainscales@2024'

# establish connecttion
conn = pyodbc.connect(f'DRIVER={driver};SERVER={server};DATABASE={database};UID={username};PWD={password};TrustServerCertificate=yes')
cursor = conn.cursor()

#### Create new database

In [189]:
def get_current_database(cursor=cursor):
    cursor.execute("SELECT DB_NAME() AS CurrentDatabase")
    current_db = cursor.fetchone()[0]
    return print(f"Currently connected to database: {current_db}")
get_current_database(cursor)

Currently connected to database: master


In [190]:
# drop database
conn.autocommit = True

# Drop the database if it exists
cursor.execute("IF EXISTS (SELECT * FROM sys.databases WHERE name = 'HospitalOperation') DROP DATABASE HospitalOperation")
print("Database 'HospitalOperation' dropped successfully if it existed.")

ProgrammingError: ('42000', '[42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Cannot drop database "HospitalOperation" because it is currently in use. (3702) (SQLExecDirectW)')

In [None]:
conn.autocommit = True
try:
    cursor.execute("CREATE DATABASE HospitalOperation")
    print("Database 'HospitalOperation' created successfully.")
except Exception as e:
    print(f"Error: {e}")

conn.autocommit = False

Database 'HospitalOperation' created successfully.


In [None]:
# close connection to "master" database
conn.close()

#### Connect to new database

In [None]:
new_database_name = 'HospitalOperation'
conn = pyodbc.connect(f'DRIVER={driver};SERVER={server};DATABASE={new_database_name};UID={username};PWD={password};TrustServerCertificate=yes')
cursor = conn.cursor()

get_current_database(cursor)

Currently connected to database: HospitalOperation


#### Load database configuration file from table_creation_code.txt

In [None]:
def load_sql_from_txt(file_name: str, mode='r'):
    try:
        commands = []
        with open(file_name, mode) as file:
            imported_commands = file.read().split(';')
            for com in imported_commands:
                commands.append(com.replace('\n', ' ').strip())
        return commands
    except Exception as e:
        conn.close()
        return print(f'Failed to read commands from {file_name}  - \n Error: {e} \n Connection closed')

def execute_list_of_sql_commands(command_list, cursor, conn):
    try:
        for command in command_list:
            if command: 
                cursor.execute(command)

        conn.commit()
        return print(f'Commands excuted successfully')

    except Exception as e:
        conn.close()
        return print(f'Failed to execute commands - \n Error: {e} \n Connection closed')


In [None]:
db_file = 'table_creation_code.txt'
table_creation_codes = load_sql_from_txt(db_file)

execute_list_of_sql_commands(command_list=table_creation_codes, cursor=cursor, conn=conn)

Commands excuted successfully


#### Load data from sample_dataset.txt

In [None]:
data_file = 'sample_dataset.txt'
data_codes = load_sql_from_txt(data_file)

execute_list_of_sql_commands(command_list=data_codes, cursor=cursor, conn=conn)

Commands excuted successfully


### **Update Database Configuration**

In [None]:
get_current_database(cursor)

Currently connected to database: HospitalOperation


In [None]:
def query(sql_code, engine = conn):
    df_query = pd.read_sql(sql_code, engine)
    return df_query

#### Change data type: text to varchar(150)

In [None]:
def find_columns_by_data_type(data_type: str, conn):
    sql_code = f"""
    SELECT TABLE_NAME, COLUMN_NAME 
    FROM INFORMATION_SCHEMA.COLUMNS 
    WHERE DATA_TYPE = '{data_type}'
    """
    columns = query(sql_code, conn)
    return columns

data_type = 'text'
text_columns = find_columns_by_data_type(data_type, conn)
print(f"Columns with {data_type} data type:")
print(text_columns)

Columns with text data type:
      TABLE_NAME  COLUMN_NAME
0      Physician         Name
1      Physician     Position
2     Department         Name
3   NewProcedure         Name
4        Patient         Name
..           ...          ...
10    Medication         Name
11    Medication        Brand
12    Medication  Description
13    Prescribes         Dose
14          Room         Type

[15 rows x 2 columns]


In [None]:
def alter_text_to_varchar(conn, table_name, column_name, varchar_length=150):
    alter_sql = f"ALTER TABLE [{table_name}] ALTER COLUMN [{column_name}] VARCHAR({varchar_length})"
    cursor = conn.cursor()
    cursor.execute(alter_sql)
    conn.commit()
    print(f"Column '{column_name}' in table '{table_name}' changed to VARCHAR({varchar_length})")

# Execute the change for each identified column
for index, row in text_columns.iterrows():
    alter_text_to_varchar(conn, row['TABLE_NAME'], row['COLUMN_NAME'])


Column 'Name' in table 'Physician' changed to VARCHAR(150)
Column 'Position' in table 'Physician' changed to VARCHAR(150)
Column 'Name' in table 'Department' changed to VARCHAR(150)
Column 'Name' in table 'NewProcedure' changed to VARCHAR(150)
Column 'Name' in table 'Patient' changed to VARCHAR(150)
Column 'Address' in table 'Patient' changed to VARCHAR(150)
Column 'Phone' in table 'Patient' changed to VARCHAR(150)
Column 'Name' in table 'Nurse' changed to VARCHAR(150)
Column 'Position' in table 'Nurse' changed to VARCHAR(150)
Column 'ExaminationRoom' in table 'Appointment' changed to VARCHAR(150)
Column 'Name' in table 'Medication' changed to VARCHAR(150)
Column 'Brand' in table 'Medication' changed to VARCHAR(150)
Column 'Description' in table 'Medication' changed to VARCHAR(150)
Column 'Dose' in table 'Prescribes' changed to VARCHAR(150)
Column 'Type' in table 'Room' changed to VARCHAR(150)


### **Data Transform: Patient Interaction**

#### Query data

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
df = query(""" 
        SELECT
        -- Patient table
        Patient.SSN AS PatientSSN,
        Patient.Name AS PatientName,
        Patient.Address AS PatientAddress,
        Patient.Phone AS PatientPhone,
        Patient.InsuranceID AS PatientInsuranceID,
        Patient.PCP AS PatientPCP,

        -- Appointment table
        Appointment.AppointmentID,
        Appointment.Patient AS AppointmentPatient,
        Appointment.Start AS AppointmentStart,
        Appointment.[End] AS AppointmentEnd,
        Appointment.ExaminationRoom AS AppointmentExaminationRoom,
        Appointment_Physician.EmployeeID AS AppointmentPhysicianID,
        Appointment_Physician.Name AS AppointmentPhysicianName,
        Appointment_Physician.Position AS AppointmentPhysicianPosition,
        Appointment_Nurse.EmployeeID AS AppointmentNurseID,
        Appointment_Nurse.Name AS AppointmentNurseName,
        Appointment_Nurse.Position AS AppointmentNursePosition,

        -- Prescribes table
        Prescribes.Physician AS PrescribingPhysician,
        Prescribes.Patient AS PrescriptionPatient,
        Prescribes.Medication AS PrescriptionMedication,
        Prescribes.Date AS PrescriptionDate,
        Prescribes.Appointment AS PrescriptionAppointment,
        Prescribes.Dose AS PrescriptionDose,
        Prescribes_Physician.EmployeeID AS PrescribingPhysicianID,
        Prescribes_Physician.Name AS PrescribingPhysicianName,
        Prescribes_Physician.Position AS PrescribingPhysicianPosition,

        -- Medication table
        Medication.Code AS MedicationCode,
        Medication.Name AS MedicationName,
        Medication.Brand AS MedicationBrand,
        Medication.Description AS MedicationDescription,

        -- Undergoes table
        Undergoes.Patient AS UndergoesPatient,
        Undergoes.NewProcedure AS UndergoesNewProcedure,
        Undergoes.Stay AS UndergoesStayID,
        Undergoes.Date AS UndergoesDate,
        Undergoes.Physician AS UndergoesPhysicianID,
        Undergoes.AssistingNurse AS UndergoesNurseID,

        -- NewProcedure table
        NewProcedure.Code AS NewProcedureCode,
        NewProcedure.Name AS NewProcedureName,
        NewProcedure.Cost AS NewProcedureCost,

        -- Stay table
        Stay.StayID AS StayID,
        Stay.Patient AS StayPatient,
        Stay.Room AS StayRoom,
        Stay.Start AS StayStart,
        Stay.[End] AS StayEnd,

        -- Room table
        Room.Number AS RoomNumber,
        Room.Type AS RoomType,
        Room.BlockFloor AS RoomBlockFloor,
        Room.BlockCode AS RoomBlockCode,
        Room.Unavailable AS RoomUnavailable,

        -- Block table
        Block.Floor AS BlockFloor,
        Block.Code AS BlockCode,

        -- On_Call table
        On_Call.Nurse AS OnCallNurse,
        On_Call.BlockFloor AS OnCallBlockFloor,
        On_Call.BlockCode AS OnCallBlockCode,
        On_Call.Start AS OnCallStart,
        On_Call.[End] AS OnCallEnd,
        OnCall_Nurse.EmployeeID AS OnCallNurseID,
        OnCall_Nurse.Name AS OnCallNurseName,
        OnCall_Nurse.Position AS OnCallNursePosition,
        OnCall_Nurse.Registered AS OnCallNurseRegistered
        FROM dbo.Patient
        -- Join with Appointment and related tables
        LEFT JOIN dbo.Appointment
        ON Patient.SSN = Appointment.Patient
        LEFT JOIN dbo.Physician AS Appointment_Physician
        ON Appointment.Physician = Appointment_Physician.EmployeeID
        LEFT JOIN dbo.Nurse AS Appointment_Nurse
        ON Appointment.PrepNurse = Appointment_Nurse.EmployeeID

        -- Join with Prescribes and related tables
        LEFT JOIN dbo.Prescribes
        ON Appointment.AppointmentID = Prescribes.Appointment
        LEFT JOIN dbo.Physician AS Prescribes_Physician
        ON Prescribes.Physician = Prescribes_Physician.EmployeeID
        LEFT JOIN dbo.Medication
        ON Prescribes.Medication = Medication.Code

        -- Join with Undergoes, NewProcedure, and related tables
        LEFT JOIN dbo.Undergoes
        ON Patient.SSN = Undergoes.Patient
        LEFT JOIN dbo.NewProcedure
        ON Undergoes.NewProcedure = NewProcedure.Code
        LEFT JOIN dbo.Stay
        ON Undergoes.Stay = Stay.StayID
        LEFT JOIN dbo.Room
        ON Stay.Room = Room.Number

        -- Join with Block and On_Call tables
        LEFT JOIN dbo.Block
        ON Room.BlockCode = Block.Code
        LEFT JOIN dbo.On_Call
        ON Block.Code = On_Call.BlockCode AND Block.Floor = On_Call.BlockFloor
        LEFT JOIN dbo.Nurse AS OnCall_Nurse
        ON On_Call.Nurse = OnCall_Nurse.EmployeeID;

""")
df.head(3)

Unnamed: 0,PatientSSN,PatientName,PatientAddress,PatientPhone,PatientInsuranceID,PatientPCP,AppointmentID,AppointmentPatient,AppointmentStart,AppointmentEnd,AppointmentExaminationRoom,AppointmentPhysicianID,AppointmentPhysicianName,AppointmentPhysicianPosition,AppointmentNurseID,AppointmentNurseName,AppointmentNursePosition,PrescribingPhysician,PrescriptionPatient,PrescriptionMedication,PrescriptionDate,PrescriptionAppointment,PrescriptionDose,PrescribingPhysicianID,PrescribingPhysicianName,PrescribingPhysicianPosition,MedicationCode,MedicationName,MedicationBrand,MedicationDescription,UndergoesPatient,UndergoesNewProcedure,UndergoesStayID,UndergoesDate,UndergoesPhysicianID,UndergoesNurseID,NewProcedureCode,NewProcedureName,NewProcedureCost,StayID,StayPatient,StayRoom,StayStart,StayEnd,RoomNumber,RoomType,RoomBlockFloor,RoomBlockCode,RoomUnavailable,BlockFloor,BlockCode,OnCallNurse,OnCallBlockFloor,OnCallBlockCode,OnCallStart,OnCallEnd,OnCallNurseID,OnCallNurseName,OnCallNursePosition,OnCallNurseRegistered
0,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,1.0,2.0,101.0,1.0,2.0,2008-11-04 11:00:00,2008-11-04 19:00:00,101.0,Carla Espinosa,Head Nurse,True
1,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,1.0,2.0,103.0,1.0,2.0,2008-11-04 19:00:00,2008-11-05 03:00:00,103.0,Paul Flowers,Nurse,False
2,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,2.0,2.0,,,,NaT,NaT,,,,


#### Explore data

In [None]:
df.shape

(93, 60)

In [None]:
df.dtypes

PatientSSN                        int64
PatientName                      object
PatientAddress                   object
PatientPhone                     object
PatientInsuranceID                int64
                              ...      
OnCallEnd                datetime64[ns]
OnCallNurseID                   float64
OnCallNurseName                  object
OnCallNursePosition              object
OnCallNurseRegistered            object
Length: 60, dtype: object

In [None]:
df.duplicated().sum()

0

In [None]:
pd.set_option('display.max_rows', 60)
df.isnull().sum()

PatientSSN                       0
PatientName                      0
PatientAddress                   0
PatientPhone                     0
PatientInsuranceID               0
PatientPCP                       0
AppointmentID                    0
AppointmentPatient               0
AppointmentStart                 0
AppointmentEnd                   0
AppointmentExaminationRoom       0
AppointmentPhysicianID           0
AppointmentPhysicianName         0
AppointmentPhysicianPosition     0
AppointmentNurseID              30
AppointmentNurseName            30
AppointmentNursePosition        30
PrescribingPhysician            63
PrescriptionPatient             63
PrescriptionMedication          63
PrescriptionDate                63
PrescriptionAppointment         63
PrescriptionDose                63
PrescribingPhysicianID          63
PrescribingPhysicianName        63
PrescribingPhysicianPosition    63
MedicationCode                  63
MedicationName                  63
MedicationBrand     

In [None]:
df.columns

Index(['PatientSSN', 'PatientName', 'PatientAddress', 'PatientPhone',
       'PatientInsuranceID', 'PatientPCP', 'AppointmentID',
       'AppointmentPatient', 'AppointmentStart', 'AppointmentEnd',
       'AppointmentExaminationRoom', 'AppointmentPhysicianID',
       'AppointmentPhysicianName', 'AppointmentPhysicianPosition',
       'AppointmentNurseID', 'AppointmentNurseName',
       'AppointmentNursePosition', 'PrescribingPhysician',
       'PrescriptionPatient', 'PrescriptionMedication', 'PrescriptionDate',
       'PrescriptionAppointment', 'PrescriptionDose', 'PrescribingPhysicianID',
       'PrescribingPhysicianName', 'PrescribingPhysicianPosition',
       'MedicationCode', 'MedicationName', 'MedicationBrand',
       'MedicationDescription', 'UndergoesPatient', 'UndergoesNewProcedure',
       'UndergoesStayID', 'UndergoesDate', 'UndergoesPhysicianID',
       'UndergoesNurseID', 'NewProcedureCode', 'NewProcedureName',
       'NewProcedureCost', 'StayID', 'StayPatient', 'StayRoom', '

In [None]:
df[df['UndergoesPatient'].isnull()]

Unnamed: 0,PatientSSN,PatientName,PatientAddress,PatientPhone,PatientInsuranceID,PatientPCP,AppointmentID,AppointmentPatient,AppointmentStart,AppointmentEnd,AppointmentExaminationRoom,AppointmentPhysicianID,AppointmentPhysicianName,AppointmentPhysicianPosition,AppointmentNurseID,AppointmentNurseName,AppointmentNursePosition,PrescribingPhysician,PrescriptionPatient,PrescriptionMedication,PrescriptionDate,PrescriptionAppointment,PrescriptionDose,PrescribingPhysicianID,PrescribingPhysicianName,PrescribingPhysicianPosition,MedicationCode,MedicationName,MedicationBrand,MedicationDescription,UndergoesPatient,UndergoesNewProcedure,UndergoesStayID,UndergoesDate,UndergoesPhysicianID,UndergoesNurseID,NewProcedureCode,NewProcedureName,NewProcedureCost,StayID,StayPatient,StayRoom,StayStart,StayEnd,RoomNumber,RoomType,RoomBlockFloor,RoomBlockCode,RoomUnavailable,BlockFloor,BlockCode,OnCallNurse,OnCallBlockFloor,OnCallBlockCode,OnCallStart,OnCallEnd,OnCallNurseID,OnCallNurseName,OnCallNursePosition,OnCallNurseRegistered
45,100000002,Grace Ritchie,37 Snafu Drive,555-0512,36546321,2,26548913,100000002,2008-04-24 10:00:00,2008-04-24 11:00:00,B,2,Elliot Reid,Attending Physician,101.0,Carla Espinosa,Head Nurse,,,,NaT,,,,,,,,,,,,,NaT,,,,,,,,,NaT,NaT,,,,,,,,,,,NaT,NaT,,,,
46,100000002,Grace Ritchie,37 Snafu Drive,555-0512,36546321,2,93216548,100000002,2008-04-27 10:00:00,2008-04-27 11:00:00,B,2,Elliot Reid,Attending Physician,101.0,Carla Espinosa,Head Nurse,,,,NaT,,,,,,,,,,,,,NaT,,,,,,,,,NaT,NaT,,,,,,,,,,,NaT,NaT,,,,
47,100000003,Random J. Patient,101 Omgbbq Street,555-1204,65465421,2,69879231,100000003,2008-04-26 11:00:00,2008-04-26 12:00:00,C,2,Elliot Reid,Attending Physician,103.0,Paul Flowers,Nurse,,,,NaT,,,,,,,,,,,,,NaT,,,,,,,,,NaT,NaT,,,,,,,,,,,NaT,NaT,,,,


#### Listing problems
- Missing values: could be split into 2 cases
    - Case 1: columns that can be treated indepently  
        - Appointment Nurse: assume that those appointments didn't involve any nurse  
        -> missing values could be replaced with "No nurse involved"  

        - Undergoes Nurse: similar to above  

        - On_Call: assume that the patient has no one on call for them  
        -> "Service Not Available"

    - Case 2: columns that relies on the existence of other columns.  
    Ex: prescribe relies on appointments, medication relies on prescibes  
        - Prescribe: if missing in Appointtment -> missing in Prescribe
        -> "No prescription issued"  
        -> Otherwise, consider abnormal -> load to AbnormalDB


        -  Medication: if missing in Prescribe -> missing in Medication
        -> "No prescription issued"  
        -> Otherwise, consider abnormal -> load to AbnormalDB  

        - Nurse On_Call: if missing in On_Call -> missing in OnCallNurse  
        -> "Not On Call"




- Data type for columns needed to be changed:
    - Some ID columns are represented as "float" instead of "int" since they contains null 
    -> handle missing values and transform into int for consistency
- Create primary key

#### Transform data

##### Missing values

In [None]:
def fill_null_without_condition_column(data, replace_columns: list, replace_value):
    for column in replace_columns:
        data[column] = data[column].fillna(replace_value)
    return data 

def fill_null_with_condition_column(data, condition_column, replace_columns: list, replace_value, note: str, keep=False):
    # dataframe to store abnormal data
    data_abnormal = pd.DataFrame()
    for column in replace_columns:
        # fill if missing in condition_column and the condition_column
        data.loc[(data[condition_column].isnull()) & (data[column].isnull()), column] = replace_value
        # extract abnormal data
        abnormal_records = data[(data[condition_column].notnull()) & (data[column].isnull())]

        # add note for abnormal records
        if not abnormal_records.empty:
            abnormal_records['Note'] = note
        # append abnormal records
        data_abnormal = pd.concat([data_abnormal, abnormal_records])

    # keep or drop abnormal
    if not keep:    
        data = data.drop(data_abnormal.index)

    return data, data_abnormal



# def handle_missing_data(data, columns_without_condition: list, columns_with_condition: list):
#     df_cleaned, df_abnormal = 



# filling nurse-related columns in Appointments
nurse_columns_app = ['AppointmentNurseID', 'AppointmentNurseName','AppointmentNursePosition']
df = fill_null_without_condition_column(data=df, replace_columns=nurse_columns_app, replace_value='No nurse involved')

# filling columns in Prescribe
prescribe_columns = ['PrescribingPhysician',
       'PrescriptionPatient', 'PrescriptionMedication', 'PrescriptionDate',
       'PrescriptionAppointment', 'PrescriptionDose', 'PrescribingPhysicianID',
       'PrescribingPhysicianName', 'PrescribingPhysicianPosition']
condition_column_pres = 'AppointmentID'
df, df_abnormal = fill_null_with_condition_column(data=df, condition_column=condition_column_pres, replace_columns=prescribe_columns, replace_value='No prescription issued', note='Has Prescribe but no Appointment')

# filling columns in Medication
medication_columns = ['MedicationCode', 'MedicationName', 'MedicationBrand','MedicationDescription']
condition_column_medi = 'PrescriptionAppointment'
df, df_abnormal = fill_null_with_condition_column(data=df, condition_column=condition_column_medi, replace_columns=medication_columns, replace_value='No prescription issued', note='Has Medication but no Prescibe')

# filling nurse-related columns in Undergoes
nurse_columns_under = ['UndergoesNurseID']
df = fill_null_without_condition_column(data=df, replace_columns=nurse_columns_under, replace_value='No nurse involved')

# filling columns in On_Call
oncall_columns = ['OnCallNurse','OnCallBlockFloor', 'OnCallBlockCode', 'OnCallStart', 'OnCallEnd']
df = fill_null_without_condition_column(data=df, replace_columns=oncall_columns, replace_value='Service Not Available')

# filling columns in On_Call
nurse_oncall_columns = ['OnCallNurseID', 'OnCallNurseName', 'OnCallNursePosition','OnCallNurseRegistered']
condition_column_nurse_oncall = 'OnCallNurse'
df, df_abnormal = fill_null_with_condition_column(data=df, condition_column=condition_column_nurse_oncall, replace_columns=nurse_oncall_columns, replace_value='Off Duty', note='Has NurseOnCall but no OnCall')


df.isnull().sum()

PatientSSN                      0
PatientName                     0
PatientAddress                  0
PatientPhone                    0
PatientInsuranceID              0
PatientPCP                      0
AppointmentID                   0
AppointmentPatient              0
AppointmentStart                0
AppointmentEnd                  0
AppointmentExaminationRoom      0
AppointmentPhysicianID          0
AppointmentPhysicianName        0
AppointmentPhysicianPosition    0
AppointmentNurseID              0
AppointmentNurseName            0
AppointmentNursePosition        0
PrescribingPhysician            0
PrescriptionPatient             0
PrescriptionMedication          0
PrescriptionDate                0
PrescriptionAppointment         0
PrescriptionDose                0
PrescribingPhysicianID          0
PrescribingPhysicianName        0
PrescribingPhysicianPosition    0
MedicationCode                  0
MedicationName                  0
MedicationBrand                 0
MedicationDesc

In [None]:
df_abnormal

Unnamed: 0,PatientSSN,PatientName,PatientAddress,PatientPhone,PatientInsuranceID,PatientPCP,AppointmentID,AppointmentPatient,AppointmentStart,AppointmentEnd,AppointmentExaminationRoom,AppointmentPhysicianID,AppointmentPhysicianName,AppointmentPhysicianPosition,AppointmentNurseID,AppointmentNurseName,AppointmentNursePosition,PrescribingPhysician,PrescriptionPatient,PrescriptionMedication,PrescriptionDate,PrescriptionAppointment,PrescriptionDose,PrescribingPhysicianID,PrescribingPhysicianName,PrescribingPhysicianPosition,MedicationCode,MedicationName,MedicationBrand,MedicationDescription,UndergoesPatient,UndergoesNewProcedure,UndergoesStayID,UndergoesDate,UndergoesPhysicianID,UndergoesNurseID,NewProcedureCode,NewProcedureName,NewProcedureCost,StayID,StayPatient,StayRoom,StayStart,StayEnd,RoomNumber,RoomType,RoomBlockFloor,RoomBlockCode,RoomUnavailable,BlockFloor,BlockCode,OnCallNurse,OnCallBlockFloor,OnCallBlockCode,OnCallStart,OnCallEnd,OnCallNurseID,OnCallNurseName,OnCallNursePosition,OnCallNurseRegistered,Note
2,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,2.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
3,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,3.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
4,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,2.0,3215.0,2008-05-03,7.0,101.0,2.0,Obtuse Pyloric Recombobulation,3750.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,4.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
7,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,6.0,3215.0,2008-05-02,3.0,101.0,6.0,Reversible Pancreomyoplasty,5600.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,2.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
8,100000001,John Smith,42 Foobar Lane,555-0256,68476213,1,13216584,100000001,2008-04-24 10:00:00,2008-04-24 11:00:00,A,1,John Dorian,Staff Internist,101.0,Carla Espinosa,Head Nurse,1.0,100000001.0,1.0,2008-04-24 10:47:00,13216584.0,5,1.0,John Dorian,Staff Internist,1.0,Procrastin-X,X,,100000001.0,6.0,3215.0,2008-05-02,3.0,101.0,6.0,Reversible Pancreomyoplasty,5600.0,3215.0,100000001.0,111.0,2008-05-01,2008-05-04,111.0,Single,1.0,2.0,False,3.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,100000004,Dennis Doe,1100 Foobaz Avenue,555-2048,68421879,3,86213939,100000004,2008-04-27 10:00:00,2008-04-21 11:00:00,A,9,Molly Clock,Attending Psychiatrist,102.0,Laverne Roberts,Nurse,9.0,100000004.0,2.0,2008-04-27 10:53:00,86213939.0,10,9.0,Molly Clock,Attending Psychiatrist,2.0,Thesisin,Foo Labs,,100000004.0,4.0,3217.0,2008-05-13,3.0,103.0,4.0,Complete Walletectomy,10000.0,3217.0,100000004.0,112.0,2008-05-02,2008-05-03,112.0,Single,1.0,2.0,True,3.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
87,100000004,Dennis Doe,1100 Foobaz Avenue,555-2048,68421879,3,86213939,100000004,2008-04-27 10:00:00,2008-04-21 11:00:00,A,9,Molly Clock,Attending Psychiatrist,102.0,Laverne Roberts,Nurse,9.0,100000004.0,2.0,2008-04-27 10:53:00,86213939.0,10,9.0,Molly Clock,Attending Psychiatrist,2.0,Thesisin,Foo Labs,,100000004.0,4.0,3217.0,2008-05-13,3.0,103.0,4.0,Complete Walletectomy,10000.0,3217.0,100000004.0,112.0,2008-05-02,2008-05-03,112.0,Single,1.0,2.0,True,4.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
90,100000004,Dennis Doe,1100 Foobaz Avenue,555-2048,68421879,3,86213939,100000004,2008-04-27 10:00:00,2008-04-21 11:00:00,A,9,Molly Clock,Attending Psychiatrist,102.0,Laverne Roberts,Nurse,9.0,100000004.0,2.0,2008-04-27 10:53:00,86213939.0,10,9.0,Molly Clock,Attending Psychiatrist,2.0,Thesisin,Foo Labs,,100000004.0,5.0,3217.0,2008-05-09,6.0,No nurse involved,5.0,Obfuscated Dermogastrotomy,4899.0,3217.0,100000004.0,112.0,2008-05-02,2008-05-03,112.0,Single,1.0,2.0,True,2.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall
91,100000004,Dennis Doe,1100 Foobaz Avenue,555-2048,68421879,3,86213939,100000004,2008-04-27 10:00:00,2008-04-21 11:00:00,A,9,Molly Clock,Attending Psychiatrist,102.0,Laverne Roberts,Nurse,9.0,100000004.0,2.0,2008-04-27 10:53:00,86213939.0,10,9.0,Molly Clock,Attending Psychiatrist,2.0,Thesisin,Foo Labs,,100000004.0,5.0,3217.0,2008-05-09,6.0,No nurse involved,5.0,Obfuscated Dermogastrotomy,4899.0,3217.0,100000004.0,112.0,2008-05-02,2008-05-03,112.0,Single,1.0,2.0,True,3.0,2.0,Service Not Available,Service Not Available,Service Not Available,Service Not Available,Service Not Available,,,,,Has NurseOnCall but no OnCall


### **Close Connection**

In [None]:
conn.close()