In [307]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [308]:
#upload all dataframes

patient_core_df = pd.read_csv('PatientCorePopulatedTable.txt', sep='\t')
admissions_core_df = pd.read_csv('AdmissionsCorePopulatedTable.txt', sep='\t')
admissions_diagnoses_core_df = pd.read_csv('AdmissionsDiagnosesCorePopulatedTable.txt', sep='\t')

In [309]:
# combine into one df on basis of patient ID

combined_df = patient_core_df.merge(admissions_core_df, on='PatientID')
combined_df = combined_df.merge(admissions_diagnoses_core_df, on='PatientID')

In [310]:
# generate drug column
# a set of hypothetical drugs are assigned to each disease. 
# These are chosen randomly to be "administered" for demo purposes

dict_disease_codes = dict.fromkeys(combined_df.PrimaryDiagnosisCode.unique())

num = 0
for each in dict_disease_codes.keys():
    dict_disease_codes[each] = [num, num+1]
    num +=2

combined_df['drug1'] = ""
for row in range(0, len(combined_df)):
    combined_df['drug1'][row] = np.random.choice(dict_disease_codes[combined_df['PrimaryDiagnosisCode'][row]])
    

In [322]:
# generate outcome column (random for now)

outcomes = {4: 'best', 3: 'better', 2: 'no effect', 1: 'adverse'}
for i in range (0, len(combined_df)):
    combined_df['outcomes'][i] = np.random.choice([1, 2, 3, 4])

In [323]:
# add age column, calculated by year - birth year

combined_df['Age'] = ''

for i in range(0, len(combined_df)):
    combined_df['Age'][i] = 2020 - int(combined_df['PatientDateOfBirth'][i].split(' ')[0].split('-')[0])


In [324]:
# For physician search:

# input: physician query (age, primarydiagnosiscode)
# output: (drug1, drug2, drug3) .. ranked by (drug_weighted_metric)

In [325]:
import math

# Used to calculate patient similarity
# later build in more complex features like similar diagnoses

def patient_similarity(patient1, patient2):
    return (math.abs(patient1.age - patient2.age)/max(patient1.age, patient2.age))
    

In [370]:

# Updates summary table ("insights table") for physician with each new instance found

def update_table(df, match):
    
    # first instance
    last_index = len(df)
    if(match['drug1'] not in df['drug_name'].unique()):
        match = match.drop(['PatientGender', 'PatientDateOfBirth', 'PatientRace', 'PatientMaritalStatus', 
                   'PatientLanguage', 'PatientPopulationPercentageBelowPoverty', 'AdmissionID_x', 'AdmissionStartDate', 'AdmissionEndDate', 
                   'AdmissionID_y', 'PrimaryDiagnosisCode', 'PrimaryDiagnosisDescription'], axis=0)
        match['num_instances'] = 1

        match = match.rename({'drug1': 'drug_name', 'outcomes': 'avg_efficacy', 'Age': 'avg_age'})
        df = df.append(match, ignore_index=True)
        
        #df['patient_ids'][match['drug_name']] = 1
        
        #df['patient_ids'][match['drug_name']].append(match['PatientID'])
        
    # update table for more uses
    else:
        avg_efficacy = df['avg_efficacy'][match['drug1']]
        avg_age = df['avg_age'][match['drug1']]
        num_instances = df['num_instances'][match['drug1']]
        
        
        df['avg_efficacy'][match['drug1']] = (avg_efficacy*num_instances + match['outcomes']) / (num_instances + 1)
        df['avg_age'][match['drug1']] = (avg_age*num_instances + match['Age']) / (num_instances + 1)
        df['num_instances'][match['drug1']] +=1  
        #df['patient_ids'][match['drug1']].append(match['PatientID'])

    return df

# Search function for physician 

def search(patient_age, patient_diagnosis):
    num_unique_patients = 0
    patient_ids_found = []
    # get matches
    matches = combined_df.loc[combined_df['PrimaryDiagnosisCode'] == patient_diagnosis]
    
    # create new dataframe for summary to be displayed to physician
    summarized_drug_matches = pd.DataFrame(columns=['drug_name', 'num_instances', 'avg_efficacy', 'avg_age'])
        
    # create new dataframe with all results in case physician wants to see specific case data
    all_drug_match_data = pd.DataFrame()
      
    # for each match
    for index, row in matches.iterrows():
        
        match = row 
        
        # add to all matches dataframe
        all_drug_match_data = all_drug_match_data.append(match)
        
        # update summary table
        summarized_drug_matches = update_table(summarized_drug_matches, match)             
        
    return summarized_drug_matches, all_drug_match_data

In [371]:

summary, all_data = search(25, 'M05.272')
print(summary)
print('')
print(all_data)

  drug_name num_instances avg_efficacy avg_age  \
0         1             1            2      73   
1         0             2            3      73   

                              PatientID  
0  FB2ABB23-C9D0-4D09-8464-49BF0B982F0F  
1  FB2ABB23-C9D0-4D09-8464-49BF0B982F0F  

          AdmissionEndDate  AdmissionID_x  AdmissionID_y  \
0  1968-10-10 20:48:21.037            1.0            1.0   
3  1974-06-13 15:31:26.577            2.0            1.0   
6  2009-11-26 09:56:15.697            3.0            1.0   

        AdmissionStartDate   Age       PatientDateOfBirth PatientGender  \
0  1968-10-07 11:15:49.617  73.0  1947-12-28 02:45:40.547          Male   
3  1974-06-11 04:59:26.377  73.0  1947-12-28 02:45:40.547          Male   
6  2009-11-09 12:52:06.760  73.0  1947-12-28 02:45:40.547          Male   

                              PatientID PatientLanguage PatientMaritalStatus  \
0  FB2ABB23-C9D0-4D09-8464-49BF0B982F0F       Icelandic              Married   
3  FB2ABB23-C9D0-4D0