In [116]:
# importing libraries to work with data 
import pandas as pd
import numpy as np
import datetime
import requests
import json

In [3]:
# getting the list of subject IDs having adverse effect
subjects_ae = requests.get('https://pyhack-dot-pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/ae/subject/list')
subjects_ae_json = subjects_ae.json()
subjects_ae = pd.DataFrame(subjects_ae_json['data'], columns = ['subject_id'])

In [4]:
# getting AE data for all subject IDs
ae_df = pd.DataFrame()
for subject_id in subjects_ae['subject_id']:
    response_ae = requests.get(f'https://pyhack-dot-pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/ae/subject/{subject_id}/list')
    response_ae_json = response_ae.json()
    ae_df = ae_df.append(response_ae_json['data'], ignore_index='true')

In [59]:
# converting the date column into a readable format to clean
ae_df['aestdat'] = pd.to_datetime(ae_df['aestdat'], errors = 'coerce', format = '%d-%b-%y')
ae_df['aeendat'] = pd.to_datetime(ae_df['aeendat'], errors = 'coerce', format = '%d-%b-%y')

# removing unwanted rows and columns
ae_df = ae_df[['siteid', 'subjectid', 'formid', 'aeterm', 'aestdat', 'aeendat', 'aecmgiv', 'aeongo']]
ae_df = ae_df.dropna(axis=0)

index1 = ae_df[ae_df['aecmgiv'] == 'NO'].index
ae_df.drop(index1, inplace = True)

In [7]:
# getting the list of subject IDs that have taken concomitant medication
subjects_cm = requests.get('https://pyhack-dot-pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/cm/subject/list')
subjects_cm_json= subjects_cm.json()
subjects_cm = pd.DataFrame(subjects_cm_json['data'], columns = ['subject_id'])

In [8]:
# getting CM data for all subject IDs
cm_df = pd.DataFrame()
for subject_id in subjects_cm['subject_id']:
    response_cm = requests.get(f'https://pyhack-dot-pharmanlp-177020.uc.r.appspot.com/api/1/StudyHack/cm/subject/{subject_id}/list')
    response_cm_json = response_cm.json()
    cm_df = cm_df.append(response_cm_json['data'], ignore_index='true')

In [60]:
# converting the date column into a readable format to clean
cm_df['cmstdat'] = pd.to_datetime(cm_df['cmstdat'], errors = 'coerce', format = '%d-%b-%y')
cm_df['cmendat'] = pd.to_datetime(cm_df['cmendat'], errors = 'coerce', format = '%d-%b-%y')

# removing unwanted rows and columns
cm_df = cm_df[['siteid', 'subjectid', 'formid', 'cmtrt', 'cmaer', 'cmstdat', 'cmendat', 'cmongo']]
cm_df = cm_df.dropna(axis=0) 

index2 = cm_df[cm_df['cmaer'] == 'NO'].index
cm_df.drop(index2, inplace = True)

In [126]:
# merging AE and CM dataframes based on their subject ID
final = pd.merge(ae_df, cm_df, on=['subjectid'], how='inner')

In [123]:
# type 1 discrepancy result
final['discrepancy'] = final['aestdat'] > final['cmstdat']
type1 = final.drop(final[final['discrepancy'] == False].index)
type1

Unnamed: 0,siteid_x,subjectid,formid_x,aeterm,aestdat,aeendat,aecmgiv,aeongo,siteid_y,formid_y,cmtrt,cmaer,cmstdat,cmendat,cmongo,discrepancy
40,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-04-26,2018-04-26,NO,True
41,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,DAPTOMYCIN,YES,2018-05-02,2018-05-04,NO,True
51,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,CASPOFUNGIN ACETATE,YES,2018-05-03,2018-05-03,NO,True
69,5173,76809,38768,DRUG ERUPTION,2018-05-27,2018-06-06,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-04-26,2018-04-26,NO,True
70,5173,76809,38768,DRUG ERUPTION,2018-05-27,2018-06-06,YES,NO,5173,38623,DAPTOMYCIN,YES,2018-05-02,2018-05-04,NO,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85409,5018,18282353,38768,ANEMIA,2020-02-07,2020-02-22,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,True
85417,5018,18282353,38768,ANEMIA,2020-02-22,2020-03-12,YES,NO,5018,38623,AMIKACIN,YES,2020-01-23,2020-02-05,NO,True
85418,5018,18282353,38768,ANEMIA,2020-02-22,2020-03-12,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,True
85423,5018,18282353,38768,FEBRILE NEUTROPENIA,2020-03-08,2020-03-11,YES,NO,5018,38623,AMIKACIN,YES,2020-01-23,2020-02-05,NO,True


In [118]:
# type 2 discrepancy result
final['discrepancy'] = final['cmstdat'] > final['aeendat']
type2 = final.drop(final[final['discrepancy'] == False].index)
type2

Unnamed: 0,siteid_x,subjectid,formid_x,aeterm,aestdat,aeendat,aecmgiv,aeongo,siteid_y,formid_y,cmtrt,cmaer,cmstdat,cmendat,cmongo,discrepancy
0,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-07-14,2018-09-02,NO,True
1,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-08-08,2018-08-13,NO,True
2,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,METOCLOPRAMIDE HYDROCHLORIDE,YES,2018-08-29,2018-08-29,NO,True
3,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-07-31,2018-08-01,NO,True
4,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,32993,DAUNORUBICIN HYDROCHLORIDE,,2019-01-24,2019-01-26,NO,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85415,5018,18282353,38768,ASPARTATE AMINOTRANSFERASE INCREASED,2020-01-15,2020-01-20,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,True
85416,5018,18282353,38768,ASPARTATE AMINOTRANSFERASE INCREASED,2020-01-15,2020-01-20,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True
85422,5018,18282353,38768,THROMBOCYTOPENIA,2020-01-17,2020-02-07,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True
85428,5018,18282353,38768,ANEMIA,2020-01-18,2020-02-07,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True


In [119]:
# type 3 discrepancy result
type3 = final[final.duplicated(['aestdat', 'aeterm'])]
type3

Unnamed: 0,siteid_x,subjectid,formid_x,aeterm,aestdat,aeendat,aecmgiv,aeongo,siteid_y,formid_y,cmtrt,cmaer,cmstdat,cmendat,cmongo,discrepancy
1,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-08-08,2018-08-13,NO,True
2,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,METOCLOPRAMIDE HYDROCHLORIDE,YES,2018-08-29,2018-08-29,NO,True
3,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-07-31,2018-08-01,NO,True
4,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,32993,DAUNORUBICIN HYDROCHLORIDE,,2019-01-24,2019-01-26,NO,True
5,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,32993,CYTARABINE,,2019-02-20,2019-02-26,NO,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85425,5018,18282353,38768,FEBRILE NEUTROPENIA,2020-03-08,2020-03-11,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,False
85427,5018,18282353,38768,ANEMIA,2020-01-18,2020-02-07,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,False
85428,5018,18282353,38768,ANEMIA,2020-01-18,2020-02-07,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True
85430,5018,18282353,38768,CATHETER-ASSOCIATED SOFT TISSUE INFECTION,2020-01-23,2020-01-29,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,False


In [120]:
# type 4 discrepancy result
type4 = final[final.duplicated(['cmtrt', 'cmstdat'])]
type4

Unnamed: 0,siteid_x,subjectid,formid_x,aeterm,aestdat,aeendat,aecmgiv,aeongo,siteid_y,formid_y,cmtrt,cmaer,cmstdat,cmendat,cmongo,discrepancy
29,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-07-14,2018-09-02,NO,True
30,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-08-08,2018-08-13,NO,True
31,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,METOCLOPRAMIDE HYDROCHLORIDE,YES,2018-08-29,2018-08-29,NO,True
32,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,POTASSIUM CHLORIDE,YES,2018-07-31,2018-08-01,NO,True
33,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,32993,DAUNORUBICIN HYDROCHLORIDE,,2019-01-24,2019-01-26,NO,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85427,5018,18282353,38768,ANEMIA,2020-01-18,2020-02-07,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,False
85428,5018,18282353,38768,ANEMIA,2020-01-18,2020-02-07,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True
85429,5018,18282353,38768,CATHETER-ASSOCIATED SOFT TISSUE INFECTION,2020-01-23,2020-01-29,YES,NO,5018,38623,AMIKACIN,YES,2020-01-23,2020-02-05,NO,False
85430,5018,18282353,38768,CATHETER-ASSOCIATED SOFT TISSUE INFECTION,2020-01-23,2020-01-29,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,False


In [122]:
final['discrepancy'] = final['aeendat'] > final['cmendat']
type5 = final.drop(final[final['discrepancy'] == False].index)
type5

Unnamed: 0,siteid_x,subjectid,formid_x,aeterm,aestdat,aeendat,aecmgiv,aeongo,siteid_y,formid_y,cmtrt,cmaer,cmstdat,cmendat,cmongo,discrepancy
11,5173,76809,38768,BONE MARROW ASPIRATION SITE PAIN,2018-04-26,2018-04-30,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-04-26,2018-04-26,NO,True
40,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,ACETAMINOPHEN,YES,2018-04-26,2018-04-26,NO,True
41,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,DAPTOMYCIN,YES,2018-05-02,2018-05-04,NO,True
44,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,LENOGRASTIM,YES,2018-05-17,2018-05-20,NO,True
45,5173,76809,38768,FEBRILE NEUTROPENIA,2018-05-04,2018-05-21,YES,NO,5173,38623,POLYETHYLENE GLYCOL TREATED HUMAN NORMAL IMMUN...,YES,2018-05-17,2018-05-19,NO,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85420,5018,18282353,38768,THROMBOCYTOPENIA,2020-01-17,2020-02-07,YES,NO,5018,38623,AMIKACIN,YES,2020-01-23,2020-02-05,NO,True
85423,5018,18282353,38768,FEBRILE NEUTROPENIA,2020-03-08,2020-03-11,YES,NO,5018,38623,AMIKACIN,YES,2020-01-23,2020-02-05,NO,True
85424,5018,18282353,38768,FEBRILE NEUTROPENIA,2020-03-08,2020-03-11,YES,NO,5018,38623,FOSFOMYCIN,YES,2020-01-25,2020-02-07,NO,True
85425,5018,18282353,38768,FEBRILE NEUTROPENIA,2020-03-08,2020-03-11,YES,NO,5018,38623,LEUCOSTIM,YES,2020-03-08,2020-03-09,NO,True
