In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('/home/jupyter/charliemacuject/pharma_reports/data/dr2_boronia.csv')

In [3]:
class DrAudit:
    
    def __init__(self, df):
        self.df = df
    
    def study_period(self):
        # study period
        dates = pd.to_datetime(df['admission_date'])
        earliest = dates.min()
        latest = dates.max()
        return earliest, latest
    
    def num_patients(self):
        # number of patients
        return len(df['ur'].unique())
    
    def num_eyes(self):
        # number of eyes
        return len(df['eye_id'].unique())
    
    def total_visits(self):
        # total visits
        return len(df)
    
    def initiation_drug(self, id_number):
        '''
        Input: patient id_number corresponding to dataframe.
        Output: string of initiation drug for that patient.
        '''
        pdf = self.df[self.df['eye_id'] == id_number]
        pdf.dropna(subset=['Drug'], inplace=True)
        pdf['admission_date'] = pd.to_datetime(pdf['admission_date'])
        pdf = pdf.sort_values(by=['admission_date'])
        return pdf['Drug'].iloc[0]
    
    def initiation_all(self):
        '''
        Input: dataframe of all patients.
        Output: list of all initiation drugs.
        '''
        names = ['Lucentis', 'Eylea', 'Avastin']
        drugs = []
        self.df.dropna(subset=['Drug'], inplace=True)
        id_list = df['eye_id'].unique()
        for eye in id_list:
            drug = self.initiation_drug(eye)
            if drug in names:
                drugs.append(drug)
        luc_int = np.round(drugs.count('Lucentis') / len(drugs), 3)
        eyl_int = np.round(drugs.count('Eylea') / len(drugs), 3)
        av_int = np.round(drugs.count('Avastin') / len(drugs), 3)
        return luc_int, eyl_int, av_int
    
    def interval_column(self, pdf): # THIS NEEDS TO GO IN THE DF EDITING CLASS
        '''
        Input: patient dataframe.
        Output: patient dataframe with additional column for interval length.
        '''
        pdf.dropna(subset=['admission_date'], inplace=True)
        pdf['admission_date'] = pd.to_datetime(pdf['admission_date'])
        pdf = pdf.sort_values(by=['admission_date'])
        dates = pdf['admission_date'].reset_index(drop=True)
        intervals = [0]
        for i in range(len(dates)-1):
            initial, current = dates[i], dates[i+1]
            interval = int((current - initial).days / 7)
            intervals.append(interval)
        pdf['Interval'] = intervals
        return pdf.reset_index(drop=True)
    
    def interval_df(self): # ALSO NEEDS TO GO IN DF EDITING CLASS
        '''
        Input: whole dataframe.
        Output: whole dataframe with additional column for interval length.
        '''
        id_list = self.df['eye_id'].unique()
        frames = []
        for eye in id_list:
            pdf = self.df[self.df['eye_id'] == eye]
            pdf = self.interval_column(pdf)
            frames.append(pdf)
        return pd.concat(frames)

    def irf_extred(self):
        # this function counts the irf values when extending and reducing
        df = self.interval_df()
        df.dropna(subset=['irf'], inplace=True)
        df = df[df['irf'] < 1000]
        id_list = df['eye_id'].unique()
        irf_extend = []
        irf_reduce = []
        irf_maintain = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            for i in range(len(pdf)-1):
                if pdf['Interval'].iloc[i+1] > pdf['Interval'].iloc[i]:
                    irf_extend.append(pdf['irf'].iloc[i+1])
                elif pdf['Interval'].iloc[i+1] < pdf['Interval'].iloc[i]:
                    irf_reduce.append(pdf['irf'].iloc[i+1])    
                else:
                    irf_maintain.append(pdf['irf'].iloc[i+1])   
        return np.mean(irf_extend), np.mean(irf_reduce), np.mean(irf_maintain)
    
    # this function counts the srf values when extending and reducing
    def srf_extred(self):
        df = self.interval_df()
        df.dropna(subset=['srf'], inplace=True)
        df = df[df['srf'] < 1000]
        id_list = df['eye_id'].unique()
        srf_extend = []
        srf_reduce = []
        srf_maintain = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            for i in range(len(pdf)-1):
                if pdf['Interval'].iloc[i+1] > pdf['Interval'].iloc[i]:
                    srf_extend.append(pdf['srf'].iloc[i+1])
                elif pdf['Interval'].iloc[i+1] < pdf['Interval'].iloc[i]:
                    srf_reduce.append(pdf['srf'].iloc[i+1])    
                else:
                    srf_maintain.append(pdf['srf'].iloc[i+1])   
        return np.mean(srf_extend), np.mean(srf_reduce), np.mean(srf_maintain)

In [4]:
class Results(DrAudit):
    
    def __init__(self, df):
        self.df = df
        
    def results_list(self):
        results_list = ['num_patients', 
                        'num_eyes', 'total_visits', 'initiation_lucentis',
                        'initiation_eylea', 'initiation_avastin',
                        'irf_extend', 'irf_reduce', 'irf_maintain',
                        'srf_extend', 'srf_reduce', 'srf_maintain',]
        return results_list
    
    def get_results(self):
        rlst = []
        rlst.append(DrAudit.num_patients(self))
        rlst.append(DrAudit.num_eyes(self))
        rlst.append(DrAudit.total_visits(self))
        luc_int, eyl_int, av_int = DrAudit.initiation_all(self)
        rlst.append(luc_int)
        rlst.append(eyl_int)
        rlst.append(av_int)
        irf_extend, irf_reduce, irf_maintain = DrAudit.irf_extred(self)
        srf_extend, srf_reduce, srf_maintain = DrAudit.srf_extred(self)
        rlst.append(irf_extend)
        rlst.append(irf_reduce)
        rlst.append(irf_maintain)
        rlst.append(srf_extend)
        rlst.append(srf_reduce)
        rlst.append(srf_maintain)
        return rlst
    
    def create_table(self):
        lst = self.get_results()
        results_list = self.results_list()
        return pd.DataFrame(lst, index = results_list, columns=['Dr2'])

In [5]:
table1 = Results(df)
to_upload = table1.create_table()

In [6]:
to_upload

Unnamed: 0,Dr2
num_patients,374.0
num_eyes,746.0
total_visits,24048.0
initiation_lucentis,0.616
initiation_eylea,0.367
initiation_avastin,0.017
irf_extend,56.660828
irf_reduce,55.090423
irf_maintain,51.162084
srf_extend,35.384565


In [99]:
to_upload.to_csv('/home/jupyter/charliemacuject/pharma_reports/toupload.csv')

# Alex Tan

## Visual acuity testing class

In [152]:
class Vision:
    
    # df_list can be either drugs, improved, or switched
    def __init__(self, df_list):
        self.df_list = df_list
        
    def get_df(self, drop_drug_na=False):
        df = pd.read_csv('/home/jupyter/charliemacuject/pharma_reports/data/dr2_boronia_2016.csv')
        df['admission_date'] = pd.to_datetime(df['admission_date'])
        if drop_drug_na:
            df = df.dropna(subset=['Drug'])
            df = df[df['Drug'] != 'nil']
            df = df[df['Drug'] != 'None']
            df = df[df['Drug'] != 'Brolucizumab']
        return df
        
    def get_dataframes(self):
        if self.df_list == 'drugs':
            lucentis_df, eylea_df, mult_df = self.drug_df_separator()
            return [lucentis_df, eylea_df, mult_df]
        elif self.df_list == 'switched':
            luc_eyl, eyl_luc = self.switch_df_separator()
            luc_eyl
            return [luc_eyl, eyl_luc]
        elif self.df_list == 'improved':
            improved, not_improved, overall = self.improved_df_separator()
            return [improved, not_improved, overall]
        elif self.df_list == 'all':
            lucentis_df, eylea_df, mult_df = self.drug_df_separator()
            improved, not_improved, overall = self.improved_df_separator()
            luc_eyl, eyl_luc = self.switch_df_separator()
            return [lucentis_df, eylea_df, mult_df, improved, not_improved, overall, luc_eyl, eyl_luc]
            
    # groups dataframes by patients who improved vs didn't
    def improved_df_separator(self):
        df = self.get_df()
        improved, not_improved = [], []
        id_list = df['eye_id'].unique()
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            pdf.dropna(subset=['visual_acuity'], inplace=True)
            pdf.sort_values(by=['admission_date'], inplace=True)
            vision = pdf['visual_acuity'].to_list()
            if max(vision) == vision[0]:
                not_improved.append(pdf)
            else:
                improved.append(pdf)
        return pd.concat(improved), pd.concat(not_improved), df
            
    
    # this function groups dataframes by patients with specific switches
    def switch_df_separator(self):
        df = self.get_df(drop_drug_na=True)
        df = df[df['Drug'] != 'Avastin']
        luc_eyl, eyl_luc = [], []
        drug_list = ['Lucentis', 'Eylea']
        id_list = df['eye_id'].unique()
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            drugs = pdf['Drug'].unique()
            if len(drugs) > 1:
                if drugs[0] == 'Lucentis' and drugs[1] == 'Eylea':
                    luc_eyl.append(pdf)
                elif drugs[0] == 'Eylea' and drugs[1] == 'Lucentis':
                    eyl_luc.append(pdf)
            else:
                pass
        return pd.concat(luc_eyl), pd.concat(eyl_luc)
    
    # this function groups dataframes by patients on single vs mult drugs
    def drug_df_separator(self):
        df = self.get_df(drop_drug_na=True)
        df = df.dropna(subset=['Drug'])
        df = df[df['Drug'] != 'nil']
        df = df[df['Drug'] != 'None']
        df = df[df['Drug'] != 'Brolucizumab']
        lucentis, eylea, avastin, mult = [], [], [], []
        drug_list = ['Lucentis', 'Eylea', 'Avastin']
        id_list = df['eye_id'].unique()
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            drugs = pdf['Drug'].unique()
            if len(drugs) == 1:
                if drugs[0] == 'Lucentis':
                    lucentis.append(pdf)
                elif drugs[0] == 'Eylea':
                    eylea.append(pdf)
                else:
                    avastin.append(pdf)
            else:
                mult.append(pdf)
        return pd.concat(lucentis), pd.concat(eylea), pd.concat(mult)
    
    def pvi(self, df):
        id_list = df['eye_id'].unique()
        pvi_list = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            pdf.sort_values(by=['admission_date'], inplace=True)
            pdf.dropna(subset=['visual_acuity'], inplace=True)
            visions = pdf['visual_acuity'].to_list()
            pvi_pdf = max(visions) - visions[0]
            pvi_list.append(pvi_pdf)
        return pvi_list
    
    def tpvi(self, df):
        id_list = df['eye_id'].unique()
        tpvi_list = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            pdf.sort_values(by=['admission_date'], inplace=True)
            dates = pdf['admission_date'].to_list()
            initial_date = dates[0]
            pdf.dropna(subset=['visual_acuity'], inplace=True)
            visions = pdf['visual_acuity'].to_list()
            ndf = pdf[pdf['visual_acuity'] == max(visions)]
            new_dates = ndf['admission_date'].to_list()
            time = (new_dates[0] - initial_date).days
            tpvi_list.append(time)
        return tpvi_list
    
    def ovc(self, df):
        id_list = df['eye_id'].unique()
        ovc_list = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            pdf.sort_values(by=['admission_date'], inplace=True)
            pdf.dropna(subset=['visual_acuity'], inplace=True)
            visions = pdf['visual_acuity'].to_list()
            recent = visions[-1]
            initial = visions[0]
            ovc_list.append(recent - initial)
        return ovc_list
    
    def vlp(self, df):
        id_list = df['eye_id'].unique()
        vlp_list = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            pdf.sort_values(by=['admission_date'], inplace=True)
            pdf.dropna(subset=['visual_acuity'], inplace=True)
            visions = pdf['visual_acuity'].to_list()
            recent = visions[-1]
            max_vision = max(visions)
            vlp_list.append(max_vision - recent)
        return vlp_list
    
    def list_results(self, df):
        pvi_list, tpvi_list, vlp_list, ovc_list = self.pvi(df), self.tpvi(df), self.vlp(df), self.ovc(df)
        return np.mean(pvi_list), np.mean(tpvi_list), np.mean(vlp_list), np.mean(ovc_list)
    
    def list_sds(self, df):
        pvi_list, tpvi_list, vlp_list, ovc_list = self.pvi(df), self.tpvi(df), self.vlp(df), self.ovc(df)
        lst = [pvi_list, tpvi_list, vlp_list, ovc_list]
        to_return = []
        for item in lst:
            ci = stats.t.interval(alpha=0.95, df=len(item)-1, loc=np.mean(item), scale=stats.sem(item))
            std = np.mean(item) - ci[0]
            to_return.append(std)
        return np.round(to_return[0], 2), np.round(to_return[1], 2), np.round(to_return[2], 2), np.round(to_return[3], 2)

    def print_results(self):
        dfs = self.get_dataframes()
        lst = []
        if self.df_list == 'drugs':
            print("Displaying results for Lucentis, Eylea, and Multiple Drugs.")
        elif self.df_list == 'improved':
            print("Displaying results for Improved, Not_Improved and Overall.")
        elif self.df_list == 'switched':
            print("Displaying results for patients who switched from Lucentis to Eylea, and Eylea to Lucentis.")
        else:
            print("Displaying results for Lucentis, Eylea, Multiple Drugs, Improved, Didn't Improve, Lucentis to Eylea, Eylea to Lucentis.")
        for dataframe in dfs:
            pvi, tpvi, vlp, ovc = self.list_results(dataframe)
            pvi_sd, tpvi_sd, vlp_sd, ovc_sd = self.list_sds(dataframe)
            print("  ")
            print("PVI = {} letters, CI = +/-{}".format(np.round(pvi, 2), pvi_sd))
            print("TPVI = {} days, CI = +/-{}".format(np.round(tpvi), tpvi_sd))
            print("VLP = {} letters, CI = +/-{}".format(np.round(vlp, 2), vlp_sd))
            print("OVC = {} letters, CI = +/-{}".format(np.round(ovc, 2), ovc_sd))
            print("  ")

In [154]:
improvements = Vision("improved")
improvements.print_results()

Displaying results for Improved, Not_Improved and Overall.
  
PVI = 24.68 letters, CI = +/-2.89
TPVI = 298.0 days, CI = +/-28.09
VLP = 9.54 letters, CI = +/-1.09
OVC = 15.14 letters, CI = +/-3.08
  
  
PVI = 0.0 letters, CI = +/-nan
TPVI = 0.0 days, CI = +/-nan
VLP = 8.14 letters, CI = +/-1.62
OVC = -8.14 letters, CI = +/-1.62
  
  
PVI = 17.15 letters, CI = +/-2.22
TPVI = 207.0 days, CI = +/-22.67
VLP = 9.12 letters, CI = +/-0.9
OVC = 8.03 letters, CI = +/-2.37
  


## ANCHOR plot

In [155]:
anchor = [4.6, 8.4, 9.8, 10, 9.9]

In [158]:
def vision_weeks(df, weeks):
    id_list = df['eye_id'].unique()
    df.dropna(subset=['visual_acuity'], inplace=True)
    lst = []
    for eye in id_list:
        pdf = df[df['eye_id'] == eye]
        pdf.sort_values(by=['admission_date'], inplace=True)
        visions = pdf['visual_acuity'].to_list()
        if len(visions) > (weeks + 1):
            initial = visions[0]
            current = visions[weeks]
            lst.append(current - initial)
    return np.mean(lst)

In [162]:
vision_weeks(df, 4)

10.223360655737705