In [130]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [131]:
df = pd.read_csv('/home/jupyter/charliemacuject/pharma_reports/data/dr2_boronia.csv')

In [132]:
class DrAudit:
    
    def __init__(self, df):
        self.df = df
    
    def study_period(self):
        # study period
        dates = pd.to_datetime(df['admission_date'])
        earliest = dates.min()
        latest = dates.max()
        return earliest, latest
    
    def num_patients(self):
        # number of patients
        return len(df['ur'].unique())
    
    def num_eyes(self):
        # number of eyes
        return len(df['eye_id'].unique())
    
    def total_visits(self):
        # total visits
        return len(df)
    
    def initiation_drug(self, id_number):
        '''
        Input: patient id_number corresponding to dataframe.
        Output: string of initiation drug for that patient.
        '''
        pdf = self.df[self.df['eye_id'] == id_number]
        pdf.dropna(subset=['Drug'], inplace=True)
        pdf['admission_date'] = pd.to_datetime(pdf['admission_date'])
        pdf = pdf.sort_values(by=['admission_date'])
        return pdf['Drug'].iloc[0]
    
    def initiation_all(self):
        '''
        Input: dataframe of all patients.
        Output: list of all initiation drugs.
        '''
        names = ['Lucentis', 'Eylea', 'Avastin']
        drugs = []
        self.df.dropna(subset=['Drug'], inplace=True)
        id_list = df['eye_id'].unique()
        for eye in id_list:
            drug = self.initiation_drug(eye)
            if drug in names:
                drugs.append(drug)
        luc_int = np.round(drugs.count('Lucentis') / len(drugs), 3)
        eyl_int = np.round(drugs.count('Eylea') / len(drugs), 3)
        av_int = np.round(drugs.count('Avastin') / len(drugs), 3)
        return luc_int, eyl_int, av_int
    
    def interval_column(self, pdf): # THIS NEEDS TO GO IN THE DF EDITING CLASS
        '''
        Input: patient dataframe.
        Output: patient dataframe with additional column for interval length.
        '''
        pdf.dropna(subset=['admission_date'], inplace=True)
        pdf['admission_date'] = pd.to_datetime(pdf['admission_date'])
        pdf = pdf.sort_values(by=['admission_date'])
        dates = pdf['admission_date'].reset_index(drop=True)
        intervals = [0]
        for i in range(len(dates)-1):
            initial, current = dates[i], dates[i+1]
            interval = int((current - initial).days / 7)
            intervals.append(interval)
        pdf['Interval'] = intervals
        return pdf.reset_index(drop=True)
    
    def interval_df(self): # ALSO NEEDS TO GO IN DF EDITING CLASS
        '''
        Input: whole dataframe.
        Output: whole dataframe with additional column for interval length.
        '''
        id_list = self.df['eye_id'].unique()
        frames = []
        for eye in id_list:
            pdf = self.df[self.df['eye_id'] == eye]
            pdf = self.interval_column(pdf)
            frames.append(pdf)
        return pd.concat(frames)

    def irf_extred(self):
        # this function counts the irf values when extending and reducing
        df = self.interval_df()
        df.dropna(subset=['irf'], inplace=True)
        df = df[df['irf'] < 1000]
        id_list = df['eye_id'].unique()
        irf_extend = []
        irf_reduce = []
        irf_maintain = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            for i in range(len(pdf)-1):
                if pdf['Interval'].iloc[i+1] > pdf['Interval'].iloc[i]:
                    irf_extend.append(pdf['irf'].iloc[i+1])
                elif pdf['Interval'].iloc[i+1] < pdf['Interval'].iloc[i]:
                    irf_reduce.append(pdf['irf'].iloc[i+1])    
                else:
                    irf_maintain.append(pdf['irf'].iloc[i+1])   
        return np.mean(irf_extend), np.mean(irf_reduce), np.mean(irf_maintain)
    
    # this function counts the srf values when extending and reducing
    def srf_extred(self):
        df = self.interval_df()
        df.dropna(subset=['srf'], inplace=True)
        df = df[df['srf'] < 1000]
        id_list = df['eye_id'].unique()
        srf_extend = []
        srf_reduce = []
        srf_maintain = []
        for eye in id_list:
            pdf = df[df['eye_id'] == eye]
            for i in range(len(pdf)-1):
                if pdf['Interval'].iloc[i+1] > pdf['Interval'].iloc[i]:
                    srf_extend.append(pdf['srf'].iloc[i+1])
                elif pdf['Interval'].iloc[i+1] < pdf['Interval'].iloc[i]:
                    srf_reduce.append(pdf['srf'].iloc[i+1])    
                else:
                    srf_maintain.append(pdf['srf'].iloc[i+1])   
        return np.mean(srf_extend), np.mean(srf_reduce), np.mean(srf_maintain)

In [162]:
class Results(DrAudit):
    
    def __init__(self, df):
        self.df = df
        
    def results_list(self):
        results_list = ['num_patients', 
                        'num_eyes', 'total_visits', 'initiation_lucentis',
                        'initiation_eylea', 'initiation_avastin',
                        'irf_extend', 'irf_reduce', 'irf_maintain',]
        return results_list
    
    def get_results(self):
        rlst = []
        rlst.append(DrAudit.num_patients(self))
        rlst.append(DrAudit.num_eyes(self))
        rlst.append(DrAudit.total_visits(self))
        luc_int, eyl_int, av_int = DrAudit.initiation_all(self)
        rlst.append(luc_int)
        rlst.append(eyl_int)
        rlst.append(av_int)
        irf_extend, irf_reduce, irf_maintain = DrAudit.irf_extred(self)
        srf_extend, srf_reduce, srf_maintain = DrAudit.srf_extred(self)
        rlst.append(irf_extend)
        rlst.append(irf_reduce)
        rlst.append(irf_maintain)
        return rlst
    
    def create_table(self):
        lst = self.get_results()
        results_list = self.results_list()
        return pd.DataFrame(lst, index = results_list, columns=['Dr2'])

In [163]:
table1 = Results(df)
to_upload = table1.create_table()

In [164]:
to_upload

Unnamed: 0,Dr2
num_patients,373.0
num_eyes,744.0
total_visits,22811.0
initiation_lucentis,0.616
initiation_eylea,0.367
initiation_avastin,0.017
irf_extend,56.660828
irf_reduce,55.090423
irf_maintain,51.162084


In [99]:
to_upload.to_csv('/home/jupyter/charliemacuject/pharma_reports/toupload.csv')