In [1]:
# Library Imports
import os
import import_ipynb
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from statistics import mean, median, mode, stdev 

# from Descript import extract
%matplotlib inline

In [None]:
# Load the raw treatment export; the first row of the CSV holds the column names.
df = pd.read_csv("../DATA/2807.csv", sep=",", header=0)

# Parse the timestamp columns (read in as plain objects) into datetime64,
# each with the explicit format it is stored in.
datetime_formats = {
    'TreatmentStartDate': '%Y-%m-%d %H:%M',
    'CtrlDatetime': '%Y-%m-%d %H:%M',
    'installDate': '%Y-%m-%d',
}
for column, fmt in datetime_formats.items():
    df[column] = pd.to_datetime(df[column], format=fmt)

# Split the treatment start timestamp into separate calendar-date and
# time-of-day columns (Python date / time objects, not datetime64).
treatment_start = df['TreatmentStartDate']
df['treatmentDate'] = treatment_start.dt.date
df['treatmentTime'] = treatment_start.dt.time

In [None]:
# Daily compliance analysis.
#
# Steps: pick the relevant columns, split each dispensed amount into a day (AM)
# or evening (PM) bucket, aggregate to one row per client / product / date,
# label each row as compliant or not, plot the split, and publish `asR`
# (AS REQUIRED rows) and `df` (all aggregated rows) for the following cells.

# Select relevant attributes.
# .copy() detaches the selection from `df`, so the column assignments below
# write to an independent frame instead of triggering SettingWithCopyWarning.
dfa = df[[
    'CutitronicsClientID', 'CutitronicsSKUName', 'CutitronicsCartID',
    'installDate', 'TreatmentStartDate', 'treatmentDate', 'treatmentTime',
    'CartDispensedAmount', 'CartLevel', 'CartInitLevel',
    'presrcibedAM', 'prescribedPM', 'PrescriptionType', 'CtrlSkinHealth',
]].copy()

# A treatment whose start hour is between DAY_FIRST_HOUR and DAY_LAST_HOUR
# (inclusive, i.e. 04:00 up to 17:59) counts as a 'day' treatment; anything
# earlier or later counts as an 'evening' treatment.
DAY_FIRST_HOUR = 4
DAY_LAST_HOUR = 17

treatment_hour = dfa['treatmentTime'].map(lambda t: t.hour)
is_evening = (treatment_hour < DAY_FIRST_HOUR) | (treatment_hour > DAY_LAST_HOUR)

# The dispensed amount goes entirely into one bucket; the other bucket is 0.
dispensed = dfa['CartDispensedAmount']
dfa['actualAM'] = dispensed.where(~is_evening, 0)
dfa['actualPM'] = dispensed.where(is_evening, 0)

# Assign int64 data type to attributes as required.
dfa = dfa.astype({'actualAM': 'int64', 'actualPM': 'int64', 'CtrlSkinHealth': 'int64'})

# Group data by ClientID, Product and Date: one row per client/product/day,
# with the dispensed amounts summed and the skin-health score averaged.
dfa = dfa.groupby(['CutitronicsClientID', 'CutitronicsSKUName', 'treatmentDate']).agg({
    'installDate': 'last',
    'CutitronicsCartID': 'first',
    'CartDispensedAmount': 'sum',
    'CartLevel': 'last',
    'CartInitLevel': 'first',
    'presrcibedAM': 'first',
    'prescribedPM': 'last',
    'actualAM': 'sum',
    'actualPM': 'sum',
    'PrescriptionType': 'first',
    'CtrlSkinHealth': 'mean'
})

# Determine whether the client's actual usage matches the prescribed usage.
# DAILY prescriptions must match on BOTH the AM and the PM amount; every other
# prescription type complies when EITHER amount matches.
am_matches = dfa['presrcibedAM'] == dfa['actualAM']
pm_matches = dfa['prescribedPM'] == dfa['actualPM']
is_daily = dfa['PrescriptionType'] == 'DAILY'
complied = np.where(is_daily, am_matches & pm_matches, am_matches | pm_matches)
dfa['compliance'] = np.where(complied, 'Yes', 'No')

# Summing int64 columns already yields int64; the cast is kept as a cheap guard.
dfa = dfa.astype({'actualAM': 'int64', 'actualPM': 'int64'})

# Percentage of rows per compliance label; the bar labels come from the same
# Series so label order and bar heights cannot get out of step.
a = dfa['compliance'].value_counts(normalize=True) * 100
labels = a.index.tolist()
print(a)

plt.style.use('ggplot')

# NOTE(review): each row is one client/product/day, so these percentages are
# of treatment days rather than of distinct users - confirm the wording.
plt.bar(labels, a, color='green')
plt.xlabel("Compliance")  # was plt.xlabel(labels), which rendered the list repr
plt.ylabel("Percentage of Users")
plt.title("Percentage of Users Complying with Prescription")
plt.show()

# Publish the frames used by the following cells: the AS REQUIRED subset and
# the full aggregated frame, both with the group keys restored as columns.
asR = dfa.loc[dfa['PrescriptionType'] == 'AS REQUIRED'].reset_index()
df = dfa.reset_index()

In [None]:
def missed(dfName):
    """Fill in the days on which a DAILY prescription was not used.

    Keeps only the rows whose ``PrescriptionType`` is ``'DAILY'``, and for each
    client/product pair inserts one row for every calendar day between that
    pair's first and last treatment date that has no record. Inserted rows
    hold 0 in every data column, except ``compliance`` which is set to
    ``'MISSED'``.

    Parameters
    ----------
    dfName : pandas.DataFrame
        Must contain ``CutitronicsClientID``, ``CutitronicsSKUName``,
        ``treatmentDate``, ``PrescriptionType`` and ``compliance`` columns.

    Returns
    -------
    pandas.DataFrame
        The gap-filled DAILY rows with a flat index and ``treatmentDate`` as a
        datetime column.

    Side effects
    ------------
    Rebinds the module-level ``df`` to the returned frame and ``grouped_df``
    to the same data still indexed by (client, product, date). Existing
    callers read these globals instead of the return value.
    """
    global df
    global grouped_df

    def foo(gr):
        """Reindex one client/product group onto a continuous daily range."""
        gr = gr.set_index('treatmentDate')
        # Build the range from the raw dates before converting the index.
        idx = pd.date_range(gr.index.min(), gr.index.max())
        gr.index = pd.DatetimeIndex(gr.index)
        # Days without a record get a row filled with 0 in every column.
        return gr.reindex(idx, fill_value=0)

    daily = dfName[dfName['PrescriptionType'] == 'DAILY']
    df = daily.groupby(['CutitronicsClientID', 'CutitronicsSKUName']).apply(func=foo)

    # The group keys live in the index after apply(). Depending on the pandas
    # version they may also still be present as ordinary columns; drop those
    # copies if they exist so reset_index() can restore the keys cleanly.
    df = df.drop(['CutitronicsClientID', 'CutitronicsSKUName'], axis=1, errors='ignore')
    grouped_df = df
    df = df.reset_index()
    # The unnamed date level comes out of reset_index() as 'level_2'.
    df = df.rename(columns={"level_2": "treatmentDate"})

    # Inserted rows carry the fill value 0 in 'compliance'; relabel them.
    df.loc[df['compliance'] == 0, 'compliance'] = 'MISSED'
    return df
        
# Fill in the missed days for the DAILY prescriptions. missed() publishes its
# result by rebinding the module-level `df`, which is read on the line below.
missed(df)

# Join the gap-filled DAILY rows back together with the AS REQUIRED rows.
# join='inner' keeps only the columns both frames share. ignore_index=True
# gives the combined frame a fresh 0..n-1 index; the previous `.reindex()`
# with no arguments was a no-op and left duplicate index labels behind, which
# makes label-based access such as `main_df.loc[i, ...]` ambiguous.
main_df = pd.concat([df, asR], join='inner', ignore_index=True)