In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the dataset every later cell works from (path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer='data/data_clean.csv')

## 1) Fixed-dose

In [4]:
# A fixed-dose policy always predicts 'medium', so its accuracy equals the
# share of rows whose correct dosage is 'medium'.
is_medium = df['correct_dosage'] == 'medium'
int(is_medium.sum()) / len(df)

0.611794500723589

## 2) Clinical dosing algorithm

In [30]:
# Build the covariates used by the clinical dosing formula on a copy, so the
# loaded frame `df` is left untouched and this cell can be re-run safely.
alt = df.copy()


def _is_one(column):
    """Return a boolean Series that is True where `column` holds the value 1.

    The comparison happens after numeric coercion, so 1, 1.0, '1' and '1.0'
    are all recognised regardless of how the column was parsed; non-numeric
    entries (e.g. 'unknown') and missing values count as False.
    """
    return pd.to_numeric(column, errors='coerce').eq(1)


# The leading character of the 'Age' label is taken as the decade. Labels
# that do not start with a digit (e.g. 'unknown') and missing values become
# NaN instead of raising.
alt['Age in decades'] = pd.to_numeric(alt['Age'].str[0], errors='coerce')

# Replace the 'unknown' sentinel with NaN. The columns keep their original
# dtype here; they are cast to float where the formula is evaluated.
for measurement in ('Height (cm)', 'Weight (kg)'):
    alt.loc[alt[measurement] == 'unknown', measurement] = np.nan

# 0/1 race indicators stored as floats; any value of 'Race' not matched by an
# indicator leaves all three at 0.
alt['Asian race'] = alt['Race'].eq('Asian').astype(float)
alt['Black or African American race'] = alt['Race'].eq('Black or African American').astype(float)
alt['Missing or mixed race'] = alt['Race'].eq('Unknown').astype(float)

# 1 if the patient is flagged for any of the three listed drugs, else 0.
alt['Enzyme inducer status'] = (
    _is_one(alt['Carbamazepine (Tegretol)'])
    | _is_one(alt['Phenytoin (Dilantin)'])
    | _is_one(alt['Rifampin or Rifampicin'])
).astype(int)

# Same numeric-coercing comparison as above, so this flag no longer depends
# on the column having been read as the exact string '1.0'.
alt['Amiodarone status'] = _is_one(alt['Amiodarone (Cordarone)']).astype(float)

In [34]:
# Inspect the dtypes of the model covariates before plugging them into the
# dosing formula.
alt.dtypes.loc[[
    'Age in decades',
    'Height (cm)',
    'Weight (kg)',
    'Asian race',
    'Black or African American race',
    'Missing or mixed race',
    'Enzyme inducer status',
    'Amiodarone status',
]]

Age in decades                    float64
Height (cm)                        object
Weight (kg)                        object
Asian race                        float64
Black or African American race    float64
Missing or mixed race             float64
Enzyme inducer status               int64
Amiodarone status                 float64
dtype: object

In [39]:
# Linear model for the square root of the weekly dose: an intercept plus one
# weighted term per covariate. Terms are accumulated in the order listed so
# the floating-point result matches a left-to-right evaluation.
INTERCEPT = 4.0376
COEFFICIENTS = {
    'Age in decades': -0.2546,
    'Height (cm)': 0.0118,
    'Weight (kg)': 0.0134,
    'Asian race': -0.6752,
    'Black or African American race': 0.4060,
    'Missing or mixed race': 0.0443,
    'Enzyme inducer status': 1.2799,
    'Amiodarone status': -0.5695,
}

sqrt_dose = INTERCEPT
for covariate, weight in COEFFICIENTS.items():
    # Cast to float because height and weight are stored as object dtype.
    sqrt_dose = sqrt_dose + weight * alt[covariate].astype(float)
alt['sqrt_weekly_dose'] = sqrt_dose

# Bucket the prediction: weekly dose below 21 is 'low', above 49 is 'high',
# everything else 'medium'. The cut-offs are compared in square-root space.
# NOTE: rows whose score is NaN (any covariate missing) fail both comparisons
# and therefore also end up labelled 'medium'.
dose_root = alt['sqrt_weekly_dose']
alt['pred_weekly_dose'] = np.select(
    [dose_root < np.sqrt(21), dose_root > np.sqrt(49)],
    ['low', 'high'],
    default='medium',
)

In [42]:
# Accuracy of the clinical dosing algorithm: share of rows where the predicted
# bucket equals the correct dosage bucket.
is_correct = alt['correct_dosage'] == alt['pred_weekly_dose']
int(is_correct.sum()) / len(alt)

0.6410998552821997