# Tabular Model: Predicting Antibiotic Susceptibility (Blood Samples)

In [1]:
from fastai import *
from fastai.tabular import *

In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Location of the source CSV, kept in one named constant so the notebook can be
# pointed at another copy of the file without touching the read_csv call.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook anywhere else.
PP_CSV_PATH = '/Users/deba_mycol/Zevac_Unified/zevac_unified/pp.csv'

# Columns requested from the CSV via `usecols`.
PP_USECOLS = [
    # record identifiers
    'slno', 'hosp_admission_id', 'hosp_patient_id',
    # patient / admission attributes
    'age', 'sex', 'department', 'sample_type', 'ward_name', 'caretype',
    'num_prev_adm', 'sc_date', 'interval', 'catheter',
    # comorbidity columns
    'cancer', 'cerebrovascular_disease', 'dementia_or_alzheimers', 'diabetes',
    'heart_disease', 'hiv_or_aids', 'hypertension', 'liver_disease',
    'pulmonary_disease_asthma', 'renal_disease',
    'rheumatic_or_connective_tissue_disease', 'no_comorbidity',
    # organism, drug and outcome
    'bacteria_name', 'antibiotic_name', 'gram', 'susceptibility',
]

# Load only the listed columns into `pos`.
pos = pd.read_csv(PP_CSV_PATH, usecols=PP_USECOLS)

In [4]:
# Keep only blood samples whose recorded sex is not 'Others', then renumber
# the surviving rows from zero.
blood = pos.loc[
    (pos['sample_type'] == 'Blood') & (pos['sex'] != 'Others')
].reset_index(drop=True)

In [5]:
# Build the modelling frame: pick the columns of interest, drop exact duplicate
# rows, and renumber the rows from zero.
# NOTE(review): 'bacteria_name' is kept here (so it takes part in the duplicate
# check) but it is not listed in cat_names / cont_names further down — confirm
# whether it was meant to be a feature.
df = (
    blood[[
        # patient / admission attributes
        'age', 'sex', 'caretype', 'department', 'num_prev_adm', 'interval',
        'catheter',
        # comorbidity columns
        'cancer', 'cerebrovascular_disease', 'dementia_or_alzheimers',
        'diabetes', 'heart_disease', 'hiv_or_aids', 'hypertension',
        'liver_disease', 'pulmonary_disease_asthma', 'renal_disease',
        'rheumatic_or_connective_tissue_disease', 'no_comorbidity',
        # organism, drug and outcome
        'bacteria_name', 'antibiotic_name', 'susceptibility',
    ]]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [10]:
# Column the model is trained to predict.
dep_var = 'susceptibility'

# Columns treated as categorical inputs (order kept as originally written).
cat_names = [
    'sex',
    'caretype',
    'department',
    'catheter',
    'cancer',
    'cerebrovascular_disease',
    'dementia_or_alzheimers',
    'diabetes',
    'heart_disease',
    'hiv_or_aids',
    'hypertension',
    'liver_disease',
    'pulmonary_disease_asthma',
    'renal_disease',
    'rheumatic_or_connective_tissue_disease',
    'no_comorbidity',
    'antibiotic_name',
]

# Columns treated as continuous inputs.
cont_names = [
    'age',
    'num_prev_adm',
    'interval',
]

# Preprocessing steps handed to TabularList.from_df when the DataBunch is built.
procs = [
    FillMissing,
    Categorify,
    Normalize,
]

In [17]:
# Hold out 30% of the rows; random_state=0 pins the shuffle so the split is
# reproducible across runs.
train, test = train_test_split(df, test_size=0.3, random_state=0)

# The held-out rows are consumed *positionally* further down (df.iloc[...] and
# split_by_idx(...)). test.index holds index labels, which coincide with row
# positions only while df carries a default 0..n-1 index. Translate the labels
# into positions explicitly so the cells below stay correct even if df's index
# changes; the result is kept as a pandas Index, as before.
testIndex = pd.Index(df.index.get_indexer(test.index))

In [18]:
# Display the held-out rows, looked up by position in df.
df.iloc[testIndex]

Unnamed: 0,age,sex,caretype,department,num_prev_adm,interval,catheter,cancer,cerebrovascular_disease,dementia_or_alzheimers,...,hiv_or_aids,hypertension,liver_disease,pulmonary_disease_asthma,renal_disease,rheumatic_or_connective_tissue_disease,no_comorbidity,bacteria_name,antibiotic_name,susceptibility
7775,3.00,Male,WARD,Paediatrics,0,9.0,No,0,0,0,...,0,0,0,0,0,0,1,Enterococcus faecium,Penicillin,R
29077,20.00,Female,WARD,Medicine,0,31.0,No,0,0,0,...,0,0,0,0,0,0,1,Pseudomonas,Ceftazidime,S
12336,13.00,Male,WARD,Paediatrics,0,5.0,No,0,0,0,...,0,0,0,0,0,0,1,Acinetobacter,Meropenem,S
5167,64.00,Female,WARD,Gastroenterology,0,0.0,No,0,0,0,...,0,0,1,0,0,0,0,Klebsiella pneumoniae,Cefotaxime,R
10346,14.00,Male,ICU,Paediatrics,0,17.0,No,0,0,0,...,0,0,0,0,0,0,0,Klebsiella pneumoniae,Cefoperazone + Sulbactam,R
2330,15.00,Male,WARD,Neuro Anesthesia,0,14.0,No,0,0,0,...,0,0,0,0,0,0,1,Escherichia coli,Amikacin,S
8344,15.00,Female,WARD,Hematology,0,19.0,No,1,0,0,...,0,0,0,0,0,0,0,"Staphylococcus, coagulase negative",Chloramphenicol,S
2124,25.00,Male,WARD,Surgery,0,24.0,No,0,0,0,...,0,0,0,0,0,0,1,Enterococcus faecium,Tetracycline,S
3991,30.00,Male,WARD,Gastroenterology,0,3.0,No,1,0,0,...,0,0,1,0,0,0,0,Pseudomonas aeruginosa,Amikacin,S
16574,13.00,Female,ICU,Paediatrics,0,2.0,No,1,0,0,...,0,0,0,0,0,0,0,Acinetobacter,Piperacillin + Tazobactam,R


In [19]:
# Rebinds `test` (until now the held-out DataFrame from train_test_split) to a
# TabularList built from a copy of the same held-out rows. No label column is
# attached here.
test = TabularList.from_df(
    df.iloc[testIndex].copy(),
    cat_names=cat_names,
    cont_names=cont_names,
)

In [20]:
# Assemble the DataBunch used for training.
data = (
    # every row of df, with the preprocessing steps listed in `procs`
    TabularList.from_df(
        df,
        cat_names=cat_names,
        cont_names=cont_names,
        procs=procs,
    )
    # rows at the held-out positions form the validation set
    .split_by_idx(testIndex)
    # target column
    .label_from_df(cols=dep_var)
    # the same held-out rows are also attached as the test set
    .add_test(test)
    .databunch()
)

In [21]:
# Show ten rows from a batch as the model will see them (the continuous columns
# appear already rescaled in the rendered output).
data.show_batch(rows=10)

sex,caretype,department,catheter,cancer,cerebrovascular_disease,dementia_or_alzheimers,diabetes,heart_disease,hiv_or_aids,hypertension,liver_disease,pulmonary_disease_asthma,renal_disease,rheumatic_or_connective_tissue_disease,no_comorbidity,antibiotic_name,age,num_prev_adm,interval,target
Male,WARD,Skin,No,0,0,0,0,0,0,0,0,0,0,0,1,Piperacillin + Tazobactam,-0.1559,0.0,-0.5847,R
Female,ICU,Nephrology,No,0,0,0,0,0,0,0,0,0,0,0,1,Amikacin,0.0536,0.0,2.0088,R
Female,WARD,Paediatrics,No,0,0,0,0,0,0,0,0,0,0,0,1,Ceftazidime,-1.2975,0.0,-0.6621,S
Male,ICU,Neuro Surgery,No,1,0,0,0,0,0,0,0,0,0,0,0,Ciprofloxacin,0.9752,0.0,-0.5073,R
Male,ICU,Neurology,No,0,0,0,0,0,0,0,0,0,0,0,1,Cefotaxime,1.0171,0.0,0.2282,R
Female,WARD,Nephrology,No,0,0,0,0,0,0,0,0,1,0,0,0,Ciprofloxacin,1.3942,0.0,-0.6234,S
Male,ICU,Medicine,No,0,0,0,0,0,0,0,0,0,0,0,1,Imipenem,2.232,0.0,-0.546,S
Female,WARD,Department of Emergency Medicine,No,0,0,0,0,0,0,0,0,0,0,0,1,Ciprofloxacin,-1.2904,0.0,-0.6621,R
Male,ICU,Gastroenterology,No,0,0,0,0,0,0,0,1,0,0,0,0,Amoxicillin + Clavulanic acid,0.9752,0.0,-0.3524,R
Male,WARD,Department of Emergency Medicine,No,0,0,0,0,0,0,0,0,0,0,0,1,Ciprofloxacin,-0.9938,0.0,-0.6621,S


In [22]:
# Tabular learner over `data` with hidden layer sizes 200 and 100; accuracy is
# reported alongside the losses during training.
learn = tabular_learner(
    data,
    layers=[200, 100],
    metrics=accuracy,
)

In [23]:
# Train for a single epoch with a learning rate of 1e-2.
learn.fit(1, 1e-2)

epoch,train_loss,valid_loss,accuracy,time
0,0.539031,0.529441,0.719052,00:05


# Inference

In [24]:
# Take the first row of df as a sample input for prediction.
# NOTE(review): row 0 is not guaranteed to belong to the held-out split, so it
# may be a row the model was trained on.
row = df.iloc[0]

In [25]:
# Predict for the sample row; the rendered result is a tuple of the predicted
# category, its class index, and the per-class probabilities.
learn.predict(row)

(Category R, tensor(0), tensor([0.7878, 0.2122]))