In [1]:
%cd ..
%cd root

/
/root


In [24]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
base_dir = ""
df_train = pd.read_csv(os.path.join(base_dir,"pro_train.csv"))
df_test = pd.read_csv(os.path.join(base_dir,"pro_test.csv"))
df_train.head(5)

Unnamed: 0.1,Unnamed: 0,Hospital_type_code,City_Code_Hospital,Hospital_region_code,Available Extra Rooms in Hospital,Department,Ward_Type,Ward_Facility_Code,Bed Grade,City_Code_Patient,Type of Admission,Severity of Illness,Visitors with Patient,Age,Admission_Deposit,Stay
0,231676,0,7,1,4,2,1,2,2.0,8.0,0,1,4,4,6247.0,2
1,166821,0,7,1,2,3,3,2,1.0,2.0,0,1,6,6,8000.0,10
2,70566,1,2,1,2,3,2,3,3.0,7.0,0,2,3,2,4987.0,1
3,197982,1,2,1,2,2,1,3,3.0,14.0,1,1,3,6,7210.0,6
4,280389,3,13,1,4,3,2,1,2.0,8.0,2,1,2,4,3178.0,5


In [25]:
cats = ["Hospital_type_code","City_Code_Hospital","Hospital_region_code","Department","City_Code_Patient", "Ward_Type", "Ward_Facility_Code", "Type of Admission"]
nums = ["Severity of Illness", "Age", "Bed Grade", "Admission_Deposit", "Visitors with Patient"]
print(len(cats), len(nums))
print(len(df_train.columns))
# stay and unnamed are dropped, 16 - 2 = 14

8 5
16


In [26]:
from sklearn.preprocessing import MinMaxScaler,StandardScaler
sc=StandardScaler()
sc.fit(df_train.copy().drop(columns=cats+["Stay","Unnamed: 0"]))

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
oh = OneHotEncoder()
oh.fit(df_train.Stay.to_numpy().reshape(-1,1))

OneHotEncoder(categories='auto', drop=None, dtype=<class 'numpy.float64'>,
              handle_unknown='error', sparse=True)

In [27]:
embedding_sizes = [(int(max(df_train[cat].unique())+1), min(50, (len(df_train[cat].unique())+1)//2)) for cat in cats]
embedding_sizes

[(7, 4), (14, 6), (3, 2), (5, 3), (39, 19), (6, 3), (6, 3), (3, 2)]

In [28]:
df_train = df_train.drop(["Unnamed: 0"], axis = 1)
df_test = df_test.drop(["Unnamed: 0"], axis=1)

In [29]:
from torch.utils.data import Dataset, DataLoader
class TabDataset(Dataset):
    def __init__(self, df, cats):
        self.x_cat = df.loc[:,cats].copy().values.astype(np.int64) #categorical columns
        self.x_num = sc.transform(df.drop(columns=cats+["Stay"]).copy()).astype(np.float32) #numerical columns
        self.y = oh.transform(df.Stay.to_numpy().reshape(-1,1)).toarray()
    def __len__(self): 
        return len(self.y)
    def __getitem__(self, idx):
        return self.x_cat[idx], self.x_num[idx], self.y[idx]

In [30]:
train_data = TabDataset(df_train, cats)
test_data = TabDataset(df_test, cats)

batch_size = 200

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last = True)
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last = True)

In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ANN(nn.Module):
    def __init__(self, embedding_sizes, n_cont):
        super().__init__()
        self.n_cont = n_cont
        self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])
        n_emb = sum(e.embedding_dim for e in self.embeddings) #length of all embeddings combined
        self.emb_drop = nn.Dropout(0.6)
        
        self.bn1 = nn.BatchNorm1d(n_cont)
        
        self.n_emb, self.n_cont = n_emb, n_cont
        self.linearBlock = nn.Sequential(
            nn.Linear(self.n_emb + self.n_cont, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(0.4),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.Dropout(0.4),
            nn.Linear(128,64),
            nn.BatchNorm1d(64),
            nn.Dropout(0.2),
            nn.Linear(64,11)
        )
        
    def forward(self, x1, x2):
        x_1 = [em(x1[:,i]) for i, em in enumerate(self.embeddings)]
            
        x_1 = torch.cat(x_1, axis = 1)
        x_1 = self.emb_drop(x_1)
        x_2 = self.bn1(x2)
        
        x = torch.cat([x_1, x_2], axis = 1)
        y = self.linearBlock(x)
        
        return y

In [35]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
model = ANN(embedding_sizes, 6)
model.to(device)
lr=0.05
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 100
print(model)

cuda:0
ANN(
  (embeddings): ModuleList(
    (0): Embedding(7, 4)
    (1): Embedding(14, 6)
    (2): Embedding(3, 2)
    (3): Embedding(5, 3)
    (4): Embedding(39, 19)
    (5): Embedding(6, 3)
    (6): Embedding(6, 3)
    (7): Embedding(3, 2)
  )
  (emb_drop): Dropout(p=0.6, inplace=False)
  (bn1): BatchNorm1d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linearBlock): Sequential(
    (0): Linear(in_features=48, out_features=256, bias=True)
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Dropout(p=0.4, inplace=False)
    (3): Linear(in_features=256, out_features=128, bias=True)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): Dropout(p=0.4, inplace=False)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): Dropout(p=0.2, inplace=False)
    (9): Linear(in_fea

In [36]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)   
    y_test_num = torch.argmax(y_test, axis=1)
    correct_pred = (y_pred_tags == y_test_num).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    return acc*100

In [37]:
model.train()
loss_min = np.inf
for i in range(epochs):
    for batch_idx, (x1, x2, y) in enumerate(train_loader):
        x1, x2, y = x1.to(device), x2.to(device), y.to(device)
        model.zero_grad()
        output = model(x1, x2)
        loss = criterion(output, torch.argmax(y, axis=1))
        loss.backward()
        optimizer.step()
        if batch_idx == 0:
            test_acc = []
            for xx1, xx2, yy in test_loader:
              xx1, xx2, yy = xx1.to(device), xx2.to(device), yy.to(device)
              model.zero_grad()
              yyout = model(xx1, xx2)
              test_acc.append(multi_acc(yyout, yy).detach().cpu().numpy())
            print("Epoch: {}, Acc: {:.2f}, Val Acc: {:.2f}".format(i, multi_acc(output, y), np.mean(test_acc)))

Epoch: 0, Acc: 10.50, Val Acc: 17.23
Epoch: 1, Acc: 35.50, Val Acc: 36.25
Epoch: 2, Acc: 39.50, Val Acc: 36.18
Epoch: 3, Acc: 32.50, Val Acc: 36.50
Epoch: 4, Acc: 31.50, Val Acc: 36.01
Epoch: 5, Acc: 34.50, Val Acc: 36.25
Epoch: 6, Acc: 35.50, Val Acc: 35.78
Epoch: 7, Acc: 28.00, Val Acc: 33.46
Epoch: 8, Acc: 33.00, Val Acc: 36.35
Epoch: 9, Acc: 32.50, Val Acc: 34.26
Epoch: 10, Acc: 32.50, Val Acc: 33.05
Epoch: 11, Acc: 36.00, Val Acc: 36.01
Epoch: 12, Acc: 36.50, Val Acc: 35.48
Epoch: 13, Acc: 37.00, Val Acc: 36.52
Epoch: 14, Acc: 37.50, Val Acc: 35.55
Epoch: 15, Acc: 36.50, Val Acc: 36.59
Epoch: 16, Acc: 34.50, Val Acc: 36.09
Epoch: 17, Acc: 31.50, Val Acc: 36.47
Epoch: 18, Acc: 28.50, Val Acc: 32.18
Epoch: 19, Acc: 29.50, Val Acc: 34.84
Epoch: 20, Acc: 34.50, Val Acc: 35.03
Epoch: 21, Acc: 35.50, Val Acc: 34.37
Epoch: 22, Acc: 39.50, Val Acc: 36.61
Epoch: 23, Acc: 33.50, Val Acc: 36.89
Epoch: 24, Acc: 34.50, Val Acc: 35.55
Epoch: 25, Acc: 43.00, Val Acc: 35.70
Epoch: 26, Acc: 41.50,

In [None]:
# The highest is 43% train acc, 37% test acc