In [206]:
import pandas as pd
import numpy as np
np.set_printoptions(precision=3)

In [207]:
liver_data = pd.read_csv('../data/indian_liver_patient.csv')

# remove rows with missing data
liver_data = liver_data.dropna(subset=['Albumin_and_Globulin_Ratio'])

In [208]:
liver_data.describe()

Unnamed: 0,Age,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
count,579.0,579.0,579.0,579.0,579.0,579.0,579.0,579.0,579.0,579.0
mean,44.782383,3.315371,1.494128,291.366149,81.126079,110.414508,6.481693,3.138515,0.947064,1.284974
std,16.221786,6.227716,2.816499,243.561863,183.182845,289.850034,1.084641,0.794435,0.319592,0.451792
min,4.0,0.4,0.1,63.0,10.0,10.0,2.7,0.9,0.3,1.0
25%,33.0,0.8,0.2,175.5,23.0,25.0,5.8,2.6,0.7,1.0
50%,45.0,1.0,0.3,208.0,35.0,42.0,6.6,3.1,0.93,1.0
75%,58.0,2.6,1.3,298.0,61.0,87.0,7.2,3.8,1.1,2.0
max,90.0,75.0,19.7,2110.0,2000.0,4929.0,9.6,5.5,2.8,2.0


Implement optimization algorithm for binary classification of liver dieseae (2:+/ 1:-)

In [209]:
# make labels in range <0, 1> (makes calculation easier/faster)
labels = liver_data['Dataset'].values - 1

mapping = {'Female': 0, 'Male': 1}
liver_data = liver_data.replace({'Gender': mapping})
data = liver_data.drop(columns=['Dataset']).values

In [210]:
# normalize input data
max_data = np.max(data, 0)
data_norm = data / max_data

In [211]:
def get_train_and_validatation_subsets(data, labels, train_subset_size=0.8):
    train_size = int(train_subset_size * len(labels))
    choice = np.random.choice(range(data.shape[0]), size=(train_size,), replace=False)    
    ind = np.zeros(data.shape[0], dtype=bool)
    ind[choice] = True
    valid = ~ind
    
    data_train = data[ind]
    labels_train = labels[ind]
    data_valid = data[valid]
    labels_valid = labels[valid]
    return data_train, labels_train, data_valid, labels_valid

In [212]:
d_t, l_t, d_v, l_v = get_train_and_validatation_subsets(data_norm, labels)

In [213]:
import torch
import torch.nn as nn
import torch.nn.functional as F
device = torch.device("cpu")

class BasciNetwork(nn.Module):
    def __init__(self, input_size, output_size, seed, fc1_units=64, fc2_units=64):
        super(BasciNetwork, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(input_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, output_size)

    def forward(self, state):
        """Build a network that maps state -> action values."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))


In [125]:
# Every time you run this cell you restart the network (you need to learn again the nn)
model = BasciNetwork(10, 1, 0)

In [214]:
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [215]:
_in = torch.Tensor(d_t).float().to(device) 
_out = torch.Tensor(l_t).float().unsqueeze(1).to(device)
for t in range(1000):
    y_pred = model(_in)
    #y_pred[y_pred > 0.5] = 1
    #y_pred[y_pred <= 0.5] = 0
    # Compute and print loss
    loss = criterion(y_pred, _out)
    if t % 500 == 0:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0
loss = criterion(y_pred, _out)
print(f'Final accuracy on learn data: {100 * (_out.size()[0] - loss.item()) / _out.size()[0]} % (size: {_out.size()[0]})')

0 7.208397388458252
500 7.169987678527832
Final accuracy on learn data: 98.48812095032397 % (size: 463)


In [216]:
_in = torch.Tensor(d_v).float().to(device) 
_out = torch.Tensor(l_v).float().unsqueeze(1).to(device)
y_pred = model(_in)
y_pred[y_pred > 0.5] = 1
y_pred[y_pred <= 0.5] = 0
loss = criterion(y_pred, _out)
print(f'Final accuracy on validation data: {100 * (_out.size()[0] - loss.item()) / _out.size()[0]} % (size: {_out.size()[0]})')

Final accuracy on validation data: 98.27586206896552 % (size: 116)


In [217]:
save_path = '/Users/magdalena.zastawnik-gula/Documents/Courses/ML_MIT/basic_env/weight.pth'
torch.save(model.state_dict(),save_path)

116