In [7]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import torch.utils.data

In [8]:
#Load train data
X_train = pd.read_csv('data/EstrogenReceptorStatus_Train.csv',index_col=0)
print("Training shape: {}".format(X_train.shape))

y_train = pd.read_csv('data/EstrogenReceptorStatus_Train_labels.txt',header=None)
print("Training labels shape: {}".format(y_train.shape))
print("Examples per label")
print(y_train.groupby([0])[0].count())

print("Training data head")
print(X_train.head(3))
# X_train = X_train.to_numpy
X_train = np.asarray(X_train)

print("Training labels head")
print(y_train.head(3))
# y_train = y_train.to_numpy
y_train = np.asarray(y_train)


Training shape: (216, 162)
Training labels shape: (216, 1)
Examples per label
0
0     55
1    161
Name: 0, dtype: int64
Training data head
         1-hexadecanol  1-monostearin  1,2,4-benzenetriol  \
case_95       9.729131       9.372530           12.635429   
case_88      10.961073       9.353313           12.692373   
case_24      10.995057       9.384110           12.954097   

         1,5-anhydroglucitol  2-aminoadipic,acid  2-hydroxybutanoic,acid  \
case_95            14.711658           11.162607               14.145654   
case_88            14.371407           11.128662               14.228230   
case_24            13.323175            8.162313               14.392132   

         2-hydroxyglutaric,acid  2-hydroxyvaleric,acid  2-ketoadipic,acid  \
case_95               12.334462              14.358346          12.103635   
case_88               12.177452              14.923601          12.378531   
case_24               17.532062              16.000497          12.558596   

  

In [9]:
#Load test data
X_test = pd.read_csv('data/EstrogenReceptorStatus_Test.csv',index_col=0)
print("Test shape: {}".format(X_test.shape))
# X_test = X_test.to_numpy
X_test = np.asarray(X_test)
print("Type: {}".format(type(X_test)))

y_test = pd.read_csv('data/EstrogenReceptorStatus_Test_labels.txt',header=None)
print("Test labels shape: {}".format(y_test.shape))
print("Examples per label")
print(y_test.groupby([0])[0].count())
# y_test = y_test.to_numpy
y_test = np.asarray(y_test)


Test shape: (55, 162)
Type: <class 'numpy.ndarray'>
Test labels shape: (55, 1)
Examples per label
0
0    12
1    43
Name: 0, dtype: int64


In [10]:
# Mapeamos datos a tensores de PyTorch
X_train, y_train, X_test, y_test = map(torch.tensor, (X_train, y_train, X_test, y_test))
y_train = torch.flatten(y_train)
y_test = torch.flatten(y_test)
print("Training data shape: {}".format(X_train.shape))
print("Training label shape: {}".format(y_train.shape))
print("Test data shape: {}".format(X_test.shape))
print("Test label shape: {}".format(y_test.shape))

Training data shape: torch.Size([216, 162])
Training label shape: torch.Size([216])
Test data shape: torch.Size([55, 162])
Test label shape: torch.Size([55])


In [11]:
X_train = X_train.float()
y_train = y_train.long()
X_test = X_test.float()
y_test = y_test.long()

print("Training data shape: {}".format(X_train.shape))
print("Training label shape: {}".format(y_train.shape))
print("Test data shape: {}".format(X_test.shape))
print("Training data shape: {}".format(y_test.shape))

Training data shape: torch.Size([216, 162])
Training label shape: torch.Size([216])
Test data shape: torch.Size([55, 162])
Training data shape: torch.Size([55])


In [12]:
class FFNN(nn.Module):
  
  def __init__(self):
    super().__init__()
    torch.manual_seed(0)
    self.net = nn.Sequential(
        nn.Linear(162, 30), 
        nn.ReLU(),
        nn.Linear(30, 30),
        nn.ReLU(),
        nn.Linear(30, 2), 
        nn.Softmax()
    )

  def forward(self, X):
    return self.net(X)

  def predict(self, X):
    Y_pred = self.forward(X)
    return Y_pred


In [13]:
def fit_v2(x, y, model, opt, loss_fn, epochs = 1000):
  
  for epoch in range(epochs):
    loss = loss_fn(model(x), y)
    loss.backward()
    opt.step()
    opt.zero_grad()
    # print(loss.item())
    
  return loss.item()

In [14]:
device = torch.device("cpu")

X_train=X_train.to(device)
y_train=y_train.to(device)
X_test=X_test.to(device)
y_test=y_test.to(device)
fn = FFNN()
fn.to(device)
loss_fn = F.cross_entropy
opt = optim.SGD(fn.parameters(), lr=0.01)
# opt = optim.AdamW(fn.parameters(), lr=0.01)

print('Final loss', fit_v2(X_train, y_train, fn, opt, loss_fn))

  input = module(input)


Final loss 0.5656735301017761


In [15]:
def accuracy(y_hat, y):
  pred = torch.argmax(y_hat, dim=1)
  return (pred == y).float().mean()

In [16]:
Y_pred_train = fn.predict(X_train)
#Y_pred_train = np.argmax(Y_pred_train,1)
Y_pred_test = fn.predict(X_test)
#Y_pred_val = np.argmax(Y_pred_val,1)

accuracy_train = accuracy(Y_pred_train, y_train)
accuracy_test = accuracy(Y_pred_test, y_test)

print("Training accuracy", (accuracy_train))
print("Test accuracy",(accuracy_test))

Training accuracy tensor(0.7454)
Test accuracy tensor(0.7818)


In [17]:
print(Y_pred_test)

tensor([[1.4470e-03, 9.9855e-01],
        [1.6987e-03, 9.9830e-01],
        [7.4396e-04, 9.9926e-01],
        [6.4093e-04, 9.9936e-01],
        [7.4992e-04, 9.9925e-01],
        [2.3222e-03, 9.9768e-01],
        [4.2696e-04, 9.9957e-01],
        [2.0166e-04, 9.9980e-01],
        [5.2661e-04, 9.9947e-01],
        [4.1780e-04, 9.9958e-01],
        [2.1103e-03, 9.9789e-01],
        [8.0393e-04, 9.9920e-01],
        [4.0256e-04, 9.9960e-01],
        [3.5450e-04, 9.9965e-01],
        [2.9960e-03, 9.9700e-01],
        [7.3735e-04, 9.9926e-01],
        [1.4495e-03, 9.9855e-01],
        [1.9574e-03, 9.9804e-01],
        [4.7706e-04, 9.9952e-01],
        [4.5524e-04, 9.9954e-01],
        [2.1890e-03, 9.9781e-01],
        [2.9192e-04, 9.9971e-01],
        [3.2595e-04, 9.9967e-01],
        [2.0867e-03, 9.9791e-01],
        [9.5795e-04, 9.9904e-01],
        [3.0283e-04, 9.9970e-01],
        [1.2642e-03, 9.9874e-01],
        [6.2599e-03, 9.9374e-01],
        [9.6212e-04, 9.9904e-01],
        [6.424

In [18]:
pred = torch.argmax(Y_pred_test, dim=1)
print(pred)

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1])
