In [1]:
import torch
import torch.nn as nn
import numpy as np

import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the iris table; its first four columns are the numeric features.
iris = sns.load_dataset('iris')

feature_matrix = iris.iloc[:, :4].to_numpy()
data = torch.tensor(feature_matrix, dtype=torch.float32)

# Integer class codes: every row starts at 0, then the two named species
# below are overwritten with their own codes.
labels = torch.zeros(data.shape[0], dtype=torch.long)
for class_code, species_name in ((1, 'versicolor'), (2, 'virginica')):
    labels[(iris.species == species_name).to_numpy()] = class_code

In [24]:
# Inspect the first 101 labels to see how the class codes are laid out row by row.
print(labels[:101])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2])


In [25]:
# Scratch check: `+` applied to two Python lists concatenates them.
[1,2]+[3,4]

[1, 2, 3, 4]

In [64]:
# Stratified train/test split: draw the same fraction of rows from every class
# so the training and test sets keep the class balance of the full data set.
proportion = .8
n_training = int(len(labels)*proportion)  # nominal size of the training set

# Fixed seed so the split is the same after a kernel restart.
split_rng = np.random.default_rng(42)

train_test_bool = np.zeros(len(labels), dtype=bool)

# Find each class's rows from the labels themselves instead of assuming three
# contiguous, equally sized groups that start at rows 0, 50 and 100.
labels_np = labels.numpy()
items_for_train = np.concatenate([
    split_rng.choice(
        np.flatnonzero(labels_np == class_code),
        size=int(np.sum(labels_np == class_code)*proportion),
        replace=False,
    )
    for class_code in np.unique(labels_np)
])

# True marks a training row, False a test row.
# (Taking simply the first n_training rows would not be stratified when the
# rows are ordered by class.)
train_test_bool[items_for_train] = True
train_test_bool

array([ True,  True,  True, False,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True, False, False,  True,
       False,  True, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False,  True,  True,
        True, False,  True, False,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True, False, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True, False,  True,  True,
        True, False, False,  True,  True,  True,  True,  True,  True,
       False,  True,

In [65]:
# Mean class code of the full set and of each split; a quick check that the
# split kept the class balance.
for header, subset_labels in (
    ('Average of full data:', labels),
    ('Average of training data:', labels[train_test_bool]),
    ('Average of test data:', labels[~train_test_bool]),
):
    print(header)
    print(torch.mean(subset_labels.float()))
    print(' ')

Average of full data:
tensor(1.)
 
Average of training data:
tensor(1.)
 
Average of test data:
tensor(1.)
 


In [66]:
# Fully connected classifier: 4 inputs -> 64 -> 64 -> 3 output scores,
# with ReLU after each of the first two linear layers.
ANNiris = nn.Sequential(
    nn.Linear(in_features=4, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=3),
)

# Cross-entropy loss on the raw output scores.
lossfun = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=ANNiris.parameters(), lr=0.01)

In [67]:
# Shapes of the full feature matrix, the training rows and the test rows.
for feature_subset in (data, data[train_test_bool, :], data[~train_test_bool, :]):
    print(feature_subset.shape)

torch.Size([150, 4])
torch.Size([120, 4])
torch.Size([30, 4])


In [68]:
num_epochs = 1000

# Per-epoch logs: training loss, and training accuracy in percent.
losses = torch.zeros(num_epochs)
ongoing_acc = []

# The training subset is the same every epoch, so select it once up front.
train_data = data[train_test_bool, :]
train_labels = labels[train_test_bool]

for epoch in range(num_epochs):
    # Forward pass on the training rows.
    yHat = ANNiris(train_data)

    # Accuracy of the current weights, measured before this epoch's update.
    ongoing_acc.append(100*torch.mean((torch.argmax(yHat, axis=1) == train_labels).float()))

    loss = lossfun(yHat, train_labels)
    # Log a detached value: writing the graph-attached loss into `losses`
    # would make the log tensor part of the autograd graph and keep every
    # epoch's graph referenced.
    losses[epoch] = loss.detach()

    # Backward pass and parameter update.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

In [69]:
def get_class_accuracy(predictlabels, labels, num_of_class):
    """Return the percentage of samples of one class that were predicted as that class.

    Args:
        predictlabels: tensor of predicted class codes.
        labels: tensor of true class codes, element-wise comparable with
            `predictlabels`.
        num_of_class: the class code to evaluate.

    Returns:
        A scalar tensor: 100 * (samples whose true and predicted code both
        equal `num_of_class`) / (samples whose true code equals
        `num_of_class`). The result is NaN when no sample has that true code.
    """
    is_target_class = labels == num_of_class
    is_predicted_target = predictlabels == num_of_class

    # Count the hits first, then normalise by the class size.
    n_correct = torch.sum((is_target_class & is_predicted_target).float())
    n_in_class = torch.sum(is_target_class.float())
    return 100*n_correct/n_in_class

# Final evaluation. No parameter update happens here, so run the forward
# passes without recording an autograd graph.
with torch.no_grad():
    # Overall accuracy (percent) on the training rows.
    predictions = ANNiris(data[train_test_bool, :])
    train_acc = 100*torch.mean((torch.argmax(predictions, axis=1) == labels[train_test_bool]).float())

    # Overall accuracy (percent) on the held-out rows; the predicted class
    # codes are computed once and reused for the per-class figures.
    predictions = ANNiris(data[~train_test_bool, :])
    test_labels = labels[~train_test_bool]
    test_predicted = torch.argmax(predictions, axis=1)
    test_acc = 100*torch.mean((test_predicted == test_labels).float())

    # Per-class test accuracy; class codes 0, 1 and 2 correspond to
    # setosa, versicolor and virginica.
    acc_versicolor = get_class_accuracy(test_predicted, test_labels, 1)
    acc_setosa = get_class_accuracy(test_predicted, test_labels, 0)
    acc_virginica = get_class_accuracy(test_predicted, test_labels, 2)

In [70]:
# Build the report lines first, then print them one per line in a single call.
report_lines = [
    f"Final TRAIN accuracy: {train_acc}",
    f"Final TEST accuracy: {test_acc}",
    f"Setosa test accuracy: {acc_setosa}",
    f"Versicolor test accuracy: {acc_versicolor}",
    f"Virginica test accuracy: {acc_virginica}",
]
print(*report_lines, sep="\n")

Final TRAIN accuracy: 98.33333587646484
Final TEST accuracy: 100.0
Setosa test accuracy: 100.0
Versicolor test accuracy: 100.0
Virginica test accuracy: 100.0
