In [1]:
# Exercise_1
# main simple classification Iris data
# TODO**
# Names of group members: Fabian Bloch and Christopher Mahn
# Date: April 24th, 2023
# TODO**

In [2]:
#%% Load modules
import numpy as np
import pandas as pd
# import csv

# torch modules
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# plot module
import matplotlib.pyplot as plt

#evaluate modele
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.svm import SVC

In [3]:
# %% CUDA for PyTorch
# Right at the beginning: check if a cuda compatible GPU is available in your computer. 
# If so, set device = cuda:0 which means that later all calculations will be performed on the graphics card. 
# If no GPU is available, the calculations will run on the CPU, which is also absolutely sufficient for these exercises.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
#cudnn.benchmark = True

if device.type == 'cpu':
    device_num = 0
    print('No GPU available.')
else:
    device_num = torch.cuda.device_count()
    print('Device:', device, '-- Number of devices:', device_num)

Device: cuda:0 -- Number of devices: 1


In [4]:
# Mounting Google Drive locally 
# from google.colab import drive
#drive.mount("/content/drive", force_remount=True)
# drive.mount('/content/drive')
# you can also choose one of the other options to load data
# therefore see https://colab.research.google.com/notebooks/io.ipynb#scrollTo=u22w3BFiOveA

In [7]:
# %% read data
# path to wine quality data
data_path = '/hdd/repository/hcu-ma-gmt-big-data-analysis/data/ex2/data/iris_flowers/iris.data'

# read csv sheet with pandas
df = pd.read_csv(data_path, sep=',')

# # drop nan data
df = df.dropna()
# drop each row where there is not at least 21 not nan data
# df = df.dropna(thresh=21) # there are many nan in row 4 12 14, which can cause errors

# get numpy out of pandas dataframe
data = df.values
# data=df.to_numpy()

# get column names to see, which columns we have to extract as x and y
column_names = np.array(df.columns[:], dtype=np.str_)

# TODO**
print(f'Größe des Datensatzes: {np.shape(data)[0]}')
print(f'Dimension des Datensatzes: {np.shape(data)[1]}')
# TODO**

Größe des Datensatzes: 149
Dimension des Datensatzes: 5


In [8]:
# %% split in X and Y
# extract any feature you want as X 
# extract target values as Y
x = np.array(data[:,:-1], dtype=np.float32)
# x = np.array(data[:,6], dtype=np.float32)
y = pd.factorize(data[:,-1])[0]

class_names = np.unique(data[:,-1])

# save number of classes
nc = np.max(y)+1

print('x shape:', x.shape)
print('y shape:', y.shape)
print('number of classes', nc)

x shape: (149, 4)
y shape: (149,)
number of classes 3


In [9]:
# %% normalize X between (0,1). If multiple features in X are selected, each feature is normalized individually
scale_x = np.max(x, axis=0)
x = x/scale_x
print('Scale_x:',scale_x)

Scale_x: [7.9 4.4 6.9 2.5]


In [10]:
# %% convert to torch tensors
# if tensors have only one dimension, an artificial dimension is created with unsqueeze (e.g. [10]->[10,1], so 1D->2D)
Y = torch.from_numpy(y)
Y = Y.long()

# produce onehot target tensor
# scatter_ mehtod fills the tensor with values from a source tensor along the indices provided as arguments
# oh = one hot encoding
Y_oh = torch.zeros(Y.shape[0], nc)
Y_oh.scatter_(1,Y.unsqueeze(1), 1.0)

X = torch.from_numpy(x)
X = X.float()
if len(X.shape)==1:
    X = X.unsqueeze(dim=1)

In [11]:
# %% Split dataset in training, validation and test tensors
# TODO**
prop_train = 0.5
prop_val = 0.25
prop_test = 0.25
# TODO**

sample_num = {'all': X.shape[0], 
              'train': round(prop_train*X.shape[0]),
              'val': round(prop_val*X.shape[0]),
              'test': round(prop_test*X.shape[0])}

# idx shuffle
idx = np.random.choice(sample_num['all'], sample_num['all'], replace=False)
# assign idx to each sample
sample_idx = {'all': idx[:], 
              'train': idx[0:sample_num['train']],
              'val': idx[sample_num['train']:sample_num['train']+sample_num['val']],
              'test': idx[sample_num['train']+sample_num['val']:]}

# Create train data
X_train = X[sample_idx['train']]
Y_train_oh = Y_oh[sample_idx['train']]
Y_train = Y[sample_idx['train']]

# Create validation data
X_val = X[sample_idx['val']]
Y_val_oh = Y_oh[sample_idx['val']]
Y_val = Y[sample_idx['val']]

# Create test data
X_test = X[sample_idx['test']]
Y_test_oh = Y_oh[sample_idx['test']]
Y_test = Y[sample_idx['test']]

# Show data point
print('Input of first ten train Sample:', X_train[0:10])
print('Target of first ten train Sample:', Y_train[0:10])
print('One-Hot-Encoded Target of first ten train Sample:', Y_train_oh[0:10])

Input of first ten train Sample: tensor([[0.6835, 0.7727, 0.2174, 0.1600],
        [0.6076, 0.7727, 0.2754, 0.0800],
        [0.6329, 0.7955, 0.2319, 0.2400],
        [0.6076, 0.7727, 0.2319, 0.0800],
        [0.6456, 0.8636, 0.2174, 0.1200],
        [0.8354, 0.6818, 0.6377, 0.5600],
        [0.9747, 0.8636, 0.9710, 0.8800],
        [0.6962, 0.9545, 0.2029, 0.0800],
        [0.7975, 0.7500, 0.8696, 1.0000],
        [0.7342, 0.6136, 0.7391, 0.7600]])
Target of first ten train Sample: tensor([0, 0, 0, 0, 0, 1, 2, 0, 2, 2])
One-Hot-Encoded Target of first ten train Sample: tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.]])


In [12]:
#%% class of neural network 'ClassificationNet'
# set up layer and architecture of network in constructor __init__
# define operations on layer in forward pass method
class ClassificationNet(nn.Module):
    
    def __init__(self, inputSize, outputSize):
        super(ClassificationNet, self).__init__()
        self.fc1 = nn.Linear(inputSize, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, outputSize)
    
    def forward(self, x):
        # max pooling over (2, 2) window
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    

In [13]:
#%% Specify network parameter
# TODO**  
inputDim = 4
outputDim = 3
 
# Create instance of ClassificationNet
net = ClassificationNet(inputDim, outputDim)
# TODO** 

In [15]:
#%% Send tensors and networks to GPU (if you have one which supports cuda) for faster computations
# Note: Y is one-hot-encoded
X_train, Y_train_oh = X_train.to(device), Y_train_oh.to(device)
X_val, Y_val_oh = X_val.to(device), Y_val_oh.to(device)
X_test, Y_test_oh = X_test.to(device), Y_test_oh.to(device)

# The network itself must also be sent to the GPU. Either you write net = RegressNet() and then later net.to(device) or directly net = RegressNet().to(device)
# The latter option may have the advantage that the instance net is created directly on the GPU, whereas in variant 1 it must first be sent to the GPU.
if device_num>1:
    print("Let's use", device_num, "GPU's")
    net = nn.DataParallel(net)
net.to(device) 
print(net)

ClassificationNet(
  (fc1): Linear(in_features=4, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=3, bias=True)
)


In [16]:
#%% Specify hyperparameter
# hyperparemter: num_epoch, num_lr, loss_func, optimizer
# TODO**  
num_epoch = 1000
num_lr = 0.02
# TODO**  
# Loss and optimizer
loss_func = nn.MSELoss() # -> one hot encoded 'target' to loss-function
optimizer = optim.Adam(net.parameters(), lr=num_lr)

In [19]:
#%% Loss and Accuracy before training
# Compute loss of test data before training the network (with random weights)
Y_pred_test_before_oh = net(X_test)
# loss function input looks as follows: loss_func(prediction, target)
# Note: for CrossEntropyLoss(): prediction is one_hot_encoded, target has single dimension
# for MSELoss(): target and loss has to be both one_hot_encoded 
loss_test_before = loss_func(Y_pred_test_before_oh, Y_test_oh)

# Accuracy before training
y_pred_test_before = np.argmax(Y_pred_test_before_oh.cpu().detach().numpy(), axis=1)
correct_before = np.sum(y_pred_test_before == Y_test.numpy())

RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
#%% Train the network
plt.figure() # monitor loss curve during training
# for loop over epochs
for epoch in range(num_epoch):
    # classical forward pass -> predict new output from train data
    Y_pred_train_oh = net(X_train)
    # compute loss    
    loss_train = loss_func(Y_pred_train_oh, Y_train_oh)
    
    # Compute gradients
    optimizer.zero_grad()
    # Note: Calling .backward() mutiple times accumulates the gradient (by addition) for each parameter. This is why you should call optimizer.zero_grad() after each .step() call
    # Note that following the first .backward call, a second call is only possible after you have performed another forward pass.
    loss_train.backward()
    # perform a parameter update based on the current gradient (stored in .grad attribute of a parameter)
    optimizer.step()
    
    # TODO**
    # forward pass for validation
    Y_pred_val_oh =
    loss_val =
    # TODO**
    
    # compute actual train accuracy
    y_pred_train = np.argmax(Y_pred_train_oh.cpu().detach().numpy(), axis=1)
    correct_train = np.sum(y_pred_train == Y_train.numpy())
    
    # compute actual val accuracy
    y_pred_val = np.argmax(Y_pred_val_oh.cpu().detach().numpy(), axis=1)
    correct_val = np.sum(y_pred_val == Y_val.numpy())
    
    # plot train and val loss and accuracies
    plt.scatter(epoch, loss_train.data.item(), color='r', s=10, marker='o')
    plt.scatter(epoch, loss_val.data.item(), color='b', s=10, marker='o')
    plt.scatter(epoch, correct_train/Y_train.shape[0], color='m', s=10, marker='o') 
    plt.scatter(epoch, correct_val/Y_val.shape[0], color='c', s=10, marker='o')
    
    # print message with actual losses
    print('Train Epoch: {}/{} ({:.0f}%)\ttrain_Loss: {:.6f}\tval_Loss: {:.6f}'.format(
    epoch+1, num_epoch, epoch/num_epoch*100, loss_train.item(), loss_val.item()))
       

# show training and validation loss    
plt.legend(['train-loss','val-loss','train-acc','val-acc'])
plt.xlabel('epoch')
plt.ylabel('loss')
plt.savefig('/content/drive/My Drive/bda_lab/ex2/results/irisflower_loss.png')
#plt.show()

In [None]:
# %% Test results
# TODO**
# forward pass 
# Y_pred_test_oh is on the GPU, because net and X_test are on the GPU, but we want it on the CPU from now on.
Y_pred_test_oh = 
# compute and print losses
loss_test =
# TODO** 
print('Test loss before training was:', loss_test_before.item())
print('Test loss after training is:', loss_test.item())

# compute and print accuracies
y_pred_test = np.argmax(Y_pred_test_oh.cpu().detach().numpy(), axis=1)
correct = np.sum(y_pred_test == Y_test.numpy())
print('Test accuracy before training: ', correct_before/Y_test.shape[0]*100, '%')
print('Test accuracy after training: ', correct/Y_test.shape[0]*100, '%')

In [None]:
# evaluation module
clf = SVC(random_state=0)
clf.fit(X_train, y_pred_train)
SVC(random_state=0)
predictions = clf.predict(X_test)

#%% plot test confusion matrix
cm = confusion_matrix(y_pred_test, predictions, labels=clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=clf.classes_)
disp.plot()
plt.show()
plt.savefig('/content/drive/My Drive/bda_lab/ex2/results/irisflower_confusion.png')