## PyTorch Introduction

Aufgabe:

* Trainieren Sie ein mehrschichtiges Neuronales Netz mit PyTorch.
* Zeigen Sie grafisch auf, dass Ihr Modell konvergiert.


#### Import & Settings

In [134]:
# in this code block we gather all imports

# PyTorch
import torch

# numpy is just used in the beginning
import numpy as np

# MinMaxScaler for data normalization
from sklearn.preprocessing import MinMaxScaler

# for some plots and the scatter matrix
from matplotlib import pyplot as plt
from matplotlib import colormaps as cm

# pandas for reading the CSV and for use with the library ppscore
import pandas as pd
from pandas.api.types import CategoricalDtype

# ppscore for exploratory data analysis
import ppscore as pps

# more statistics for exploratory data analysis
from scipy import stats


In [135]:
# run on the first CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('This Computation is running on {}.'.format(device))

This Computation is running on cpu.


#### Load Dataset

In [136]:
# in this code block we load the CSV file and derive the feature / label views

# load the comma-separated file; blanks that follow a delimiter are skipped
dataset_known_pd = pd.read_csv("../../data/iris_binary.csv", sep=',', skipinitialspace=True)

# replace blanks in the column names by underscores
# so that the names can be used as identifiers in code where applicable
dataset_known_pd.columns = dataset_known_pd.columns.str.replace(" ", "_")

# order the rows by ascending sepal length
dataset_known_pd = dataset_known_pd.sort_values(by='sepal_length', ascending=True, axis=0)

# the complete table as a NumPy array
dataset_known_np = dataset_known_pd.to_numpy()

# feature columns: everything except the label column 'class'
x_known_pd = dataset_known_pd.drop(columns='class')
x_known_np = x_known_pd.to_numpy()

# label column
y_known_pd = dataset_known_pd['class'].copy()
y_known_np = y_known_pd.to_numpy()

# dataset dimensions, taken from the feature matrix (rows, columns)
n_samples, n_features = x_known_np.shape
print(f"n_samples={n_samples}")
print(f"n_features={n_features}")

# sanity checks: 4 feature columns plus the class column, and one label per row
n = dataset_known_np.shape[0]
assert dataset_known_np.shape == (n, 5)
assert x_known_np.shape == (n, 4)
assert y_known_np.shape == (n,)


n_samples=150
n_features=4


In [137]:
# show the sorted data frame; the output below is abbreviated to its first and last rows
dataset_known_pd

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
22,4.3,3.0,1.1,0.1,0
128,4.4,2.9,1.4,0.2,0
141,4.4,3.0,1.3,0.2,0
3,4.4,3.2,1.3,0.2,0
47,4.5,2.3,1.3,0.3,0
...,...,...,...,...,...
119,7.7,3.0,6.1,2.3,1
13,7.7,3.8,6.7,2.2,1
44,7.7,2.6,6.9,2.3,1
56,7.7,2.8,6.7,2.0,1


In [138]:
# in this code block we hand the NumPy dataset over to PyTorch

# wrap the array as a tensor; the tensor keeps the array's dtype and shares its memory
# NOTE(review): the tensor is not moved to `device` here -- confirm whether that is intended
dataset_known = torch.as_tensor(dataset_known_np)

In [139]:
# in this code block we initialize the trainable weights

# one random weight per input feature; autograd records operations on w
# so that gradients can be computed for it
w = torch.rand((n_features,), requires_grad=True)
w

tensor([0.3586, 0.5919, 0.9237, 0.9286], requires_grad=True)

In [140]:
# in this code block we split the dataset randomly into train and test data

# share of the samples that goes into the training subset
TRAIN_FRACTION = 0.7
# fixed seed so that the split is identical on every run of the notebook
SPLIT_SEED = 42

# derive the test size as the remainder: truncating both shares independently
# with int() can lose a sample, and random_split requires the lengths to add up
# exactly to the dataset length
n_train = int(n_samples * TRAIN_FRACTION)
n_test = n_samples - n_train

# a dedicated generator keeps the split reproducible without touching the global RNG state
dataset_known_subsets = torch.utils.data.random_split(
    dataset_known,
    [n_train, n_test],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

dataset_known_train_subset = dataset_known_subsets[0]
dataset_known_test_subset = dataset_known_subsets[1]

# every sample must end up in exactly one of the two subsets
assert len(dataset_known_train_subset) == n_train
assert len(dataset_known_test_subset) == n_test
assert len(dataset_known_train_subset) + len(dataset_known_test_subset) == n_samples
