<a href="https://colab.research.google.com/github/funfarmer/PYTHON-FOR-EVERYBODY/blob/master/intro_pytorch_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Objectives

- Compute Neural Network forward pass by hand
- Use `pytorch` to build a regression model
- Use `pytorch` to build classification models
- Understand how to use the `Dataset` and `DataLoader` objects in `pytorch`

In [None]:
import torch
import torch.nn as nn
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt

#### Regression with `pytorch`

In [None]:
X = torch.randn(100, 1)*10
y = X + torch.randn(100, 1)*2.5

In [None]:
plt.scatter(X, y)

In [None]:
model = nn.Linear(in_features = 1, out_features = 1)

In [None]:
print(model.bias, model.weight)

In [None]:
x = torch.tensor([5.0])

In [None]:
model(x)

In [None]:
model.weight*5 + model.bias

In [None]:
#using the sequential approach
model = nn.Sequential(nn.Linear(1, 1))

In [None]:
model(x)

In [None]:
import torch.nn as nn

In [None]:
class LinearRegression(nn.Module):
    """A single affine layer mapping ``input_size`` features to ``output_size`` outputs."""

    def __init__(self, input_size, output_size):
        super(LinearRegression, self).__init__()
        # The layer stays under the attribute name ``linear`` so the
        # registered parameter names are unchanged.
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.linear(x)
        return prediction

In [None]:
lr = LinearRegression(2, 1)

In [None]:
list(lr.parameters())

In [None]:
x = torch.tensor([[3.2, 1.1]])

In [None]:
lr.forward(x)

In [None]:
lr = LinearRegression(1, 1) # model

In [None]:
criterion = nn.MSELoss() # loss

In [None]:
X =( X - X.mean() )/ X.std()

In [None]:
optimizer = torch.optim.SGD(lr.parameters(), lr = 0.01) # optimizer

In [None]:
# Full-batch gradient descent for the linear regression model `lr`.
epochs = 1000
losses = []
for i in range(epochs):
  # Call the module itself rather than .forward() so that any hooks
  # registered on the nn.Module are honoured.
  yhat = lr(X)
  loss = criterion(yhat, y)
  if i % 100 == 0:
    print(f'Epoch {i}, loss {loss.item()}')
  # Keep only a detached copy for plotting: the history then holds no
  # reference to the autograd graph, and is still a tensor so later
  # `.detach().numpy()` calls on its entries continue to work.
  losses.append(loss.detach())
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

In [None]:
#plot the losses
plt.plot(range(epochs), [i.detach().numpy() for i in losses], '--r')

In [None]:
w, b = list(lr.parameters())

In [None]:
list(lr.parameters())

In [None]:
plt.scatter(X, y)
plt.plot(X, w.detach()*X + b.detach(), '-r')

### Classification Example

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
cancer = load_breast_cancer()

In [None]:
X = cancer.data
y = cancer.target

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
sscaler = StandardScaler()
X = sscaler.fit_transform(X)

In [None]:
X.shape

In [None]:
class Classifier(nn.Module):
    """Binary classifier: two ReLU hidden layers followed by a sigmoid output.

    Args:
        in_features: Number of input features per sample (default 30).
        hidden_features: Width of both hidden layers (default 32).

    The defaults reproduce the original fixed 30 -> 32 -> 32 -> 1 layout,
    so ``Classifier()`` behaves exactly as before.
    """

    def __init__(self, in_features=30, hidden_features=32):
        super().__init__()
        self.layer_1 = nn.Linear(in_features, hidden_features)
        self.layer_2 = nn.Linear(hidden_features, hidden_features)
        self.layer_out = nn.Linear(hidden_features, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        """Return one sigmoid-squashed value per input row.

        Args:
            X: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor with last dimension 1 and values between 0 and 1.
        """
        x = self.relu(self.layer_1(X))
        x = self.relu(self.layer_2(x))
        x = self.layer_out(x)
        # Sigmoid maps the single output unit to a value in [0, 1].
        x = self.sigmoid(x)
        return x

In [None]:
model = Classifier()

In [None]:
X = torch.from_numpy(X)
y = torch.from_numpy(y.reshape(-1, 1))
X = X.float()
y = y.float()

In [None]:
def training_loop(n_epochs, optimizer, model, loss_fn, x_train, y_train):
    """Run ``n_epochs`` full-batch optimisation steps of ``model``.

    Every epoch performs a forward pass on ``x_train``, evaluates
    ``loss_fn`` against ``y_train``, clears the old gradients,
    back-propagates and takes one optimizer step. The loss is printed on
    the first epoch and on every 1000th epoch. Nothing is returned.
    """
    for step in range(n_epochs):
        epoch = step + 1  # epochs are reported 1-based

        predictions = model(x_train)
        loss = loss_fn(predictions, y_train)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        should_report = epoch == 1 or epoch % 1000 == 0
        if should_report:
            print(epoch, loss.item())

In [None]:
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)

In [None]:
training_loop(n_epochs=5000, optimizer=optimizer, model = model, loss_fn=nn.BCELoss(),
             x_train=X, y_train=y)

In [None]:
model(X)

In [None]:
preds = np.where(model(X) >= 0.5, 1, 0)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
confusion_matrix(y, preds)

In [None]:
print(classification_report(y, preds))

In [None]:
np.unique(y, return_counts = True)

In [None]:
(192)/(192 + 20)

In [None]:
357/(357 + 212)

In [None]:
(195 + 354)/(195 + 354 + 4 + 20)

### Using the `Dataset` and `DataLoader`

In [None]:
class MyData(data.Dataset):
    """Map-style dataset pairing feature rows ``X`` with targets ``Y``."""

    def __init__(self, X, Y):
        # Keep references to the supplied containers; nothing is copied here.
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples, measured on the targets."""
        return len(self.Y)

    def __getitem__(self, index):
        """Return ``(features, target)`` for the sample at ``index``.

        Features are cast with ``.float()`` and flattened to 1-D; the
        target is cast with ``.long()``.
        """
        features = self.X[index].float()
        target = self.Y[index].long()
        return features.reshape(-1), target

In [None]:
from sklearn.datasets import make_classification

In [None]:
X, Y = make_classification()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, Y)

In [None]:
train_data = MyData(torch.tensor(X_train),
                    torch.tensor(y_train))

In [None]:
train_loader = DataLoader(train_data,
                          shuffle = True,
                          batch_size = 64,
                          num_workers = 4,
                          pin_memory = True)

In [None]:
train_data.__getitem__(0)

#### Using built-in datasets

In [None]:
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

In [None]:
train_data = datasets.FashionMNIST(root = 'data', download = True, transform=ToTensor())

In [None]:
test_data = datasets.FashionMNIST( root = 'data', train = False, download = True, transform = ToTensor())

In [None]:
img, label = train_data[0]

In [None]:
plt.imshow(img[0].squeeze(), cmap = 'gray')

In [None]:
train_dataloader = DataLoader(train_data, batch_size = 20, shuffle = True)
test_dataloader = DataLoader(test_data, batch_size = 20, shuffle = False)

In [None]:
train_ims, train_labels = next(iter(train_dataloader))

In [None]:
train_ims.shape

In [None]:
plt.imshow(train_ims[0].squeeze(), cmap = 'gray')

In [None]:
len(train_ims)

#### Creating a custom dataset

In [None]:
# Collect the first 20 (image, label) pairs. Each sample is indexed only
# once, so the dataset loads/transforms it a single time instead of once
# for the image and again for the label.
first_samples = [train_data[idx] for idx in range(20)]
train_im_sample = [image.numpy() for image, _ in first_samples]
train_label_sample = [target for _, target in first_samples]

In [None]:
ims_npy = np.array(train_im_sample)

In [None]:
np.save('ims_npy.npy', ims_npy)

In [None]:
labels_npy = np.array(train_label_sample)

In [None]:
np.save('labels_npy.npy', labels_npy)

In [None]:
class CustomImageDataset(Dataset):
    """Dataset backed by two ``.npy`` files: one of images, one of labels."""

    def __init__(self, imgs, labels):
        # Both arguments are handed straight to np.load; the loaded arrays
        # are held on the instance.
        images = np.load(imgs)
        self.imgs = images
        targets = np.load(labels)
        self.labels = targets

    def __len__(self):
        """Return the number of samples, measured on the label array."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        return self.imgs[index], self.labels[index]


In [None]:
train_data_custom = CustomImageDataset('ims_npy.npy', 'labels_npy.npy')

In [None]:
train_dataloader = DataLoader(train_data_custom, batch_size = 5, shuffle = True)

In [None]:
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imshow(img, cmap="gray")
plt.show()
print(f"Label: {label}")

#### Example with `pandas`

In [None]:
import seaborn as sns

In [None]:
iris = sns.load_dataset('iris')

In [None]:
iris.head()

In [None]:
iris['species'].value_counts()

In [None]:
X = iris.drop('species', axis = 1)
y = iris['species'].replace({'setosa': 0, 'virginica': 1, 'versicolor': 2})

In [None]:
type(X)

In [None]:
type(y)

In [None]:
class IrisData(Dataset):
  """Dataset over a pandas feature table and a matching label column.

  Args:
    X: pandas DataFrame of numeric features, one row per sample.
    y: pandas Series of integer class labels, positionally aligned with X.
  """

  def __init__(self, X, y):
    self.x = X
    self.y = y

  def __len__(self):
    """Return the number of samples, measured on the labels."""
    return len(self.y)

  def __getitem__(self, index):
    """Return ``(measures, label)`` for the sample at integer position ``index``.

    Returns:
      measures: 1-D float32 tensor holding the feature row.
      label: 0-D int64 tensor holding the class id.
    """
    # Cast to float32: pandas rows are float64 by default, which does not
    # match the float32 weights of nn.Linear layers.
    measures = torch.tensor(self.x.iloc[index].values, dtype=torch.float32)
    # `.iloc[index]` on a Series yields a scalar, which has no `.values`
    # attribute; convert it directly to an integer class-index tensor.
    label = torch.tensor(int(self.y.iloc[index]), dtype=torch.long)
    return measures, label

In [None]:
train_data = IrisData(X, y)

In [None]:
iris_loader = DataLoader(train_data, batch_size = 20)

In [None]:
class IrisModel(nn.Module):
  """Small multi-class network: 4 inputs -> 32 hidden (ReLU) -> 3 class scores.

  Args:
    X: Unused; accepted only so existing ``IrisModel(X, y)`` calls keep working.
    y: Unused; accepted only so existing ``IrisModel(X, y)`` calls keep working.
  """

  def __init__(self, X=None, y=None):
    super().__init__()
    self.linear_1 = nn.Linear(in_features = 4, out_features = 32)
    self.linear_2 = nn.Linear(in_features = 32, out_features = 3)
    self.relu = nn.ReLU()
    # Explicit dim: normalise over the class axis (the last one). Kept as
    # an attribute so callers can turn logits into probabilities.
    self.softmax = nn.Softmax(dim = -1)

  def forward(self, X):
    """Return raw class scores (logits) for ``X``.

    Softmax is deliberately NOT applied here: nn.CrossEntropyLoss expects
    unnormalised logits and applies log-softmax internally, so normalising
    in forward would apply softmax twice. Use ``self.softmax(logits)``
    when probabilities are needed.
    """
    x = self.relu(self.linear_1(X))
    x = self.linear_2(x)
    return x

In [None]:
iris = IrisModel(X, y)

In [None]:
iris

In [None]:
loss_fn = nn.CrossEntropyLoss()

In [None]:
optimizer = torch.optim.SGD(iris.parameters(), lr = 0.01)