## Loading the Data

In [2]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms

In [6]:
# Converter applied to every sample so the dataset yields tensors
data_transform = transforms.ToTensor()

In [8]:
# Fashion-MNIST training split from torchvision's datasets, stored under ../data
mnist_train = torchvision.datasets.FashionMNIST(
    root="../data",
    train=True,
    transform=data_transform,
    download=True,
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [16]:
# The dataset object bundles the training data and its labels; `targets` is the label tensor
# (`train_labels` is a deprecated alias for the same tensor and warns on access).
mnist_train.targets, mnist_train



(tensor([9, 0, 0,  ..., 3, 0, 5]),
 Dataset FashionMNIST
     Number of datapoints: 60000
     Root location: ../data
     Split: Train
     StandardTransform
 Transform: ToTensor())

In [14]:
# Fashion-MNIST test split, converted with the same transform as the training split
mnist_test = torchvision.datasets.FashionMNIST(
    root="../data",
    train=False,
    transform=data_transform,
    download=True,
)

In [15]:
# Display the test set summary (uses the ToTensor transform)
mnist_test

Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: ../data
    Split: Test
    StandardTransform
Transform: ToTensor()

## Defining a Data Iterator

In [26]:
# Number of samples per mini-batch
batch_size = 128
# Iterator that walks through the training set in shuffled mini-batches
train_data_loader = data.DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [27]:
# Matching data loader for the test set (same batch size, shuffling and worker count as the training loader)
test_data_loader = data.DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)

In [30]:
# Converting to function for future use, default num_workers is 4 bc CPU threads
def load_fashion_mnist(batch_size: int = 128, num_workers: int = 4, root: str = "../data"):
    """Download Fashion-MNIST and wrap both splits in shuffled DataLoaders.

    Args:
        batch_size: Number of samples per mini-batch for both loaders.
        num_workers: Number of worker processes each DataLoader uses to read data.
        root: Directory the dataset files are downloaded to / read from.

    Returns:
        A ``(train_data_loader, test_data_loader)`` tuple.
    """
    data_transform = transforms.ToTensor()  # Converts each sample to a tensor

    # Downloading data (train and test splits share the same transform)
    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=data_transform, download=True
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=data_transform, download=True
    )

    # Loading data onto iterators; num_workers is forwarded from the caller
    # (it was previously hard-coded to 4, silently ignoring the argument)
    train_data_loader = data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers
    )
    test_data_loader = data.DataLoader(
        mnist_test, batch_size, shuffle=True, num_workers=num_workers
    )

    # Returning iterators
    return train_data_loader, test_data_loader
    

## Softmax Regression Implementation

In [None]:
# Train and test iterators used by the softmax implementation below
train_iter, test_iter = load_fashion_mnist(batch_size=128, num_workers=4)

In [35]:
# Softmax regression maps an input to label probabilities (class confidences) in continuous space,
# then follows the gradient towards a solution that minimizes the error.

# The input image will be flattened
input_img_size = 784
output_space = 10

# Weights map the input space (784) to the output space: each of the 10 columns weighs all 784 pixels
# to produce one output. Initialized around 0 with a small standard deviation so they are not exactly 0.
w = torch.normal(0.0, 0.1, size=(input_img_size, output_space), requires_grad=True)
# One linear bias shift per output neuron, starting at zero
b = torch.zeros(output_space, requires_grad=True)

In [41]:
# Quick summing review: the highest-numbered dim is the innermost / most nested dim
X = torch.tensor(
    [
        [1.0, 2.0, 3.0],
        [7.0, 8.0, 9.0],
        [27.0, 26.2, 1.1],
    ]
)
# keepdim=True keeps the summed dimension in the result (with size 1) even though it has collapsed
X.sum(dim=0, keepdim=True)

tensor([[35.0000, 36.2000, 13.1000]])

In [44]:
# Summing over both dims: with keepdim=True the result stays nested (1x1),
# otherwise the collapsed dims are removed and the total sum is a scalar
X.sum(dim=(0, 1), keepdim=True), X.sum(dim=(0, 1)), X.sum()

(tensor([[84.3000]]), tensor(84.3000), tensor(84.3000))

**Quick conceptual understanding of sums**:

The highest-numbered dimension is the innermost (most nested) one. In 1-dimensional data it can be thought of as a single row/record holding the values; in 2-dimensional data (a table) the last 2 dimensions are the records and the values inside each record. The values can be summed across all records (the next dimension out), and those results could in turn be summed across all tables (the 3rd dimension from the inside). Hence summing across the final 2 dims of 2D data is summing across the whole table. `keepdims` simply keeps each summed dimension in the result with size 1 instead of deleting it for being redundant.

In [45]:
def softmax(X: torch.Tensor) -> torch.Tensor:
    """Row-wise softmax: turn each row of activations into probabilities.

    Args:
        X: 2D tensor of activations; the softmax is taken along dim 1.

    Returns:
        Tensor of the same shape as ``X`` whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's max does not change the result (the common factor
    # cancels in the ratio) but keeps exp() from overflowing to inf -> NaN.
    shifted_activations = X - X.max(dim=1, keepdim=True).values

    # Mapped to the positive space with the magnitudinal differences of the exponential
    exponentiated_activations = torch.exp(shifted_activations)
    sum_exponentiated_activations = exponentiated_activations.sum(dim=1, keepdim=True)

    # Note: put under verbose variable names for understanding
    mapped_probabilities = exponentiated_activations / sum_exponentiated_activations
    return mapped_probabilities

In [47]:
# Exponential differences in confidence are visible: an activation that is +1 larger occupies ~2.7x more of the exponentiated sum.
# Benefits of softmax:
# - maps to the 0-1 space, but assigns tiny probabilities (an order of magnitude less) to negative activations when positive ones exist
# - maps each confidence relative to the other confidences
# - assigns a non-negligible probability to every event while still showing initial confidence in high activations
softmax(torch.tensor([[0.2, 1.2, 2.4]]))

tensor([[0.0785, 0.2133, 0.7082]])

In [49]:
# Accuracy = sum(predictions == labels) / number of labels = rate of correctness


## Neural Network Implementation

In [52]:
# Every layer class lives in torch.nn and is capitalized
model = torch.nn.Sequential(
    torch.nn.Flatten(),  # auto-flattens the data to a 1D layer
    torch.nn.Linear(input_img_size, output_space),
)

In [54]:
# Defining the loss and the trainer; losses live in torch.nn and are capitalized, just as layers are
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.AdamW(params=model.parameters(), lr=0.03)

In [56]:
# Initializing weights randomly and applying to Neural Network
# PyTorch apply works on a per-layer basis
def init_weights(layer: torch.nn.Module) -> None:
    """Initialize a Linear layer in place: normal weights, zero bias.

    Meant to be passed to ``Module.apply``; layers that are not
    ``torch.nn.Linear`` are left untouched.

    Args:
        layer: The module to (possibly) initialize.
    """
    if not isinstance(layer, torch.nn.Linear):
        return
    # Note: the attribute is `weight` (singular) in PyTorch, not `weights`.
    # init.normal_ fills any parameter in place with normally distributed values.
    torch.nn.init.normal_(layer.weight, mean=0.0, std=0.1)
    # Linear's default bias is random, not zero, so zero it explicitly
    # (skipped for layers built with bias=False).
    if layer.bias is not None:
        torch.nn.init.zeros_(layer.bias)

# Run init_weights over the model's layers so any Linear weights get normal initial values
model.apply(init_weights)
        

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

In [58]:
# Proof of the auto-initialized weights: index 1 is the Linear layer (the Flatten layer at index 0 has no weights)
model[1].weight.data

tensor([[ 1.6129e-01,  1.2880e-01,  1.2191e-01,  ...,  2.0155e-03,
          4.3984e-02, -3.7871e-02],
        [-2.0345e-02,  8.8198e-02, -7.1088e-02,  ..., -1.4631e-02,
         -3.6459e-02, -6.8571e-02],
        [ 3.4633e-02, -1.6811e-01,  7.9087e-02,  ..., -1.6673e-04,
          1.4304e-01, -5.6963e-03],
        ...,
        [ 1.1847e-01, -6.5290e-02, -2.9943e-02,  ...,  9.9582e-03,
          1.3712e-01, -4.5925e-02],
        [ 5.3444e-02,  7.8510e-03, -4.6245e-03,  ...,  6.4338e-02,
          1.7061e-01, -5.1979e-02],
        [ 8.2797e-02,  1.5005e-01,  6.7020e-02,  ..., -1.5266e-01,
          7.0909e-02, -8.4311e-02]])