In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init

%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

from torch.nn.parameter import Parameter
from Data_loading import FASHION_MNIST, cifar10, train_val_split

In [None]:
import numpy as np
import urllib.request
import tarfile
import io
import gzip
def FASHION_MNIST(flatten=True, one_hot=False):
    """Download Fashion-MNIST and return it as NumPy arrays.

    The four gzip-compressed IDX files are fetched over HTTP on every call
    (nothing is cached on disk), so a network connection is required.

    Args:
        flatten: If True, every 28x28 image is reshaped into a single row
            (one row per image); otherwise images keep shape (n, 28, 28).
        one_hot: If True, labels are returned as (n, 10) arrays holding a
            single 1 per row; otherwise as 1-D uint8 class indices.

    Returns:
        ``((X_train, y_train), (X_test, y_test))``. Pixel values are divided
        by 255.0, so images are floating point in the range [0, 1].
    """
    base = "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/"

    def get(name):
        # The context manager closes the HTTP response deterministically
        # instead of leaving the connection open until garbage collection.
        with urllib.request.urlopen(base + name) as response:
            return gzip.decompress(response.read())

    def load_images(name):
        # offset=16 skips the header that precedes the pixel bytes.
        pixels = np.frombuffer(get(name), dtype=np.uint8, offset=16)
        return pixels.reshape(-1, 28, 28) / 255.0

    def load_labels(name):
        # offset=8 skips the header that precedes the label bytes.
        return np.frombuffer(get(name), dtype=np.uint8, offset=8)

    def to_one_hot(labels):
        encoded = np.zeros((labels.size, 10))
        encoded[np.arange(labels.size), labels] = 1
        return encoded

    Xtr = load_images("train-images-idx3-ubyte.gz")
    ytr = load_labels("train-labels-idx1-ubyte.gz")

    Xte = load_images("t10k-images-idx3-ubyte.gz")
    yte = load_labels("t10k-labels-idx1-ubyte.gz")

    if flatten:
        Xtr = Xtr.reshape(len(Xtr), -1)
        Xte = Xte.reshape(len(Xte), -1)

    if one_hot:
        return (Xtr, to_one_hot(ytr)), (Xte, to_one_hot(yte))

    return (Xtr, ytr), (Xte, yte)

In [None]:
from sklearn.model_selection import train_test_split

# Fetch the flattened images with integer class labels, then hold out
# 5000 shuffled training samples (fixed seed) as the validation set.
(Xtr, ytr), (x_test, y_test) = FASHION_MNIST(flatten=True, one_hot=False)
x_train, x_valid, y_train, y_valid = train_test_split(
    Xtr, ytr, test_size=5000, random_state=42, shuffle=True
)

# Alternative split using the project helper:
# x_train, y_train, x_valid, y_valid = train_val_split(Xtr, ytr, val_size=5000, seed=42)

# Inputs become float32 tensors. Each array is copied first so the tensor
# owns its own memory rather than aliasing the NumPy buffer.
x_train = torch.from_numpy(x_train.copy()).float()
x_valid = torch.from_numpy(x_valid.copy()).float()
x_test = torch.from_numpy(x_test.copy()).float()

# Targets become int64 tensors, the index dtype used by the loss below.
y_train = torch.from_numpy(y_train.copy()).long()
y_valid = torch.from_numpy(y_valid.copy()).long()
y_test = torch.from_numpy(y_test.copy()).long()

In [None]:
# Layer sizes for the classifier.
num_features = x_train.shape[1]  # input width, read from the training matrix
num_l1 = 512                     # units in the first hidden layer
num_classes = 10                 # number of target classes

class FNNP(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers and explicit parameters.

    The weights and biases are registered directly as ``Parameter`` objects
    (rather than through ``nn.Linear``) and applied with ``F.linear``.

    Args:
        num_features: Width of each input row.
        num_hidden_1: Number of units in the first hidden layer.
        num_hidden_2: Number of units in the second hidden layer.
        num_output: Number of output scores (one per class).
    """

    def __init__(self,num_features,num_hidden_1,num_hidden_2,num_output):
        super(FNNP,self).__init__()
        # input -> hidden layer 1
        self.W_1 = Parameter(init.xavier_normal_(torch.Tensor(num_hidden_1, num_features)))
        self.b_1 = Parameter(init.constant_(torch.Tensor(num_hidden_1), 0))

        # hidden layer 1 -> hidden layer 2
        self.W_2 = Parameter(init.xavier_normal_(torch.Tensor(num_hidden_2, num_hidden_1)))
        self.b_2 = Parameter(init.constant_(torch.Tensor(num_hidden_2), 0))

        # hidden layer 2 -> output. This layer was missing: W_2/b_2 were
        # defined twice and num_output was never used.
        self.W_3 = Parameter(init.xavier_normal_(torch.Tensor(num_output, num_hidden_2)))
        self.b_3 = Parameter(init.constant_(torch.Tensor(num_output), 0))

        self.activation = torch.nn.ReLU()

    def forward(self,x):
        """Map a batch of inputs (batch, num_features) to raw scores (batch, num_output)."""
        x = self.activation(F.linear(x, self.W_1, self.b_1))
        x = self.activation(F.linear(x, self.W_2, self.b_2))
        # No activation on the last layer: the scores are returned unnormalised.
        return F.linear(x, self.W_3, self.b_3)
# FNNP's constructor takes four sizes (input, hidden 1, hidden 2, output).
# The previous three-argument call omitted the second hidden width and
# raised a TypeError, so it is supplied explicitly here.
num_l2 = num_l1  # second hidden layer width; set equal to the first as a starting point
network = FNNP(num_features, num_l1, num_l2, num_classes)
network = network.float()

In [44]:
# Scratch check of the initialiser. The extra positional `0` appears to be taken
# as xavier_normal_'s `gain` argument (confirm against the torch.nn.init docs),
# which would scale the draw to zero; this matches the all-zero tensor recorded
# as this cell's output.
Parameter(init.xavier_normal_(torch.Tensor(1,2), 0))

Parameter containing:
tensor([[0., 0.]], requires_grad=True)

In [45]:
# Scratch check of the bias initialiser: constant_ is called with value 0 on a
# 2-element tensor, and the recorded output of this cell is a zero vector.
Parameter(init.constant_(torch.Tensor(2), 0))

Parameter containing:
tensor([0., 0.], requires_grad=True)

In [None]:
# Loss: cross-entropy computed from the network's raw output scores.
criterion = nn.CrossEntropyLoss()
# Optimiser: Adam over every parameter registered on the network.
optimizer = optim.Adam(params=network.parameters(), lr=0.01)

In [None]:
# we could have done this ourselves,
# but we should be aware of sklearn and its tools
from sklearn.metrics import accuracy_score

# Hyperparameters, plus the number of whole batches in each split
# (integer division, so a trailing partial batch is not counted).
batch_size, num_epochs = 400, 150
num_samples_train, num_samples_valid = x_train.shape[0], x_valid.shape[0]
num_batches_train = num_samples_train // batch_size
num_batches_valid = num_samples_valid // batch_size

# History containers for loss and accuracy.
train_acc, train_loss = [], []
valid_acc, valid_loss = [], []
test_acc, test_loss = [], []
losses = []
cur_loss = 0


def get_slice(i, size):
    """Return the index range covering batch number `i` of width `size`."""
    first = i * size
    return range(first, first + size)

def _predict_labels(model, inputs, batch_size):
    """Return the arg-max class index for every row of `inputs` as a NumPy array.

    Rows are processed in chunks of `batch_size`, including a final partial
    chunk, with gradient tracking disabled because no backward pass follows.
    """
    chunks = []
    with torch.no_grad():
        for start in range(0, inputs.shape[0], batch_size):
            scores = model(inputs[start:start + batch_size])
            chunks.append(scores.argmax(dim=1))
    if not chunks:
        return np.empty(0, dtype=np.int64)
    return torch.cat(chunks).numpy()


for epoch in range(num_epochs):
    # Forward -> Backprop -> Update params
    ## Train
    cur_loss = 0.0
    network.train()
    for i in range(num_batches_train):
        optimizer.zero_grad()
        slce = get_slice(i, batch_size)
        output = network(x_train[slce])

        # compute gradients given loss
        target_batch = y_train[slce]
        batch_loss = criterion(output, target_batch)
        batch_loss.backward()
        optimizer.step()

        # .item() yields a plain float. Summing the loss tensors themselves
        # would keep every batch's autograd history alive for the whole epoch.
        cur_loss += batch_loss.item()
    # Each batch loss is already a mean over its samples, so the epoch loss is
    # the mean over batches (it was previously divided by batch_size).
    losses.append(cur_loss / max(num_batches_train, 1))

    network.eval()
    ### Evaluate training
    # Every sample is scored, including the trailing partial batch that the
    # whole-batch loops used to skip.
    train_targs = y_train.numpy()
    train_preds = _predict_labels(network, x_train, batch_size)

    ### Evaluate validation
    val_targs = y_valid.numpy()
    val_preds = _predict_labels(network, x_valid, batch_size)

    train_acc_cur = accuracy_score(train_targs, train_preds)
    valid_acc_cur = accuracy_score(val_targs, val_preds)

    train_acc.append(train_acc_cur)
    valid_acc.append(valid_acc_cur)

    if epoch % 10 == 0:
        print("Epoch %2i : Train Loss %f , Train acc %f, Valid acc %f" % (
                epoch+1, losses[-1], train_acc_cur, valid_acc_cur))

# One x position per recorded epoch. This rebinds the loop variable `epoch`
# to an array; the name is kept in case later cells read it.
epoch = np.arange(len(train_acc))

fig, ax = plt.subplots()
ax.plot(epoch, train_acc, 'r', epoch, valid_acc, 'b')
ax.legend(['Train Accuracy', 'Validation Accuracy'])
# The x values are epoch indices, not optimiser updates.
ax.set_xlabel('Epoch')
ax.set_ylabel('Acc')
plt.show()