In [None]:
"""
option -> either 'training' or 'test'; it is required by the load_mnist(option, path) function, which receives it unchanged.
"""

class DatasetFashion():
    """Image/label dataset with a built-in training/validation split.

    The data comes from ``load_mnist(option, path)``. Images are reshaped to
    rows of 28*28 values and divided by 255, the labels are cast to
    ``th.long``, samples whose label is not in ``classes`` are dropped, the
    remainder is shuffled and finally split into a training and a validation
    part.

    ``len(ds)`` and ``ds[i]`` only expose the training part (this is what a
    ``DataLoader`` iterates over); the validation part is available through
    ``validationImages`` / ``validationLabels``.

    NOTE: the class filter indexes with the 2-D result of ``th.nonzero``, so,
    assuming ``load_mnist`` returns 1-D labels, every stored tensor carries an
    extra singleton axis: images are ``(N, 1, 784)`` and labels ``(N, 1)``.
    That layout is kept on purpose for backward compatibility.

    Args:
        option: Passed unchanged to ``load_mnist``.
        classes: Iterable of label values to keep.
        crossvalidation: ``None`` (or any falsy value) for a fixed
            4/5 - 1/5 split, otherwise a pair ``(k, fold)`` selecting chunk
            ``fold`` (0-based) out of ``k`` equally sized chunks as the
            validation part.
        path: Passed unchanged to ``load_mnist``.
        seed: Optional integer seed for the shuffle. Instances built with the
            same seed (and the same data and classes) shuffle identically,
            which is what makes the ``k`` folds of a cross-validation run a
            true partition of the data. ``None`` (default) shuffles with the
            global torch RNG, i.e. differently for every instance.

    Raises:
        ValueError: If ``crossvalidation`` does not describe a usable fold.
    """

    def __init__(self, option, classes = range(10), crossvalidation=None, path='./', seed=None):
        X,y = load_mnist(option, path)       # loader defined elsewhere; expected to return (images, labels)

        self.images = X.reshape([-1, 28*28])  # one flattened image per row
        self.images = self.images/255.0       # scales 8-bit pixel values to [0, 1] (assumes inputs are 0..255)
        self.labels = y.type(th.long)

        # Keep only the requested classes. Concatenating per class also sorts
        # the samples by class, which the shuffle below undoes again.
        indices = th.cat([th.nonzero(self.labels == i) for i in classes], dim=0)
        self.images = self.images[indices]
        self.labels = self.labels[indices]

        # Shuffle; a dedicated generator keeps a seeded shuffle independent of
        # (and without side effects on) the global RNG.
        if seed is None:
            indices = th.randperm(len(self.labels))
        else:
            generator = th.Generator().manual_seed(seed)
            indices = th.randperm(len(self.labels), generator=generator)
        self.images = self.images[indices]
        self.labels = self.labels[indices]

        if crossvalidation:
            training, validation = self._fold_indices(len(self.labels), crossvalidation)
            self.validationImages = self.images[validation]
            self.validationLabels = self.labels[validation]
            self.trainingImages = self.images[training]
            self.trainingLabels = self.labels[training]

        else:
            # Fixed split: first 4/5 for training, the rest for validation.
            split_ratio = int(len(self.images)*4/5)

            self.trainingImages = self.images[:split_ratio]
            self.trainingLabels = self.labels[:split_ratio]
            self.validationImages = self.images[split_ratio:]
            self.validationLabels = self.labels[split_ratio:]

    @staticmethod
    def _fold_indices(num_samples, crossvalidation):
        """Return ``(training, validation)`` index tensors for one fold.

        ``crossvalidation`` is the ``(k, fold)`` pair described on the class.
        The samples are cut into chunks of ``num_samples // k`` consecutive
        indices; chunk ``fold`` is the validation set and all other chunks,
        including a shorter leftover chunk when ``num_samples`` is not
        divisible by ``k``, form the training set.
        """
        num_folds, fold = crossvalidation[0], crossvalidation[1]
        if num_folds < 2:
            raise ValueError(
                "crossvalidation needs at least 2 folds, got {}".format(num_folds))
        # A negative or too large index would silently select a chunk that is
        # also part of the training set (or the leftover chunk), so reject it.
        if not 0 <= fold < num_folds:
            raise ValueError(
                "fold index {} is outside 0..{}".format(fold, num_folds - 1))
        if num_samples < num_folds:
            raise ValueError(
                "cannot split {} samples into {} folds".format(num_samples, num_folds))

        splitSize = num_samples//num_folds
        chunks = th.split(th.arange(num_samples), splitSize, dim=0)
        validation = chunks[fold]
        training = th.cat([c for j, c in enumerate(chunks) if j != fold], dim=0)
        return training, validation

    def __len__(self):
        """Return the number of training samples."""
        return len(self.trainingLabels)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair of training sample ``index``."""
        return self.trainingImages[index], self.trainingLabels[index]
            
class Network(th.nn.Module):
    """Fully connected classifier that outputs log-probabilities.

    The network is a chain of linear layers (all with bias):
    ``num_inputs -> num_neurons[0] -> ... -> num_neurons[-1] -> num_classes``.
    ``activation`` is applied after every layer except the last one, whose
    output goes through a log-softmax over dimension 1 instead.

    Args:
        num_inputs: Number of input features per sample.
        num_classes: Number of output classes.
        num_neurons: Non-empty sequence with the width of each hidden layer.
        activation: Callable applied to the output of every hidden layer.
    """

    def __init__(self, num_inputs, num_classes, num_neurons=(50,20), activation=th.relu):
        super().__init__()
        self.activation = activation
        self.logsoftmax = th.nn.LogSoftmax(dim=1)

        # Build the stack front to back: input layer, hidden-to-hidden
        # layers, output layer.
        first_width, last_width = num_neurons[0], num_neurons[-1]
        stack = [th.nn.Linear(num_inputs, first_width, bias=True)]
        for fan_in, fan_out in zip(num_neurons, num_neurons[1:]):
            stack.append(th.nn.Linear(fan_in, fan_out, bias=True))
        stack.append(th.nn.Linear(last_width, num_classes, bias=True))
        self.layers = th.nn.ModuleList(stack)

    def forward(self, x):
        """Return the log-softmax class scores for the batch ``x``."""
        *hidden, output = self.layers
        for linear in hidden:
            x = self.activation(linear(x))
        return self.logsoftmax(output(x))
    
def _squeeze_batch(batch):
    """Drop every size-1 dimension of ``batch`` except the leading batch axis.

    For batches with more than one sample this equals ``batch.squeeze()``; for
    a single-sample batch it keeps the batch axis that a bare ``squeeze()``
    would also remove.
    """
    kept = [size for size in batch.shape[1:] if size != 1]
    return batch.reshape(batch.shape[0], *kept)


def train(model, dataloader, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Every batch is run through the model, scored with the negative
    log-likelihood loss (the model is expected to output log-probabilities)
    and followed by one optimizer step. Progress is printed every 100 batches.

    Args:
        model: Callable mapping an image batch to per-class log-probabilities.
        dataloader: Sized iterable of ``(images, labels)`` batches.
        optimizer: Optimizer holding the parameters of ``model``.

    Returns:
        1-D numpy array with the loss of every batch, in iteration order.
    """
    # Collect in a list and convert once; np.append copies the whole array on
    # every call.
    batch_losses = []
    for index, (trainingImages,trainingLabels) in enumerate(dataloader):
        optimizer.zero_grad()
        # The batch axis must survive the squeeze, otherwise a trailing batch
        # with a single sample reaches the model without a batch dimension.
        prediction = model(_squeeze_batch(trainingImages))
        loss = th.nn.functional.nll_loss(prediction, _squeeze_batch(trainingLabels))
        loss_value = loss.item()
        batch_losses.append(loss_value)
        loss.backward()
        optimizer.step()
        if index % 100 == 0:
            print('Train {}/{} Loss {:.6f}'.format(index, len(dataloader), loss_value))
    return np.array(batch_losses)
                
def evaluate(model, images, labels):
    """Compute loss and accuracy of ``model`` on one set of samples.

    Args:
        model: Callable mapping an image batch to per-class log-probabilities.
        images: Tensor of samples, first axis is the sample axis.
        labels: Tensor of integer class labels, one per sample.

    Returns:
        ``(loss, accuracy)``: the mean negative log-likelihood as a 0-dim
        tensor and the fraction of correctly classified samples as a float.
    """
    def drop_singletons(tensor):
        # Like tensor.squeeze(), but never removes the leading sample axis, so
        # a set holding a single sample still reaches the model as a batch.
        kept = [size for size in tensor.shape[1:] if size != 1]
        return tensor.reshape(tensor.shape[0], *kept)

    targets = drop_singletons(labels)
    # Pure evaluation: no gradients are needed, so skip building the autograd
    # graph for what is typically the whole validation set.
    with th.no_grad():
        prediction = model(drop_singletons(images))
        loss = th.nn.functional.nll_loss(prediction, targets)
    pred_label = th.argmax(prediction, dim=1)
    accuracy = ((pred_label == targets).sum().item()) / len(images)
    return loss, accuracy


In [None]:
if __name__ == "__main__":

    # Single run with the default 4/5 - 1/5 split: train for 10 epochs, then
    # score the held-out validation part once. The model is created before the
    # dataset so both draw from the RNG in the same order as before.
    model = Network(num_inputs=28*28, num_classes=10)
    optimizer = th.optim.SGD(params=model.parameters(), lr=0.01)

    dataset = DatasetFashion(option="training")
    # The dataset already shuffles its samples once, so the loader keeps that
    # order.
    dataloader = th.utils.data.DataLoader(dataset=dataset, batch_size=64, shuffle=False)

    # Per-batch training loss over all epochs (validation loss and accuracy
    # histories could be collected the same way).
    training_loss_hist = np.array([])
    for epoch in range(10):
        print("Epoch ", epoch)
        loss_hist_per_epoch = train(model, dataloader, optimizer)
        training_loss_hist = np.concatenate((training_loss_hist, loss_hist_per_epoch))

    # Final evaluation on the validation part.
    valid_loss, valid_accuracy = evaluate(
        model=model,
        images=dataset.validationImages,
        labels=dataset.validationLabels,
    )

In [None]:
if __name__ == "__main__":

    # 5-fold cross-validation of the sigmoid network: every fold trains a
    # fresh model for 10 epochs on the other four folds and is scored on its
    # own fold; the reported number is the mean validation loss.
    num_folds = 5

    training_loss_hist = np.array([])   #also validation loss and accuracy history can be kept
    avg_loss = 0

    # DatasetFashion shuffles with the global RNG on every construction.
    # Replaying the same RNG state for each construction makes all folds see
    # the same shuffle, so the validation chunks really partition the data
    # (assumes load_mnist returns the samples in the same order every time).
    shuffle_state = th.get_rng_state()

    for i in range(num_folds):

        # A new model and optimizer per fold: reusing one model would let
        # later folds validate on samples it was already trained on.
        model = Network(28*28, 10, activation = th.sigmoid)
        optimizer = th.optim.SGD(model.parameters(), lr=0.01)

        resume_state = th.get_rng_state()
        th.set_rng_state(shuffle_state)
        dataset = DatasetFashion("training", crossvalidation=(num_folds, i))
        th.set_rng_state(resume_state)
        dataloader = th.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

        for epoch in range(10):
            #training
            loss_hist_per_epoch = train(model, dataloader, optimizer)

            #loss and accuracy histories are kept
            training_loss_hist = np.hstack((training_loss_hist, loss_hist_per_epoch))

        #evaluation (evaluate returns a (loss, accuracy) pair; only the loss is averaged)
        valid_loss, valid_accuracy = evaluate(model, dataset.validationImages, dataset.validationLabels)
        avg_loss += valid_loss.item()/num_folds

    print("Average Loss", avg_loss)