[![open in colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DSSHN_lRd0A_tPBwYBi6zlOd_9N1DBJ3#scrollTo=dpz7yKFTYXPZ)

## HW Requirement

• Rewrite the best code (of the weight-tuning_EB
referring to page 14) you have for HW #1 into 
the code of the weight-tuning_LG. Best means the 
best hyperparameter setting.


• Rewrite the best code (of the weight-tuning_EB
referring to page 14) you have for HW #1 into 
the code of the weight-tuning_EB_LG.


• Once you have the code, you will apply the code 
to learn your dataset for HW #1.


• The training and test dataset is 80%/20%.


• The performance comparison benchmark is your 
best weight-tuning_EB.

Model 6 \
hidden nodes: 11 \
epochs: 300 \
init: xavier \
active: relu \
optimize: sgd \
schedule: None \
weight decay: 0.0

Model Accuracy: 82.17% \
Training Time: 639.923 s \
Epoch with hightest Train accuracy: 292, 86.19% \
Epoch with hightest Val accuracy= 249, 83.83%

## Model

In [1]:
import torch
from torch import nn, optim, Generator
from torch.utils.data import DataLoader, Dataset, random_split
import sys
import pandas as pd
import cProfile
import pstats

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from typing import Iterable, Callable, Type
from operator import mul

def product(nums: Iterable[Type], func: Callable[[Type, Type], Type] = mul):
    def _product(nums):
        nonlocal func
        if len(nums) == 1:
            return nums[0]
        return func(nums[-1], _product(nums[:-1]))
    try:
        return _product(nums)
    except Exception as e:
        raise e

In [8]:
from collections.abc import Callable
class TwoLayerNetwork(nn.Module):
    def __init__(self, input_size: int, hidden_size: int, num_classes: int, init_method:Callable, active_func:nn.modules.module.Module) -> None:
        super(TwoLayerNetwork, self).__init__()
        self.input_size = input_size
        self.hidden_size= hidden_size
        ## first layer
        self.fc1 = nn.Linear(input_size, hidden_size)
        ## activation
        self.active_func = active_func()
        ## initialize
        for param in self.parameters():
            init_method(param)
        ## second layer
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.active_func(out)
        out = self.fc2(out)
        return out


In [14]:
def train(model: TwoLayerNetwork, opt: nn.Module, device: str, epochs: int, learning_rate: float, trainloader: DataLoader, valloader: DataLoader, criterion: nn.modules.loss._Loss, sched: optim.lr_scheduler._LRScheduler, weight_decay: float, learning_goal: float):
    if epochs < 1:
        raise ValueError("Invalid epoch!!")
    else:
        epochs = int(epochs)
    model.to(device)
    optimizer = opt(model.parameters(), lr=learning_rate,
                    weight_decay=weight_decay)
    scheduler = sched(optimizer) if sched else None
    history = []
    # Train the model
    for epoch in range(epochs):
        train_loss = 0.0
        train_correct = 0
        model.train()
        for X, y in trainloader:
            X = X.view(-1, model.input_size).to(device)
            y = y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * X.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == y).sum().item()
        train_loss /= len(trainloader.dataset)
        train_accuracy = train_correct / len(trainloader.dataset)

        # Validate the model
        val_loss = 0.0
        val_correct = 0
        model.eval()
        with torch.no_grad():
            for X, y in valloader:
                X = X.view(-1, model.input_size).to(device)
                y = y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                val_loss += loss.item() * X.size(0)
                _, predicted = torch.max(outputs.data, 1)
                val_correct += (predicted == y).sum().item()
            val_loss /= len(valloader.dataset)
            val_accuracy = val_correct / len(valloader.dataset)
        if scheduler:
            scheduler.step()
        # Print epoch statistics
        history.append((train_loss, train_accuracy, val_loss, val_accuracy))
        if learning_goal< val_accuracy:
            return history
        # sys.stdout.write('Epoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Val Loss: {:.4f}, Val Accuracy: {:.2f}%\n'
        #       .format(epoch+1, epochs, train_loss, train_accuracy, val_loss, val_accuracy))
    return history


In [10]:
def test(model:nn.Module, device:str, testloader:DataLoader):
    val_correct = 0
    model.to(device)
    model.eval()
    with torch.no_grad():
        for X, y in testloader:
            X = X.view(-1, model.input_size).to(device)
            y = y.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            val_correct += (predicted == y).sum().item()
        val_accuracy = val_correct / len(testloader.dataset)
    return val_accuracy

# Dataset

### pytorch dataset

In [5]:
# load pytorch dataset

from torchvision import datasets, transforms

def getPytorchData(train: float = 0.8, remain: float = 0.1):
    """
    Args:
        train: train_amount / total_amount or 1 - valid_amount / total_amount
        remain: reduce data amount to save time
    """
    # preprocess: flatten, normalize, drop 90%, split
    transform = transforms.transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    if 0 >= train or train >= 1:
        raise ValueError()
    if 0 > remain or remain > 1:
        raise ValueError()
    # Split the training set into training and validation sets
    trainset = datasets.FashionMNIST(
        root="./data/", train=True, download=False, transform=transform)
    train_count = int(train * remain * len(trainset))
    valid_count = int((1-train) * remain * len(trainset))
    if train_count * valid_count == 0:
        raise ValueError()
    datum_size = product(trainset[0][0].size())
    class_amount = len(trainset.classes)
    testset = datasets.FashionMNIST(
        root="./data/", train=False, download=False, transform=transform)
    print(train_count, valid_count, len(testset))
    trainset, valset, _ = random_split(
        trainset, (train_count, valid_count, len(trainset)-train_count-valid_count), Generator().manual_seed(42))
    # Create dataloaders to load the data in batches
    trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
    valloader = DataLoader(valset, batch_size=32, shuffle=True)
    testloader = DataLoader(testset, batch_size=32, shuffle=True)
    return trainloader, valloader, testloader, datum_size, class_amount


## Training

### train

In [11]:
device = "cuda" if torch.cuda.is_available(
) else "mps" if torch.backends.mps.is_available() else "cpu"
# hyper parameters
trainloader, valloader, testloader, input_size, output_size = getPytorchData()
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()

4800 1199 10000


### epoch bound

In [15]:
hidden_size = 11
epochs = 300
init = lambda x: nn.init.xavier_uniform_(
    tensor=x) if len(x.shape) > 1 else None
active = nn.ReLU
optimize = optim.SGD
schedule = None
weight_decay = 0.0
learning_goal = 100.
model = TwoLayerNetwork(input_size, hidden_size,
                        output_size, init, active)
EB_baseline = test(model, device, testloader)
EB_history = train(model, optimize, device, epochs, learning_rate,
                   trainloader, valloader, criterion, schedule, weight_decay, learning_goal)
EB_result = test(model, device, testloader)
print(EB_baseline, EB_history, EB_result, sep="\n")


0.1032
[(2.3703084659576414, 0.12458333333333334, 2.2665171641126287, 0.16930775646371976), (2.2073361444473267, 0.19708333333333333, 2.161963452887197, 0.22101751459549623), (2.10985373655955, 0.24083333333333334, 2.070713071648134, 0.2702251876563803), (2.015818354288737, 0.27708333333333335, 1.975842498162073, 0.29608006672226855), (1.9175854190190633, 0.30833333333333335, 1.8757808720498805, 0.3286071726438699), (1.818630797068278, 0.341875, 1.776828143475352, 0.36613844870725604), (1.7220336031913757, 0.37666666666666665, 1.6795025864076973, 0.3969974979149291), (1.627836935520172, 0.40520833333333334, 1.5852018527133551, 0.4278565471226022), (1.538087814648946, 0.440625, 1.4964610224867783, 0.46872393661384487), (1.455324464639028, 0.48270833333333335, 1.4158306419103717, 0.5012510425354462), (1.380937574704488, 0.518125, 1.343318090029216, 0.5371142618849041), (1.314035340944926, 0.5489583333333333, 1.2780349855128679, 0.5613010842368641), (1.253947012424469, 0.5677083333333334,

### epoch bound and learning goal

In [16]:
hidden_size = 11
epochs = 300
def init(x):
    return nn.init.xavier_uniform_(tensor=x) if len(x.shape) > 1 else None
active = nn.ReLU
optimize = optim.SGD
schedule = None
weight_decay = 0.0
learning_goal = EB_result #
model = TwoLayerNetwork(input_size, hidden_size,
                        output_size, init, active)
LG_baseline = test(model, device, testloader)
LG_history = train(model, optimize, device, epochs, learning_rate,
                trainloader, valloader, criterion, schedule, weight_decay, learning_goal)
LG_result = test(model, device, testloader)
print(LG_baseline, LG_history, LG_result, sep="\n")


0.0657
[(2.3361593596140544, 0.08416666666666667, 2.251970630372137, 0.1292743953294412), (2.2178289111455283, 0.175, 2.177703283546963, 0.20683903252710592), (2.1403216234842937, 0.24, 2.0896413039922517, 0.2810675562969141), (2.038500696818034, 0.3125, 1.9596197623029363, 0.37364470391993326), (1.8871492306391398, 0.408125, 1.7951224674474606, 0.451209341117598), (1.733130259513855, 0.46791666666666665, 1.64451773908359, 0.4987489574645538), (1.5931685694058737, 0.5122916666666667, 1.5111697553295806, 0.5254378648874062), (1.472662479877472, 0.5489583333333333, 1.4017814323243944, 0.5763135946622185), (1.3725199794769287, 0.5964583333333333, 1.310238278041391, 0.6146788990825688), (1.2900520916779836, 0.6166666666666667, 1.23744816994846, 0.6255212677231026), (1.221700081427892, 0.6347916666666666, 1.1752855070836352, 0.6321934945788157), (1.164270633459091, 0.6466666666666666, 1.1230694287215004, 0.6413678065054211), (1.115423686504364, 0.6583333333333333, 1.0800473437496183, 0.6563