In [18]:
from mytorch.loss import CrossEntropyLoss
from mytorch.optim import Adam
from mytorch.dataloader import Dataloader
import numpy as np
import random
import pickle
from model import Model
from matplotlib import pyplot as plt
%matplotlib inline

In [19]:
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
epochs = []
def plot(ax, epoch, train_loss, val_loss, train_accuracy, val_accuracy):
    epochs.append(epoch)
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)
    ax[0].cla()
    ax[0].plot(epochs, train_losses, label="train_loss")
    ax[0].plot(epochs, val_losses, label="val_loss")
    ax[1].cla()
    ax[1].plot(epochs, train_accuracies, label="train_accuracy")
    ax[1].plot(epochs, val_accuracies, label="val_accuracy")
    plt.show()

In [20]:
class Dataset:
    def __init__(self, img, target, transform=None):
        self.img = img
        self.target = target
        self.transform = transform
    
    def __len__(self):
        return len(self.img)

    def __getitem__(self, idx):
        img = self.img[idx]
        if self.transform:
            img = self.transform(images = img.astype(np.uint8))
        img = (img-img.min())/img.max()
        target = self.target[idx]
        target = target*0.9+0.1/15.
        return img, target

In [21]:
def random_split(data, target, length):
    indices = list(range(len(data)))
    random.shuffle(indices)
    train_len = int(len(data)*length)
    val_len = len(data)-train_len
    train_data = [data[indices[i]] for i in range(train_len)]
    train_target = [target[indices[i]] for i in range(train_len)]
    val_data = [data[indices[i]] for i in range(train_len, train_len+val_len)]
    val_target = [target[indices[i]] for i in range(train_len, train_len+val_len)]
    return train_data, train_target, val_data, val_target

In [22]:
def accuracy(pred, target):
    y = np.argmax(pred.a, axis=1)
    t = np.argmax(target.a, axis=1)
    count = np.where(y == t, 1, 0)
    count = np.sum(count)
    return count/len(pred.a)

In [23]:
epoch = 500
batch_size = 32
lr = 1e-2
train_length = 0.8
path = "./weights/"

In [24]:
import imgaug.augmenters as iaa
augseq = iaa.Sequential([
    iaa.Crop(percent=(0, 0.03)),
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.1))),
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    iaa.Affine(
    scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},
    translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
    rotate=(-15, 15),
    shear=(-3, 3)
    ),
    ], random_order=True
)
model = Model()
celoss = CrossEntropyLoss()
optim = Adam(model.get_params(), lr=lr)

train_data = np.load("./1_data/train_data.npy")
train_label = np.load("./1_data/train_label.npy")
train_x, train_y, val_x, val_y = random_split(train_data, train_label, train_length)
train_dataset = Dataset(train_x, train_y, augseq)
val_dataset = Dataset(val_x, val_y)
dataloader = Dataloader(train_dataset, batch_size, True)
val_dataloader = Dataloader(val_dataset)
size = len(train_dataset)

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(20, 7))
for e in range(epoch):
    progress = 0
    model.train()
    t_ac = 0
    t_loss = 0
    i = 0
    for x, target in dataloader:
        i += 1
        progress += batch_size
        optim.zero_grad()
        y = model(x)
        loss = celoss(y, target)
        loss.backward()
        optim.step()
        bar = int(progress/size*40)
        t_ac += accuracy(y, target)
        t_loss += float(loss.a)
        if (model.conv1.weight.grad.max() < 1e-10):
            print(" !! conv1.weight.grad zero")
        print("\r{}/{}[{}]loss: {} accuracy: {}".format(e,epoch,"="*bar+"-"*(40-bar),t_loss/i, t_ac/i), end="")
        del loss
    t_ac /= i+1e-7
    t_loss /= i+1e-7
    v_ac = 0
    v_loss = 0
    i = 0
    model.eval()
    for val_x, val_y in val_dataloader:
        i += 1
        y = model(val_x)
        v_ac += accuracy(y, val_y)
        v_loss += float(celoss(y, val_y).a)
    v_ac /= i+1e-7
    v_loss /= i+1e-7
    plot(ax, e, t_loss, v_loss, t_ac, v_ac)
    print(f"\nloss: {t_loss}, val_loss: {v_loss}, accuracy: {t_ac}, val_accuracy: {v_ac}")
    if (e+1)%10 == 0:
        with open(path+f"{e}_weights{ac}.pkl", "wb") as f:
            pickle.dump(model.state_dict(), f)

loss: 1.8407277319315072, val_loss: 1.8798197871550077, accuracy: 0.07749999989666667, val_accuracy: 0.08177083251562502
loss: 1.8211827504133995, val_loss: 1.8241792449608063, accuracy: 0.10458333319388889, val_accuracy: 0.11406249885937501
2/500[----------------------------------------]loss: 1.8592406496377258 accuracy: 0.09375