# Libraries + Setting up PATH

In [1]:
import pandas as pd
from PIL import Image
from tqdm import tqdm
import os
from pathlib import Path
from glob import glob
import cv2

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.amp as amp
from torchsummary import summary

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import torchvision.transforms as transforms
import torchvision.transforms.v2 as v2

In [3]:
parentDir = Path(os.path.abspath(""))

# Dataset

## Dataset files

In [4]:
datasetDir = parentDir.joinpath("dataset-generator/data/poseLandmarkDataset")

In [5]:
csvFile = datasetDir.joinpath("coordinate.csv")
coordinateDf = pd.read_csv(csvFile)

In [6]:
coordinateDf

Unnamed: 0,image,head_x,head_y,neck_x,neck_y,torso_x,torso_y,left_shoulder_x,left_shoulder_y,left_elbow_x,...,left_foot_toe_x,left_foot_toe_y,right_hip_x,right_hip_y,right_knee_x,right_knee_y,right_heel_x,right_heel_y,right_foot_toe_x,right_foot_toe_y
0,20240925_192500_312.jpg,0.630556,0.205556,0.636111,0.258333,0.652778,0.402778,0.733333,0.305556,0.813889,...,0.708333,0.980556,0.597222,0.597222,0.586111,0.758333,0.580556,0.961111,0.530556,0.972222
1,20240928_105700_65.jpg,0.555556,0.044444,0.547222,0.127778,0.547222,0.288889,0.438889,0.169444,0.352778,...,0.450000,0.961111,0.608333,0.486111,0.613889,0.705556,0.630556,0.922222,0.677778,0.977778
2,20240925_192500_707.jpg,0.541667,0.233333,0.538889,0.313889,0.525000,0.450000,0.452778,0.352778,0.422222,...,0.441667,0.972222,0.569444,0.591667,0.613889,0.725000,0.644444,0.947222,0.691667,0.977778
3,20240928_105700_132.jpg,0.561111,0.072222,0.563889,0.147222,0.555556,0.302778,0.452778,0.188889,0.325000,...,0.458333,0.983333,0.619444,0.480556,0.613889,0.663889,0.625000,0.919444,0.663889,0.969444
4,20240928_105700_259.jpg,0.516667,0.044444,0.558333,0.086111,0.613889,0.288889,0.658333,0.197222,0.730556,...,0.616667,0.961111,0.550000,0.458333,0.522222,0.700000,0.572222,0.980556,0.488889,0.972222
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,20240925_192500_487.jpg,0.491667,0.236111,0.497222,0.302778,0.488889,0.444444,0.419444,0.358333,0.422222,...,0.427778,0.994444,0.547222,0.608333,0.541667,0.758333,0.530556,0.966667,0.550000,0.994444
447,20240925_192500_32.jpg,0.522222,0.180556,0.516667,0.250000,0.508333,0.436111,0.422222,0.302778,0.363889,...,0.441667,0.988889,0.555556,0.622222,0.533333,0.822222,0.538889,0.991667,0.000000,0.000000
448,20240925_192500_690.jpg,0.647222,0.241667,0.638889,0.294444,0.650000,0.455556,0.558333,0.341667,0.516667,...,0.597222,0.980556,0.694444,0.597222,0.686111,0.750000,0.661111,0.947222,0.697222,0.972222
449,20240928_105700_696.jpg,0.000000,0.000000,0.000000,0.000000,0.550000,0.291667,0.219444,0.013889,0.266667,...,0.000000,0.000000,0.716667,0.613889,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


## Dataset stuff

In [7]:
class landmarksDataset(Dataset):
    def __init__(self, csvFile, imageDir, transformer):
        self.transformer = transformer
        self.imageDir = imageDir
        self.sample = []

        df = pd.read_csv(csvFile)
        imagesFile = df.pop("image").values
        landmarks = df
        for index in range(len(imagesFile)):
            landmarkCoordinates = landmarks.iloc[index].values
            self.sample.append((
                f"{imageDir}/{imagesFile[index]}",
                (
                    (landmarkCoordinates[0], landmarkCoordinates[1]),
                    (landmarkCoordinates[2], landmarkCoordinates[3]),
                    (landmarkCoordinates[4], landmarkCoordinates[5]),

                    (landmarkCoordinates[6], landmarkCoordinates[7]),
                    (landmarkCoordinates[8], landmarkCoordinates[9]),
                    (landmarkCoordinates[10], landmarkCoordinates[11]),
                    
                    (landmarkCoordinates[12], landmarkCoordinates[13]),
                    (landmarkCoordinates[14], landmarkCoordinates[15]),
                    (landmarkCoordinates[16], landmarkCoordinates[17]),
                    
                    (landmarkCoordinates[18], landmarkCoordinates[19]),
                    (landmarkCoordinates[20], landmarkCoordinates[21]),
                    (landmarkCoordinates[22], landmarkCoordinates[23]),
                    (landmarkCoordinates[24], landmarkCoordinates[25]),

                    (landmarkCoordinates[26], landmarkCoordinates[27]),
                    (landmarkCoordinates[28], landmarkCoordinates[29]),
                    (landmarkCoordinates[30], landmarkCoordinates[31]),
                    (landmarkCoordinates[32], landmarkCoordinates[33])
                )
                ))

    def __len__(self):
        return len(self.sample)
    
    def __getitem__(self, index):
        imgPath, landmarks = self.sample[index]
        landmarks = torch.tensor(landmarks)
        image = Image.open(imgPath).convert("RGB")
        if self.transformer:
            image = self.transformer(image) / 255
        return image, landmarks

In [8]:
def loadDataset(csvFile, imageDir, transformer=None, batchSize=16, splitRatio=0.2, worker=16):
    dataset = landmarksDataset(csvFile, imageDir, transformer)
    trainDataset, testDataset = train_test_split(dataset, test_size=splitRatio)
    print(f"Train dataset size: {len(trainDataset)}, Test dataset size: {len(testDataset)}")
    
    trainLoader = DataLoader(trainDataset, batch_size=batchSize, num_workers=worker)
    testLoader = DataLoader(testDataset, batch_size=batchSize, num_workers=worker)
    return trainLoader, testLoader

In [9]:
transformer = transforms.Compose([
    v2.PILToTensor(),
    v2.ToDtype(torch.float32),
    v2.Resize(256)
])

# Model

In [10]:
from poseLandmark import poseLandmark

## Model debugging

In [None]:
debugModel = poseLandmark()
summary(debugModel, torch.zeros(16, 3, 256, 256))
torch.cuda.empty_cache()

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 65536]               --
|    └─Conv2d: 2-1                       [-1, 16, 256, 256]        448
|    └─ReLU: 2-2                         [-1, 16, 256, 256]        --
|    └─Conv2d: 2-3                       [-1, 64, 256, 256]        9,280
|    └─ReLU: 2-4                         [-1, 64, 256, 256]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 128, 128]        --
|    └─BatchNorm2d: 2-6                  [-1, 64, 128, 128]        128
|    └─Conv2d: 2-7                       [-1, 128, 128, 128]       73,856
|    └─ReLU: 2-8                         [-1, 128, 128, 128]       --
|    └─Conv2d: 2-9                       [-1, 256, 128, 128]       295,168
|    └─ReLU: 2-10                        [-1, 256, 128, 128]       --
|    └─MaxPool2d: 2-11                   [-1, 256, 64, 64]         --
|    └─BatchNorm2d: 2-12                 [-1, 256, 64, 64]         512


In [7]:
debugModel = poseLandmark()
debugOutput = debugModel(torch.zeros(16, 3, 360, 360))
torch.cuda.empty_cache()

In [8]:
print(len(debugOutput))
print(debugOutput[0].shape)

17
torch.Size([16, 2])


## Model training

In [12]:
def train(model,
          epochs,
          scaler,
          lossFn,
          optimizer,
          trainLoader,
          validationLoader,
          checkpointPath,
          overfitDelta=0.005,
          patient=10,
          device="cuda"):
    history = {"trainAvgLoss" : [], "testAvgLoss" : []}
    model.to(device)
    bestLoss = 99999999999
    overfitStreak = 0
    try:
        for epoch in range(epochs + 1):
            # Training
            model.train()
            totalTrainLoss = 0
            totalTrainBatchCount = 0
            trainBar = tqdm(trainLoader, desc=f"Training epoch: {epoch} / {epochs}", unit="batch", leave=True)
            for batch in trainBar:
                loss = 0
                for index, landmark in enumerate(batch):
                    batch[index] = batch[index].to(device)
                images = batch[0]
                landmarks = batch[1:][0].permute(1, 0, 2)

                with torch.autocast(device_type=device, dtype=torch.bfloat16):
                    optimizer.zero_grad()
                    predictions = model(images)
                    for index, landmarkPrediction in enumerate(predictions):
                        loss += lossFn(landmarkPrediction, landmarks[index])

                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                totalTrainLoss += loss.item()
                totalTrainBatchCount += 1
            history["trainAvgLoss"].append(totalTrainLoss / totalTrainBatchCount)

            # Validation
            model.eval()
            totalValLoss = 0
            totalValBatchCount = 0
            validationBar = tqdm(validationLoader, desc=f"Validating epoch: {epoch} / {epochs}", unit="batch", leave=True)
            for batch in validationBar:
                loss = 0
                for index, landmark in enumerate(batch):
                    batch[index] = batch[index].to(device)
                images = batch[0]
                landmarks = batch[1:][0].permute(1, 0, 2)

                with torch.autocast(device_type=device, dtype=torch.bfloat16):
                    predictions = model(images)
                    for index, landmarkPrediction in enumerate(predictions):
                        loss += lossFn(landmarkPrediction, landmarks[index])

                totalValLoss += loss.item()
                totalValBatchCount += 1
            history["testAvgLoss"].append(totalValLoss / totalValBatchCount)

            # Early stopping
            if bestLoss > history["testAvgLoss"][-1]:
                bestLoss = history["testAvgLoss"][-1]
                overfitStreak = 0
                torch.save(model.state_dict(), checkpointPath)
            elif bestLoss > history["testAvgLoss"][-1] + overfitDelta:
                overfitStreak = 0
            else:
                overfitStreak += 1

            # Logging
            print(f"{'Average train loss: ':>30}{history['trainAvgLoss'][-1]:.5f}")
            print(f"{'Average test loss: ':>29}{history['testAvgLoss'][-1]:.5f}",end="\n\n")

            if overfitStreak >= patient:
                model.load_state_dict(torch.load(checkpointPath, weights_only=True))
                print("Model is going overfit")
                break
    except KeyboardInterrupt:
        model.load_state_dict(torch.load(checkpointPath, weights_only=True))
        print("Stop training due to keyboard interrupt")
    return history

In [13]:
torch.manual_seed(7800)
model = poseLandmark()

In [14]:
trainLoader, testLoader = loadDataset(csvFile, datasetDir, model.transformer)

Train dataset size: 360, Test dataset size: 91


In [15]:
lossFn = nn.L1Loss()
optimizer = optim.AdamW(model.parameters(), lr=0.00001)
scaler = amp.GradScaler()
history = train(model=model,
                epochs=10000,
                scaler=scaler,
                lossFn=lossFn,
                optimizer=optimizer,
                trainLoader=trainLoader,
                validationLoader=testLoader,
                checkpointPath=parentDir.joinpath("model/poseLandmark.pth"),
                overfitDelta=0,
                patient=5)

Training epoch: 0 / 10000: 100%|██████████| 23/23 [00:04<00:00,  5.67batch/s]
Validating epoch: 0 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.23batch/s]


          Average train loss: 2.98813
          Average test loss: 3.13200



Training epoch: 1 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.82batch/s]
Validating epoch: 1 / 10000: 100%|██████████| 6/6 [00:00<00:00,  9.87batch/s]


          Average train loss: 2.66844
          Average test loss: 2.61797



Training epoch: 2 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.65batch/s]
Validating epoch: 2 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.99batch/s]


          Average train loss: 2.56537
          Average test loss: 2.47216



Training epoch: 3 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.50batch/s]
Validating epoch: 3 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.44batch/s]


          Average train loss: 2.47419
          Average test loss: 2.23310



Training epoch: 4 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.77batch/s]
Validating epoch: 4 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.62batch/s]


          Average train loss: 2.40339
          Average test loss: 2.10078



Training epoch: 5 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.79batch/s]
Validating epoch: 5 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.26batch/s]


          Average train loss: 2.32666
          Average test loss: 2.06181



Training epoch: 6 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.85batch/s]
Validating epoch: 6 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.04batch/s]


          Average train loss: 2.27621
          Average test loss: 1.98541



Training epoch: 7 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.94batch/s]
Validating epoch: 7 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.89batch/s]


          Average train loss: 2.22887
          Average test loss: 1.92919



Training epoch: 8 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.88batch/s]
Validating epoch: 8 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.38batch/s]


          Average train loss: 2.17701
          Average test loss: 1.88830



Training epoch: 9 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.77batch/s]
Validating epoch: 9 / 10000: 100%|██████████| 6/6 [00:00<00:00,  9.86batch/s]


          Average train loss: 2.12264
          Average test loss: 1.81662



Training epoch: 10 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.70batch/s]
Validating epoch: 10 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.04batch/s]


          Average train loss: 2.07918
          Average test loss: 1.78121



Training epoch: 11 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.72batch/s]
Validating epoch: 11 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.25batch/s]


          Average train loss: 2.04700
          Average test loss: 1.78873



Training epoch: 12 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.78batch/s]
Validating epoch: 12 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.17batch/s]


          Average train loss: 2.00531
          Average test loss: 1.74066



Training epoch: 13 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.65batch/s]
Validating epoch: 13 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.71batch/s]


          Average train loss: 1.97040
          Average test loss: 1.68034



Training epoch: 14 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.55batch/s]
Validating epoch: 14 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.21batch/s]


          Average train loss: 1.95665
          Average test loss: 1.65779



Training epoch: 15 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.89batch/s]
Validating epoch: 15 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.87batch/s]


          Average train loss: 1.90876
          Average test loss: 1.64545



Training epoch: 16 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.04batch/s]
Validating epoch: 16 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.07batch/s]


          Average train loss: 1.89808
          Average test loss: 1.65855



Training epoch: 17 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.96batch/s]
Validating epoch: 17 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.67batch/s]


          Average train loss: 1.87152
          Average test loss: 1.58846



Training epoch: 18 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.03batch/s]
Validating epoch: 18 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.12batch/s]


          Average train loss: 1.87009
          Average test loss: 1.57197



Training epoch: 19 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.81batch/s]
Validating epoch: 19 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.49batch/s]


          Average train loss: 1.83978
          Average test loss: 1.53836



Training epoch: 20 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.89batch/s]
Validating epoch: 20 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.71batch/s]


          Average train loss: 1.83181
          Average test loss: 1.54882



Training epoch: 21 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.05batch/s]
Validating epoch: 21 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.02batch/s]


          Average train loss: 1.81167
          Average test loss: 1.52245



Training epoch: 22 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.97batch/s]
Validating epoch: 22 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.71batch/s]


          Average train loss: 1.80015
          Average test loss: 1.50753



Training epoch: 23 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.79batch/s]
Validating epoch: 23 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.59batch/s]


          Average train loss: 1.76927
          Average test loss: 1.47962



Training epoch: 24 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.60batch/s]
Validating epoch: 24 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.83batch/s]


          Average train loss: 1.77761
          Average test loss: 1.43362



Training epoch: 25 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.98batch/s]
Validating epoch: 25 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.32batch/s]


          Average train loss: 1.74647
          Average test loss: 1.45876



Training epoch: 26 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.03batch/s]
Validating epoch: 26 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.05batch/s]


          Average train loss: 1.73597
          Average test loss: 1.45685



Training epoch: 27 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.94batch/s]
Validating epoch: 27 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.30batch/s]


          Average train loss: 1.72948
          Average test loss: 1.40562



Training epoch: 28 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.11batch/s]
Validating epoch: 28 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.38batch/s]


          Average train loss: 1.71259
          Average test loss: 1.41490



Training epoch: 29 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.06batch/s]
Validating epoch: 29 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.97batch/s]


          Average train loss: 1.70924
          Average test loss: 1.41346



Training epoch: 30 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.81batch/s]
Validating epoch: 30 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.17batch/s]


          Average train loss: 1.67900
          Average test loss: 1.39750



Training epoch: 31 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.03batch/s]
Validating epoch: 31 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.44batch/s]


          Average train loss: 1.67231
          Average test loss: 1.42388



Training epoch: 32 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.83batch/s]
Validating epoch: 32 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.04batch/s]


          Average train loss: 1.67312
          Average test loss: 1.39818



Training epoch: 33 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.10batch/s]
Validating epoch: 33 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.46batch/s]


          Average train loss: 1.66523
          Average test loss: 1.34832



Training epoch: 34 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.03batch/s]
Validating epoch: 34 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.30batch/s]


          Average train loss: 1.65435
          Average test loss: 1.38291



Training epoch: 35 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.91batch/s]
Validating epoch: 35 / 10000: 100%|██████████| 6/6 [00:00<00:00,  9.95batch/s]


          Average train loss: 1.64459
          Average test loss: 1.33337



Training epoch: 36 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.60batch/s]
Validating epoch: 36 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.01batch/s]


          Average train loss: 1.62349
          Average test loss: 1.32626



Training epoch: 37 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.61batch/s]
Validating epoch: 37 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.77batch/s]


          Average train loss: 1.62819
          Average test loss: 1.34572



Training epoch: 38 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.98batch/s]
Validating epoch: 38 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.92batch/s]


          Average train loss: 1.61997
          Average test loss: 1.34012



Training epoch: 39 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.61batch/s]
Validating epoch: 39 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.13batch/s]


          Average train loss: 1.60408
          Average test loss: 1.31197



Training epoch: 40 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.98batch/s]
Validating epoch: 40 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.29batch/s]


          Average train loss: 1.59500
          Average test loss: 1.30257



Training epoch: 41 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.95batch/s]
Validating epoch: 41 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.06batch/s]


          Average train loss: 1.57959
          Average test loss: 1.31472



Training epoch: 42 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.06batch/s]
Validating epoch: 42 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.07batch/s]


          Average train loss: 1.58327
          Average test loss: 1.28126



Training epoch: 43 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.02batch/s]
Validating epoch: 43 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.42batch/s]


          Average train loss: 1.57336
          Average test loss: 1.29114



Training epoch: 44 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.13batch/s]
Validating epoch: 44 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.17batch/s]


          Average train loss: 1.56933
          Average test loss: 1.27240



Training epoch: 45 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.01batch/s]
Validating epoch: 45 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.28batch/s]


          Average train loss: 1.54755
          Average test loss: 1.26460



Training epoch: 46 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.04batch/s]
Validating epoch: 46 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.13batch/s]


          Average train loss: 1.54528
          Average test loss: 1.26820



Training epoch: 47 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.07batch/s]
Validating epoch: 47 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.59batch/s]


          Average train loss: 1.53695
          Average test loss: 1.28724



Training epoch: 48 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.00batch/s]
Validating epoch: 48 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.98batch/s]


          Average train loss: 1.53022
          Average test loss: 1.28617



Training epoch: 49 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.09batch/s]
Validating epoch: 49 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.76batch/s]


          Average train loss: 1.53492
          Average test loss: 1.25573



Training epoch: 50 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.12batch/s]
Validating epoch: 50 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.11batch/s]


          Average train loss: 1.51626
          Average test loss: 1.25528



Training epoch: 51 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.11batch/s]
Validating epoch: 51 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.83batch/s]


          Average train loss: 1.51699
          Average test loss: 1.28748



Training epoch: 52 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.91batch/s]
Validating epoch: 52 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.63batch/s]


          Average train loss: 1.50628
          Average test loss: 1.24391



Training epoch: 53 / 10000: 100%|██████████| 23/23 [00:03<00:00,  6.71batch/s]
Validating epoch: 53 / 10000: 100%|██████████| 6/6 [00:00<00:00, 10.23batch/s]


          Average train loss: 1.50198
          Average test loss: 1.34200



Training epoch: 54 / 10000: 100%|██████████| 23/23 [00:03<00:00,  7.21batch/s]
Validating epoch: 54 / 10000: 100%|██████████| 6/6 [00:00<00:00, 13.06batch/s]


          Average train loss: 1.48683
          Average test loss: 1.28383



Training epoch: 55 / 10000: 100%|██████████| 23/23 [00:02<00:00,  8.91batch/s]
Validating epoch: 55 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.96batch/s]


          Average train loss: 1.50099
          Average test loss: 1.24044



Training epoch: 56 / 10000: 100%|██████████| 23/23 [00:02<00:00,  8.98batch/s]
Validating epoch: 56 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.54batch/s]


          Average train loss: 1.49507
          Average test loss: 1.22072



Training epoch: 57 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.23batch/s]
Validating epoch: 57 / 10000: 100%|██████████| 6/6 [00:00<00:00, 13.04batch/s]


          Average train loss: 1.49035
          Average test loss: 1.19485



Training epoch: 58 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.28batch/s]
Validating epoch: 58 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.54batch/s]


          Average train loss: 1.46829
          Average test loss: 1.19353



Training epoch: 59 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.15batch/s]
Validating epoch: 59 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.17batch/s]


          Average train loss: 1.46429
          Average test loss: 1.19338



Training epoch: 60 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.21batch/s]
Validating epoch: 60 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.13batch/s]


          Average train loss: 1.46318
          Average test loss: 1.15394



Training epoch: 61 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.11batch/s]
Validating epoch: 61 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.38batch/s]


          Average train loss: 1.45244
          Average test loss: 1.19543



Training epoch: 62 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.19batch/s]
Validating epoch: 62 / 10000: 100%|██████████| 6/6 [00:00<00:00, 13.14batch/s]


          Average train loss: 1.46369
          Average test loss: 1.18393



Training epoch: 63 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.23batch/s]
Validating epoch: 63 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.96batch/s]


          Average train loss: 1.44347
          Average test loss: 1.17417



Training epoch: 64 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.24batch/s]
Validating epoch: 64 / 10000: 100%|██████████| 6/6 [00:00<00:00, 13.39batch/s]


          Average train loss: 1.44785
          Average test loss: 1.11461



Training epoch: 65 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.14batch/s]
Validating epoch: 65 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.64batch/s]


          Average train loss: 1.43609
          Average test loss: 1.16441



Training epoch: 66 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.29batch/s]
Validating epoch: 66 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.93batch/s]


          Average train loss: 1.43469
          Average test loss: 1.14946



Training epoch: 67 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.14batch/s]
Validating epoch: 67 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.40batch/s]


          Average train loss: 1.42142
          Average test loss: 1.12760



Training epoch: 68 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.24batch/s]
Validating epoch: 68 / 10000: 100%|██████████| 6/6 [00:00<00:00, 12.59batch/s]


          Average train loss: 1.42117
          Average test loss: 1.18195



Training epoch: 69 / 10000: 100%|██████████| 23/23 [00:02<00:00,  9.23batch/s]
Validating epoch: 69 / 10000: 100%|██████████| 6/6 [00:00<00:00, 11.85batch/s]


          Average train loss: 1.43097
          Average test loss: 1.13870

Model is going overfit


In [15]:
torch.cuda.empty_cache()