In [1]:
import torch.utils.data as data
import scipy.io as sio
from PIL import Image
import os
import os.path
import torchvision.transforms as transforms
import torch
import numpy as np
import re
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import torch.backends.cudnn as cudnn
import time
import datetime
import shutil

In [2]:
class gazeData(data.Dataset):
    def __init__(self, dataset, imSize=(224,224)):

        self.dataset = dataset
        self.imSize = imSize
        
        self.transformImg = transforms.Compose([transforms.Resize(self.imSize)
                                                ,transforms.ToTensor(),
                                                 transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])])
        self.classLabel = [0]*16
#         self.transformPoint = transforms.Compose([
#             transforms.ToTensor()
#         ])
    

    def __getitem__(self, index):
#         index = self.indices[index]

        filePath = self.dataset['file'][index]
        image = Image.open(filePath).convert('RGB')
        image = self.transformImg(image)   

        labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        labels[self.dataset['location'][index]] = 1
        labels = torch.FloatTensor(labels)
        sample = {'image': image, 'labels': labels}
        
        
        return sample
    
        
    def __len__(self):
        return len(self.dataset)


In [3]:
import torch.nn as nn
import math

__all__ = ['ResNet', 'resnet50']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=16, include_top=True):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.include_top = include_top
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        
        if not self.include_top:
            return x
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def resnet50(**kwargs):
    """Constructs a ResNet-50 model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    return model



In [4]:

def vggface(pretrained=False, **kwargs):
    """VGGFace model.
    Args:
        pretrained (bool): If True, returns pre-trained model 
    """
    model = VggFace(**kwargs)
    if pretrained:
        state = torch.utils.model_zoo.load_url(MODEL_URL)
        model.load_state_dict(state)
    return model


class VggFace(torch.nn.Module):
    def __init__(self, classes=16):
        """VGGFace model.
        Face recognition network.  It takes as input a Bx3x224x224
        batch of face images and gives as output a BxC score vector
        (C is the number of identities).
        Input images need to be scaled in the 0-1 range and then 
        normalized with respect to the mean RGB used during training.
        Args:
            classes (int): number of identities recognized by the
            network
        """
        super().__init__()
        self.conv1 = _ConvBlock(3, 64, 64)
        self.conv2 = _ConvBlock(64, 128, 128)
        self.conv3 = _ConvBlock(128, 256, 256, 256)
        self.conv4 = _ConvBlock(256, 512, 512, 512)
        self.conv5 = _ConvBlock(512, 512, 512, 512)
        self.dropout = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(7 * 7 * 512, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, classes)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

class _ConvBlock(torch.nn.Module):
    """A Convolutional block."""

    def __init__(self, *units):
        """Create a block with len(units) - 1 convolutions.
        convolution number i transforms the number of channels from 
        units[i - 1] to units[i] channels.
        """
        super().__init__()
        self.convs = torch.nn.ModuleList([
            torch.nn.Conv2d(in_, out, 3, 1, 1)
            for in_, out in zip(units[:-1], units[1:])
        ])
        
    def forward(self, x):
        # Each convolution is followed by a ReLU, then the block is
        # concluded by a max pooling.
        for c in self.convs:
            x = F.relu(c(x))
        return F.max_pool2d(x, 2, 2, 0, ceil_mode=True)
    


In [5]:
class SEModule(nn.Module):

    def __init__(self, planes, compress_rate):
        super(SEModule, self).__init__()
        self.conv1 = nn.Conv2d(planes, planes // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv2 = nn.Conv2d(planes // compress_rate, planes, kernel_size=1, stride=1, bias=True)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        module_input = x
        x = F.avg_pool2d(module_input, kernel_size=module_input.size(2))
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.sigmoid(x)
        return module_input * x


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

        # SENet
        compress_rate = 16
        # self.se_block = SEModule(planes * 4, compress_rate)  # this is not used.
        self.conv4 = nn.Conv2d(planes * 4, planes * 4 // compress_rate, kernel_size=1, stride=1, bias=True)
        self.conv5 = nn.Conv2d(planes * 4 // compress_rate, planes * 4, kernel_size=1, stride=1, bias=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)


        ## senet
        out2 = F.avg_pool2d(out, kernel_size=out.size(2))
        out2 = self.conv4(out2)
        out2 = self.relu(out2)
        out2 = self.conv5(out2)
        out2 = self.sigmoid(out2)
        # out2 = self.se_block.forward(out)  # not used

        if self.downsample is not None:
            residual = self.downsample(x)

        out = out2 * out + residual
        # out = out2 + residual  # not used
        out = self.relu(out)
        return out


class SENet(nn.Module):

    def __init__(self, block, layers, num_classes=16, include_top=True):
        self.inplanes = 64
        super(SENet, self).__init__()
        self.include_top = include_top
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        
        if not self.include_top:
            return x
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def senet50(**kwargs):
    """Constructs a SENet-50 model.
    """
    model = SENet(Bottleneck, [3, 4, 6, 3], **kwargs)
    return model

In [6]:
def save_checkpoint(state, is_best, filename='checkpointClass.pth.tar'):
    CHECKPOINTS_PATH = './gazeClassCheckpoint'
    if not os.path.isdir(CHECKPOINTS_PATH):
        os.makedirs(CHECKPOINTS_PATH, 0o777)
    bestFilename = os.path.join(CHECKPOINTS_PATH, 'best_' + filename)
    filename = os.path.join(CHECKPOINTS_PATH, filename)
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, bestFilename)
        
def load_checkpoint(filename='./gazeClassCheckpoint/checkpointClass.pth.tar'):
    print(filename)
    if not os.path.isfile(filename):
        return None
    state = torch.load(filename)
    return state

In [7]:
def adjust_learning_rate(optimizer, epoch):
    lr = 0.0001 * (0.1 ** (epoch // 30))
    for param_group in optimizer.state_dict()['param_groups']:
        param_group['lr'] = lr

In [8]:
workers = 16
epochs = 25
batch_size = 64
weight_decay = 1e-4
best_loss = 1000
lr = 0.0001

In [9]:
df_gaze = pd.read_csv('gazeClassData.csv')
df_gaze =df_gaze.drop(['index'],axis=1)

df_train=df_gaze.sample(frac=0.9,random_state=100) 
df_tmp= df_gaze.drop(df_train.index)

df_train.reset_index(inplace=True)
df_tmp.reset_index(inplace=True)

df_val = df_tmp.sample(frac=0.5, random_state = 100)
df_test = df_tmp.drop(df_val.index)

df_val.reset_index(inplace=True)
df_test.reset_index(inplace=True)

In [10]:
df_train.head(5)

Unnamed: 0,index,location,file
0,438573,2,./data/01849/frames/00743.jpg
1,703392,12,./data/02945/frames/00833.jpg
2,158947,11,./data/00831/frames/00307.jpg
3,779797,15,./data/03312/frames/00838.jpg
4,587944,15,./data/02416/frames/00086.jpg


In [11]:
print('train',len(df_train),'val',len(df_val),'test',len(df_test))

train 737485 val 40972 test 40971


In [12]:
dataTrain = gazeData(dataset=df_train)
dataVal = gazeData(dataset=df_val)
dataTest = gazeData(dataset=df_test)

In [14]:
train_loader = torch.utils.data.DataLoader(
        dataTrain,
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
        dataVal,
        batch_size=batch_size, shuffle=True,
        num_workers=workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(
        dataTest,
        batch_size=1, shuffle=True,
        num_workers=workers, pin_memory=True)

In [15]:
# model = resnet50()
model = senet50()
# model =vggface()
model = torch.nn.DataParallel(model)
model.cuda()
cudnn.benchmark = True   
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr)

In [16]:
epoch =0
saved = load_checkpoint()
if saved:
    print('Loading checkpoint for epoch %05d with loss %.5f (which is the mean squared error not the actual linear error)...' % (saved['epoch'], saved['best_prec1']))
    state = saved['state_dict']
    try:
        model.module.load_state_dict(state)
    except:
        model.load_state_dict(state)
    epoch = saved['epoch']
    best_loss = saved['best_prec1']
else:
    print('Warning: Could not read checkpoint!')

./gazeClassCheckpoint/checkpointClass.pth.tar
Loading checkpoint for epoch 00002 with loss 0.00010 (which is the mean squared error not the actual linear error)...


In [17]:
def train(train_loader, model, criterion,optimizer, epoch):
    model.train()
    end = time.time()
    running_loss = 0
    for i,sample in enumerate(train_loader):
        frame, locationClass= sample['image'],sample['labels']

        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        optimizer.zero_grad()

        output = model(frame)
        
        loss = criterion(output, locationClass)
        
        loss.backward()
        optimizer.step()

        # 통계를 출력합니다.
        running_loss += loss.item()
        if i % 200 == 0:    # print every 2000 mini-batches
            print('Train [%d, %d / %d] loss: %.3f' %
                  (epoch + 1, i + 1,len(train_loader), running_loss / 200))
            running_loss = 0.0
            print(str(datetime.datetime.now().time()))

def validate(val_loader, model, criterion,optimizer, epoch) :

    model.eval()
    end = time.time()
    val_loss = 0
    for i,sample in enumerate(val_loader):
        frame, locationClass= sample['image'],sample['labels']

        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        optimizer.zero_grad()

        with torch.no_grad():
            output = model(frame)
        
        loss = criterion(output, locationClass)
        
        loss.backward()
        optimizer.step()

        val_loss += loss.item()
        if i % 200 == 0:   
            print('Validate [%d, %5d / %5d ] loss: %.3f' %
                  (epoch + 1, i + 1,len(val_loader) , val_loss / (i+1)))
            print(str(datetime.datetime.now().time()))
        return val_loss/len(val_loader)
    
def TestData(test_loader, model) :

    model.eval()
    correct = 0
    total = 0
    for i,sample in enumerate(test_loader):
        frame, locationClass= sample['image'],sample['labels']
        locationClass = locationClass.cuda()
        frame = frame.cuda()
        locationClass = torch.autograd.Variable(locationClass, requires_grad = True)
        frame = torch.autograd.Variable(frame, requires_grad = True)
        
        output = model(frame)
        _, predicted = torch.max(output.data, 1)
        total += locationClass.size(0)
        correct += (predicted == locationClass).sum().item()
        
    print('Test set: Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)) )

In [18]:
print('EPOCH >',epoch)
for epoch in range(epoch, epochs):
    adjust_learning_rate(optimizer, epoch)

    train(train_loader, model, criterion, optimizer, epoch)

    val_loss = validate(val_loader, model, criterion,optimizer, epoch)
    
    TestData(test_loader,model)
    # remember best prec@1 and save checkpoint
    is_best = val_loss < best_loss
    best_loss = min(val_loss, best_loss)
    save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'best_prec1': best_loss,
    }, is_best)

EPOCH > 2
Train [3, 1 / 11524] loss: 0.000
13:01:06.717224
Train [3, 201 / 11524] loss: 0.062
13:01:54.701258
Train [3, 401 / 11524] loss: 0.062
13:02:43.391670
Train [3, 601 / 11524] loss: 0.062
13:03:32.534243
Train [3, 801 / 11524] loss: 0.062
13:04:21.947116
Train [3, 1001 / 11524] loss: 0.062
13:05:11.159270
Train [3, 1201 / 11524] loss: 0.062
13:06:00.626293
Train [3, 1401 / 11524] loss: 0.062
13:06:50.009443
Train [3, 1601 / 11524] loss: 0.062
13:07:39.480730
Train [3, 1801 / 11524] loss: 0.062
13:08:28.683464
Train [3, 2001 / 11524] loss: 0.062
13:09:18.034103
Train [3, 2201 / 11524] loss: 0.062
13:10:07.441532
Train [3, 2401 / 11524] loss: 0.062
13:10:56.788837
Train [3, 2601 / 11524] loss: 0.062
13:11:45.948661
Train [3, 2801 / 11524] loss: 0.062
13:12:35.222827
Train [3, 3001 / 11524] loss: 0.062
13:13:24.365785
Train [3, 3201 / 11524] loss: 0.062
13:14:13.760509
Train [3, 3401 / 11524] loss: 0.062
13:15:03.053508
Train [3, 3601 / 11524] loss: 0.062
13:15:52.175341
Train [3,

Train [5, 7801 / 11524] loss: 0.062
15:38:19.808043
Train [5, 8001 / 11524] loss: 0.062
15:39:08.875158
Train [5, 8201 / 11524] loss: 0.062
15:39:58.150527
Train [5, 8401 / 11524] loss: 0.062
15:40:47.376791
Train [5, 8601 / 11524] loss: 0.062
15:41:36.542419
Train [5, 8801 / 11524] loss: 0.062
15:42:25.843971
Train [5, 9001 / 11524] loss: 0.062
15:43:15.018541
Train [5, 9201 / 11524] loss: 0.062
15:44:04.305027
Train [5, 9401 / 11524] loss: 0.062
15:44:53.491098
Train [5, 9601 / 11524] loss: 0.062
15:45:42.750031
Train [5, 9801 / 11524] loss: 0.062
15:46:32.003965
Train [5, 10001 / 11524] loss: 0.062
15:47:21.121550
Train [5, 10201 / 11524] loss: 0.062
15:48:10.326088
Train [5, 10401 / 11524] loss: 0.062
15:48:59.559623
Train [5, 10601 / 11524] loss: 0.062
15:49:48.708880
Train [5, 10801 / 11524] loss: 0.062
15:50:37.861171
Train [5, 11001 / 11524] loss: 0.062
15:51:27.094223
Train [5, 11201 / 11524] loss: 0.062
15:52:16.228771
Train [5, 11401 / 11524] loss: 0.062
15:53:05.317622
Vali

Train [8, 3601 / 11524] loss: 0.061
18:28:30.093644
Train [8, 3801 / 11524] loss: 0.061
18:29:19.331115
Train [8, 4001 / 11524] loss: 0.061
18:30:08.543496
Train [8, 4201 / 11524] loss: 0.062
18:30:57.803518
Train [8, 4401 / 11524] loss: 0.061
18:31:47.104003
Train [8, 4601 / 11524] loss: 0.061
18:32:36.352783
Train [8, 4801 / 11524] loss: 0.061
18:33:25.561756
Train [8, 5001 / 11524] loss: 0.061
18:34:14.754497
Train [8, 5201 / 11524] loss: 0.061
18:35:04.025481
Train [8, 5401 / 11524] loss: 0.061
18:35:53.089979
Train [8, 5601 / 11524] loss: 0.061
18:36:42.364914
Train [8, 5801 / 11524] loss: 0.061
18:37:31.528359
Train [8, 6001 / 11524] loss: 0.061
18:38:20.701033
Train [8, 6201 / 11524] loss: 0.061
18:39:09.860998
Train [8, 6401 / 11524] loss: 0.061
18:39:59.066759
Train [8, 6601 / 11524] loss: 0.061
18:40:48.159239
Train [8, 6801 / 11524] loss: 0.061
18:41:37.342810
Train [8, 7001 / 11524] loss: 0.061
18:42:26.724716
Train [8, 7201 / 11524] loss: 0.061
18:43:15.921644
Train [8, 74

Train [10, 11201 / 11524] loss: 0.061
21:05:20.526703
Train [10, 11401 / 11524] loss: 0.061
21:06:10.402746
Validate [10,     1 /   641 ] loss: 0.062
21:06:43.357921
Test set: Accuracy: 41.23%
Train [11, 1 / 11524] loss: 0.000
21:21:46.968078
Train [11, 201 / 11524] loss: 0.061
21:22:36.340353
Train [11, 401 / 11524] loss: 0.061
21:23:25.895086
Train [11, 601 / 11524] loss: 0.061
21:24:15.881822
Train [11, 801 / 11524] loss: 0.061
21:25:05.858864
Train [11, 1001 / 11524] loss: 0.061
21:25:55.886274
Train [11, 1201 / 11524] loss: 0.061
21:26:45.896416
Train [11, 1401 / 11524] loss: 0.061
21:27:35.841390
Train [11, 1601 / 11524] loss: 0.061
21:28:25.756965
Train [11, 1801 / 11524] loss: 0.061
21:29:15.615000
Train [11, 2001 / 11524] loss: 0.061
21:30:05.395578
Train [11, 2201 / 11524] loss: 0.061
21:30:55.245395
Train [11, 2401 / 11524] loss: 0.061
21:31:45.168304
Train [11, 2601 / 11524] loss: 0.061
21:32:35.005969
Train [11, 2801 / 11524] loss: 0.061
21:33:24.771679
Train [11, 3001 / 1

Train [13, 6401 / 11524] loss: 0.061
23:54:35.431593
Train [13, 6601 / 11524] loss: 0.061
23:55:25.567409
Train [13, 6801 / 11524] loss: 0.061
23:56:15.664679
Train [13, 7001 / 11524] loss: 0.061
23:57:05.730158
Train [13, 7201 / 11524] loss: 0.061
23:57:55.904097
Train [13, 7401 / 11524] loss: 0.061
23:58:46.056441
Train [13, 7601 / 11524] loss: 0.061
23:59:36.103200
Train [13, 7801 / 11524] loss: 0.061
00:00:26.174865
Train [13, 8001 / 11524] loss: 0.061
00:01:16.294206
Train [13, 8201 / 11524] loss: 0.061
00:02:06.414021
Train [13, 8401 / 11524] loss: 0.061
00:02:56.360331
Train [13, 8601 / 11524] loss: 0.061
00:03:46.455574
Train [13, 8801 / 11524] loss: 0.061
00:04:36.620655
Train [13, 9001 / 11524] loss: 0.061
00:05:26.737219
Train [13, 9201 / 11524] loss: 0.061
00:06:17.066601
Train [13, 9401 / 11524] loss: 0.061
00:07:07.195494
Train [13, 9601 / 11524] loss: 0.061
00:07:57.425625
Train [13, 9801 / 11524] loss: 0.061
00:08:47.507223
Train [13, 10001 / 11524] loss: 0.061
00:09:37

Train [16, 1601 / 11524] loss: 0.061
02:44:20.983084
Train [16, 1801 / 11524] loss: 0.061
02:45:11.351503
Train [16, 2001 / 11524] loss: 0.061
02:46:01.755185
Train [16, 2201 / 11524] loss: 0.061
02:46:52.177772
Train [16, 2401 / 11524] loss: 0.061
02:47:42.456094
Train [16, 2601 / 11524] loss: 0.061
02:48:32.786641
Train [16, 2801 / 11524] loss: 0.061
02:49:23.089250
Train [16, 3001 / 11524] loss: 0.061
02:50:13.355179
Train [16, 3201 / 11524] loss: 0.061
02:51:03.599033
Train [16, 3401 / 11524] loss: 0.061
02:51:53.865056
Train [16, 3601 / 11524] loss: 0.061
02:52:44.150338
Train [16, 3801 / 11524] loss: 0.061
02:53:34.745940
Train [16, 4001 / 11524] loss: 0.061
02:54:25.059163
Train [16, 4201 / 11524] loss: 0.061
02:55:15.269965
Train [16, 4401 / 11524] loss: 0.061
02:56:05.650346
Train [16, 4601 / 11524] loss: 0.061
02:56:55.948562
Train [16, 4801 / 11524] loss: 0.061
02:57:46.211027
Train [16, 5001 / 11524] loss: 0.061
02:58:36.578187
Train [16, 5201 / 11524] loss: 0.061
02:59:26.

Train [18, 8801 / 11524] loss: 0.061
05:21:41.095630
Train [18, 9001 / 11524] loss: 0.061
05:22:31.404704
Train [18, 9201 / 11524] loss: 0.061
05:23:21.729958
Train [18, 9401 / 11524] loss: 0.061
05:24:12.085691
Train [18, 9601 / 11524] loss: 0.061
05:25:02.384442
Train [18, 9801 / 11524] loss: 0.061
05:25:52.705434
Train [18, 10001 / 11524] loss: 0.061
05:26:43.062100
Train [18, 10201 / 11524] loss: 0.061
05:27:33.392562
Train [18, 10401 / 11524] loss: 0.061
05:28:23.671329
Train [18, 10601 / 11524] loss: 0.061
05:29:14.006896
Train [18, 10801 / 11524] loss: 0.061
05:30:04.256415
Train [18, 11001 / 11524] loss: 0.061
05:30:54.512705
Train [18, 11201 / 11524] loss: 0.061
05:31:44.941563
Train [18, 11401 / 11524] loss: 0.061
05:32:35.480686
Validate [18,     1 /   641 ] loss: 0.062
05:33:09.710300
Test set: Accuracy: 41.40%
Train [19, 1 / 11524] loss: 0.000
05:48:17.115811
Train [19, 201 / 11524] loss: 0.061
05:49:06.572617
Train [19, 401 / 11524] loss: 0.061
05:49:56.554131
Train [19, 

Train [21, 4001 / 11524] loss: 0.061
08:12:25.714452
Train [21, 4201 / 11524] loss: 0.061
08:13:15.906522
Train [21, 4401 / 11524] loss: 0.061
08:14:06.162416
Train [21, 4601 / 11524] loss: 0.061
08:14:56.459866
Train [21, 4801 / 11524] loss: 0.061
08:15:46.861430
Train [21, 5001 / 11524] loss: 0.061
08:16:37.067964
Train [21, 5201 / 11524] loss: 0.061
08:17:27.412776
Train [21, 5401 / 11524] loss: 0.061
08:18:17.693731
Train [21, 5601 / 11524] loss: 0.061
08:19:08.136014
Train [21, 5801 / 11524] loss: 0.061
08:19:58.445559
Train [21, 6001 / 11524] loss: 0.061
08:20:48.715119
Train [21, 6201 / 11524] loss: 0.061
08:21:38.899760
Train [21, 6401 / 11524] loss: 0.061
08:22:29.158585
Train [21, 6601 / 11524] loss: 0.061
08:23:19.415139
Train [21, 6801 / 11524] loss: 0.061
08:24:09.755773
Train [21, 7001 / 11524] loss: 0.061
08:25:00.147058
Train [21, 7201 / 11524] loss: 0.061
08:25:50.439155
Train [21, 7401 / 11524] loss: 0.061
08:26:40.790802
Train [21, 7601 / 11524] loss: 0.061
08:27:31.

Train [23, 11201 / 11524] loss: 0.061
10:47:58.484239
Train [23, 11401 / 11524] loss: 0.061
10:48:48.022342
Validate [23,     1 /   641 ] loss: 0.061
10:49:21.612024
Test set: Accuracy: 47.52%
Train [24, 1 / 11524] loss: 0.000
11:04:23.529839
Train [24, 201 / 11524] loss: 0.061
11:05:12.429221
Train [24, 401 / 11524] loss: 0.061
11:06:01.675718
Train [24, 601 / 11524] loss: 0.061
11:06:51.179672
Train [24, 801 / 11524] loss: 0.061
11:07:40.959614
Train [24, 1001 / 11524] loss: 0.061
11:08:30.710675
Train [24, 1201 / 11524] loss: 0.061
11:09:20.469027
Train [24, 1401 / 11524] loss: 0.061
11:10:10.077679
Train [24, 1601 / 11524] loss: 0.061
11:10:59.641712
Train [24, 1801 / 11524] loss: 0.061
11:11:49.226002
Train [24, 2001 / 11524] loss: 0.061
11:12:38.734367
Train [24, 2201 / 11524] loss: 0.061
11:13:28.245996
Train [24, 2401 / 11524] loss: 0.061
11:14:17.730200
Train [24, 2601 / 11524] loss: 0.061
11:15:07.119188
Train [24, 2801 / 11524] loss: 0.061
11:15:56.503022
Train [24, 3001 / 1

RuntimeError: CUDA out of memory. Tried to allocate 12.00 MiB (GPU 1; 7.79 GiB total capacity; 6.49 GiB already allocated; 21.38 MiB free; 6.60 GiB reserved in total by PyTorch)