In [1]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.models as models
import cv2
import sys
import math
import random
import splitfolders
import torchsummary
from tqdm import tqdm
from ResNet_18 import resnet
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

In [2]:
# Use the GPU when PyTorch can see one, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# Hyper-parameters shared by the cells below.
CFG = dict(
    IMG_SIZE=224,        # side length passed to the RP image Resize transform
    EPOCHS=50,           # number of epochs the training loop runs
    LEARNING_RATE=3e-4,  # initial learning rate handed to Adam
    BATCH_SIZE=32,       # batch size used by every DataLoader
    SEED=42,             # seed for seed_everything and the train/val split
)

In [4]:
def seed_everything(seed):
    """Seed every random number generator the notebook uses.

    Args:
        seed: Integer seed applied to Python's ``random`` module, NumPy,
            and PyTorch (CPU generator and every CUDA device).

    Side effects:
        Sets ``PYTHONHASHSEED`` in ``os.environ`` and switches cuDNN to
        deterministic mode with auto-tuning disabled.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all visible GPUs; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN time several convolution algorithms and pick
    # one per run, which can change results between runs. It must be off for
    # the deterministic flag above to give repeatable training.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED'])

In [5]:
# Load the train and test metadata tables; the first CSV column becomes the index.
train_df, test_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('./train_data.csv', './test_data.csv')
)

In [6]:
# Fit the encoder on the training labels only, then replace the 'action'
# column of both tables with the integer ids from that single fitted mapping.
le = LabelEncoder().fit(train_df['action'])
train_df['action'], test_df['action'] = (
    le.transform(frame['action']) for frame in (train_df, test_df)
)

In [7]:
# Cap each class at MAX_TRAIN_PER_CLASS images; smaller classes are kept whole.
MAX_TRAIN_PER_CLASS = 5000

img_path_list = []
# Iterate the class ids actually present, in ascending order, so the sequence
# of random.sample calls (and therefore the seeded draw) stays stable.
for class_id in sorted(train_df['action'].unique()):
    class_paths = train_df.loc[train_df['action'] == class_id, 'img_path'].tolist()
    if len(class_paths) >= MAX_TRAIN_PER_CLASS:
        # Sampling without replacement from Python's seeded `random` generator.
        class_paths = random.sample(class_paths, MAX_TRAIN_PER_CLASS)
    img_path_list.extend(class_paths)
df = pd.DataFrame({'img_path': img_path_list})

# The inner merge keeps only the train_df rows whose image path was selected.
path_label_df = pd.merge(train_df, df, on='img_path', how='inner')

In [8]:
path_label_df['action'].value_counts()

14    5000
13    5000
8     5000
4     4381
6     3968
12    2534
3     2481
10    2406
7     2291
1     2187
11    2144
9     1995
5     1927
2     1580
0      619
Name: action, dtype: int64

In [9]:
# Cap each class at MAX_TEST_PER_CLASS images; smaller classes are kept whole.
MAX_TEST_PER_CLASS = 300

img_path_list = []
# Iterate the class ids actually present, in ascending order, so the sequence
# of random.sample calls (and therefore the seeded draw) stays stable.
for class_id in sorted(test_df['action'].unique()):
    class_paths = test_df.loc[test_df['action'] == class_id, 'img_path'].tolist()
    if len(class_paths) >= MAX_TEST_PER_CLASS:
        # Sampling without replacement from Python's seeded `random` generator.
        class_paths = random.sample(class_paths, MAX_TEST_PER_CLASS)
    img_path_list.extend(class_paths)
df2 = pd.DataFrame({'img_path': img_path_list})

# The inner merge keeps only the test_df rows whose image path was selected.
path_label_df2 = pd.merge(test_df, df2, on='img_path', how='inner')
path_label_df2

Unnamed: 0,action,img_path,user_x,lat,lon
0,13,./ETRI_data_RP_png/user03/1600876800/RP/160090...,user03,37.488120,126.982263
1,10,./ETRI_data_RP_png/user26/1599836400/RP/159985...,user26,37.554346,126.922069
2,6,./ETRI_data_RP_png/user27/1600309320/RP/160031...,user27,37.529723,127.008367
3,5,./ETRI_data_RP_png/user04/1599957060/RP/159997...,user04,37.434939,127.138877
4,11,./ETRI_data_RP_png/user02/1598972400/RP/159903...,user02,37.521560,127.032720
...,...,...,...,...,...
3724,13,./ETRI_data_RP_png/user03/1599928800/RP/159999...,user03,37.283530,126.980865
3725,8,./ETRI_data_RP_png/user03/1601141400/RP/160121...,user03,37.485415,126.977617
3726,13,./ETRI_data_RP_png/user03/1600448400/RP/160048...,user03,37.517630,127.089633
3727,4,./ETRI_data_RP_png/user04/1599690000/RP/159970...,user04,37.464091,127.127168


In [10]:
path_label_df2['action'].value_counts()

13    300
6     300
8     300
14    300
4     300
12    273
10    271
3     271
11    250
7     248
1     247
9     220
5     208
2     158
0      83
Name: action, dtype: int64

In [11]:
# Rebind train_df to the class-capped frame, so every later cell works on the
# capped data. Re-running the capping cell after this samples from the capped frame.
train_df = path_label_df

In [12]:
# Rebind test_df to the class-capped frame, so every later cell works on the
# capped data. Re-running the capping cell after this samples from the capped frame.
test_df = path_label_df2

In [13]:
# Stratified 90/10 split of the capped training frame. Only the frame is
# passed in, so exactly two pieces (train, val) are returned.
train, val = train_test_split(
    train_df,
    test_size=0.1,
    random_state=CFG['SEED'],
    stratify=train_df['action'],
)

In [14]:
# Point every split at the local image directory. Each frame is rewritten from
# its OWN paths: building test_df's column from train_df would replace the test
# images with training images through index alignment.
# The copies detach train/val from the frame they were split from, so the
# column assignment does not write into a slice (SettingWithCopyWarning).
train = train.copy()
val = val.copy()
train['img_path'] = train['img_path'].apply(lambda x : x.replace('./ETRI_data_RP_png', '../ETRIdata'))
val['img_path'] = val['img_path'].apply(lambda x : x.replace('./ETRI_data_RP_png', '../ETRIdata'))
test_df['img_path'] = test_df['img_path'].apply(lambda x : x.replace('./ETRI_data_RP_png', '../ETRIdata'))

In [15]:
train

Unnamed: 0,action,img_path,user_x,lat,lon
41386,3,../ETRIdata/user03/1600876800/RP/1600952820.png,user03,37.485394,126.977561
36791,10,../ETRIdata/user01/1601166300/RP/1601193840.png,user01,37.482449,126.956348
18443,6,../ETRIdata/user26/1600095600/RP/1600155180.png,user26,37.278514,127.163543
27588,12,../ETRIdata/user09/1600403400/RP/1600414800.png,user09,37.381507,127.230470
7780,14,../ETRIdata/user04/1600207800/RP/1600247820.png,user04,37.477683,127.122614
...,...,...,...,...,...
10322,6,../ETRIdata/user06/1601912160/RP/1601986620.png,user06,37.513558,127.045732
10575,1,../ETRIdata/user26/1600441200/RP/1600497840.png,user26,37.242523,127.390785
21870,7,../ETRIdata/user04/1600558200/RP/1600583160.png,user04,37.434926,127.138867
23790,8,../ETRIdata/user03/1599928800/RP/1600005840.png,user03,37.485407,126.977599


In [16]:
# Preprocessing for the RP images: resize to a CFG['IMG_SIZE'] square, then
# apply albumentations' Normalize with its default arguments.
RP_tfms = A.Compose(
    [
        A.Resize(height=CFG['IMG_SIZE'], width=CFG['IMG_SIZE']),
        A.Normalize(),
    ],
    p=1,
)

In [17]:
# 112x112 resize followed by default Normalize. This pipeline is passed to
# GpsDataset as `tfms`, but GpsDataset.__getitem__ never applies it.
Gps_tfms = A.Compose(
    [
        A.Resize(height=112, width=112),
        A.Normalize(),
    ],
    p=1,
)

In [18]:
class RPDataset(Dataset):
    """Dataset that reads RP image files from disk with OpenCV.

    Args:
        df: Table describing the split; only its length is used (``__len__``).
        rp_path_list: Sequence of image file paths, indexed by sample position.
        label_list: Sequence of labels aligned with ``rp_path_list``, or
            ``None`` for unlabeled data.
        tfms: Optional callable invoked as ``tfms(image=img)['image']``
            (the albumentations calling convention). When ``None`` the RGB
            image is used as loaded.
    """

    def __init__(self, df, rp_path_list, label_list, tfms=None):
        super().__init__()
        self.df = df
        self.rp_path_list = rp_path_list
        self.label_list = label_list
        self.tfms = tfms

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)``, or just ``image`` when there are no labels.

        The image is a channel-first tensor (the loaded H x W x C array is
        permuted with ``(2, 0, 1)``).

        Raises:
            FileNotFoundError: If OpenCV cannot read the file at this index.
        """
        path = self.rp_path_list[idx]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None
            # instead of raising; fail here with the offending path.
            raise FileNotFoundError(f'Could not read image: {path}')
        # OpenCV loads channels as BGR; convert to RGB before transforming.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.tfms is not None:
            img = self.tfms(image=img)['image']
        image = torch.tensor(np.array(img)).permute(2, 0, 1)

        if self.label_list is not None:
            label = self.label_list[idx]
            return image, label
        else:
            return image

In [19]:
class GpsDataset(Dataset):
    """Dataset that yields one (lat, lon) pair per sample as a 1-D tensor.

    Args:
        df: Table describing the split; only its length is used (``__len__``).
        lat_path_list: Sequence of latitude values, indexed by sample position.
        lon_path_list: Sequence of longitude values, indexed by sample position.
        label_list: Sequence of labels, or ``None`` for unlabeled data.
        tfms: Stored on the instance but not applied by ``__getitem__``.
    """

    def __init__(self, df, lat_path_list, lon_path_list, label_list, tfms=None):
        super().__init__()
        self.df, self.tfms = df, tfms
        self.lat_path_list, self.lon_path_list = lat_path_list, lon_path_list
        self.label_list = label_list

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(feature_map, label)``, or only ``feature_map`` without labels."""
        coords = np.array([self.lat_path_list[idx], self.lon_path_list[idx]])
        feature_map = torch.tensor(coords)

        if self.label_list is None:
            return feature_map
        return feature_map, self.label_list[idx]

In [20]:
train

Unnamed: 0,action,img_path,user_x,lat,lon
41386,3,../ETRIdata/user03/1600876800/RP/1600952820.png,user03,37.485394,126.977561
36791,10,../ETRIdata/user01/1601166300/RP/1601193840.png,user01,37.482449,126.956348
18443,6,../ETRIdata/user26/1600095600/RP/1600155180.png,user26,37.278514,127.163543
27588,12,../ETRIdata/user09/1600403400/RP/1600414800.png,user09,37.381507,127.230470
7780,14,../ETRIdata/user04/1600207800/RP/1600247820.png,user04,37.477683,127.122614
...,...,...,...,...,...
10322,6,../ETRIdata/user06/1601912160/RP/1601986620.png,user06,37.513558,127.045732
10575,1,../ETRIdata/user26/1600441200/RP/1600497840.png,user26,37.242523,127.390785
21870,7,../ETRIdata/user04/1600558200/RP/1600583160.png,user04,37.434926,127.138867
23790,8,../ETRIdata/user03/1599928800/RP/1600005840.png,user03,37.485407,126.977599


In [21]:
# Image loaders: the training split is reshuffled every epoch, the validation
# split is read in a fixed order.
RP_train_dataset = RPDataset(
    df=train,
    rp_path_list=train['img_path'].values,
    label_list=train['action'].values,
    tfms=RP_tfms,
)
RP_train_loader = DataLoader(
    RP_train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)

RP_val_dataset = RPDataset(
    df=val,
    rp_path_list=val['img_path'].values,
    label_list=val['action'].values,
    tfms=RP_tfms,
)
RP_val_loader = DataLoader(
    RP_val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [22]:
# GPS (lat, lon) loaders.
# NOTE(review): Gps_train_loader shuffles independently of RP_train_loader, so
# when the two are zipped a GPS batch does not belong to the same samples as
# the image batch. Feeding GPS to a model needs one dataset that returns
# (image, gps, label) together.
Gps_train_dataset = GpsDataset(
    df=train,
    lat_path_list=train['lat'].values,
    lon_path_list=train['lon'].values,
    label_list=train['action'].values,
    tfms=Gps_tfms,
)
Gps_train_loader = DataLoader(Gps_train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

# Validation coordinates must come from `val` itself; taking them from `train`
# pairs each validation label with the coordinates of an unrelated training row.
Gps_val_dataset = GpsDataset(
    df=val,
    lat_path_list=val['lat'].values,
    lon_path_list=val['lon'].values,
    label_list=val['action'].values,
    tfms=Gps_tfms,
)
Gps_val_loader = DataLoader(Gps_val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [23]:
class FocalLoss(nn.Module):
    """Focal loss for multi-class classification on raw logits.

    Each sample's cross-entropy is scaled by ``(1 - p_t) ** gamma``, where
    ``p_t`` is the softmax probability assigned to the true class, so
    confidently-correct samples contribute less.

    Args:
        weight: Optional per-class weight tensor forwarded to
            ``F.cross_entropy``. It scales the loss term only; ``p_t`` is
            always taken from the unweighted probabilities.
        gamma: Focusing exponent.
        reduction: ``'mean'`` (default), ``'sum'`` or ``'none'``.
    """

    def __init__(self, weight=None, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.weight = weight
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Compute the focal loss.

        Args:
            inputs: Logits as accepted by ``F.cross_entropy``.
            targets: Class-index targets as accepted by ``F.cross_entropy``.

        Returns:
            A scalar for ``'mean'``/``'sum'``, or the per-sample losses for
            ``'none'``.

        Raises:
            ValueError: If ``reduction`` is not one of the supported names.
        """
        # The focal factor must be computed per sample. Reducing the
        # cross-entropy first would apply one shared factor derived from the
        # batch-average loss, which is not focal loss.
        nll = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-nll)  # unweighted CE is -log(p_t)
        if self.weight is None:
            ce_loss = nll
        else:
            ce_loss = F.cross_entropy(inputs, targets, weight=self.weight, reduction='none')
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        if self.reduction == 'sum':
            return focal_loss.sum()
        if self.reduction == 'none':
            return focal_loss
        raise ValueError(f'Unsupported reduction: {self.reduction!r}')

In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of zero padding.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=1,
        padding_mode='zeros',
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 (pointwise) convolution.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
        # A 1x1 kernel needs no padding. padding=1 would grow each spatial
        # dimension by 2 and break any residual add against a 3x3/pad-1 branch.
        padding=0,
        padding_mode='zeros'
    )

class ChannelAttention(nn.Module):
    """Rescale each channel of the input by a learned gate in (0, 1).

    The gate is computed from the globally average-pooled input through two
    bias-free 1x1 convolutions (ReLU between them) followed by a sigmoid.

    Args:
        channel: Number of input (and output) channels.
    """

    def __init__(self, channel):
        super().__init__()
        gate_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channel, channel, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(channel, channel, kernel_size=1, padding=0, bias=False),
            nn.Sigmoid(),
        ]
        self.attention = nn.Sequential(*gate_layers)

    def forward(self, x):
        # The gate has one value per channel and broadcasts over H and W.
        return x * self.attention(x)
    
class CAB(nn.Module):
    """Conv3x3 -> GELU -> Conv3x3 -> ChannelAttention, all at ``channel`` width.

    Args:
        channel: Number of input (and output) channels.
    """

    def __init__(self, channel):
        super().__init__()

        def _same_conv():
            # 3x3, stride 1, padding 1: spatial size is preserved.
            return nn.Conv2d(channel, channel, kernel_size=3, padding=1, stride=1)

        self.cab = nn.Sequential(
            _same_conv(),
            nn.GELU(),
            _same_conv(),
            ChannelAttention(channel),
        )

    def forward(self, x):
        return self.cab(x)


class IdentityBlock(nn.Module):
    """Basic two-convolution residual block.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The skip path is the
    identity when the block keeps both the resolution and the channel count;
    otherwise it is a strided 1x1 convolution + BN so the two paths can be added.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first convolution (and of the projection).
    """

    def __init__(self, in_planes, out_planes, stride = 1):
        super(IdentityBlock, self).__init__()

        self.conv1 = conv3x3(in_planes, out_planes, stride)
        self.conv2 = conv3x3(out_planes, out_planes, 1)

        self.bn1   = nn.BatchNorm2d(out_planes)
        self.bn2   = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        # A projection is needed whenever the main path changes the tensor
        # shape: either spatially (stride) or in channel count.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                # 1x1 kernel with no padding, so the output size matches the
                # strided 3x3/pad-1 convolution on the main path.
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_planes)
            )

    def forward(self, x):
        # Route the skip connection through self.shortcut. Adding the raw
        # input would ignore the projection and fail on any shape change.
        identity = self.shortcut(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = out + identity
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    """ResNet-style image classifier that appends a GPS vector before the head.

    Stem (conv -> CAB -> BN -> ReLU -> max-pool), four stages of ``block``,
    global average pooling, concatenation with the 2-value GPS vector, and a
    linear classifier.

    Args:
        block: Residual block class, called as ``block(in_planes, out_planes, stride=...)``.
        in_planes: Channel width used by the stem and all four stages.
        num_blocks: Sequence of four ints, the number of blocks per stage.
        num_classes: Number of output logits.
    """

    def __init__(self, block, in_planes, num_blocks, num_classes):
        super(ResNet, self).__init__()

        self.in_planes = in_planes

        # The stem must emit `in_planes` channels because CAB, BatchNorm and
        # every stage below are sized with `in_planes`. A fixed width of 32
        # only works when in_planes happens to be 32.
        # NOTE(review): padding=3 with a 3x3 kernel enlarges the map by 4
        # pixels per dimension; kept as in the original, confirm it is intended.
        self.conv = nn.Conv2d(3, in_planes, kernel_size = 3, stride = 1, padding = 3, padding_mode='zeros', bias=False)
        self.cab = CAB(in_planes)
        self.bn = nn.BatchNorm2d(in_planes)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.layer1 = self.make_layer(block, in_planes, num_blocks[0], stride=1)
        self.layer2 = self.make_layer(block, in_planes, num_blocks[1], stride=1)
        self.layer3 = self.make_layer(block, in_planes, num_blocks[2], stride=1)
        self.layer4 = self.make_layer(block, in_planes, num_blocks[3], stride=1)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # +2 for the (lat, lon) pair concatenated in forward().
        self.linear  = nn.Linear(in_planes + 2, num_classes)

    def make_layer(self, block, out_planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one receives ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            # Forward the stride to the block instead of silently dropping it.
            layers.append(block(self.in_planes, out_planes, stride=block_stride))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x, g):
        """Classify image batch ``x`` together with GPS batch ``g``.

        Args:
            x: Image batch with 3 channels.
            g: GPS batch; must be 2-D with 2 values per sample to match the
                ``in_planes + 2`` classifier input.

        Returns:
            Logits of shape ``(batch, num_classes)``.
        """
        out = self.conv(x)
        out = self.cab(out)
        out = self.bn(out)
        out = F.relu(out)
        out = self.maxpool(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        out = torch.flatten(out, 1)
        # Concatenate pooled image features with the GPS values along the
        # feature axis (same result as concatenating (B, C, 1, 1) maps and
        # flattening afterwards).
        out = torch.cat((out, g.float()), dim=1)
        out = self.linear(out)
        return out


def ResNet18(in_planes, num_classes):
    """Build a ``ResNet`` of ``IdentityBlock``s with two blocks in each of the four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(IdentityBlock, in_planes, blocks_per_stage, num_classes)

In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F

in_planes = 64
num_classes = 15

def conv3x3(in_planes, out_planes, stride=1):
    """Return a 3x3 ``nn.Conv2d`` with zero padding of 1 and no bias term.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).
    """
    # Positional arguments after the channel counts: kernel_size, stride, padding.
    layer = nn.Conv2d(in_planes, out_planes, 3, stride, 1, bias=False, padding_mode='zeros')
    return layer


class Residual(nn.Module):
    """Two-convolution residual block.

    Main path: conv3x3(stride) -> BN -> ReLU -> conv3x3 -> BN -> ReLU. The
    result is added to the skip path, which is the input itself or, when the
    block changes channel count or resolution, a 1x1 projection of it.

    Args:
        numIn: Number of input channels.
        numOut: Number of output channels.
        stride: Down-sampling factor of the block (default 1).
    """

    def __init__(self, numIn, numOut, stride = 1):
        super(Residual, self).__init__()
        self.numIn = numIn
        self.numOut = numOut
        self.stride = stride
        self.conv1 = nn.Conv2d(self.numIn, self.numOut, bias = False, kernel_size = 3, stride = self.stride, padding = 1)
        self.bn1 = nn.BatchNorm2d(self.numOut)
        self.relu = nn.ReLU(inplace = True)
        # Only the first convolution down-samples. Striding here as well would
        # apply the factor twice and never match the skip path.
        self.conv2 = nn.Conv2d(self.numOut, self.numOut, bias = False, kernel_size = 3, stride = 1, padding = 1)
        self.bn2 = nn.BatchNorm2d(self.numOut)

        # The skip path needs a projection whenever the main path changes the
        # tensor shape, spatially or in channels.
        self.needs_projection = self.numIn != self.numOut or self.stride != 1
        if self.needs_projection:
            self.conv4 = nn.Conv2d(self.numIn, self.numOut, bias = True, kernel_size = 1, stride = self.stride)

    def forward(self, x):
        residual = self.conv4(x) if self.needs_projection else x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        return out + residual
    
class CAB(nn.Module):
    """Two shape-preserving 3x3 convolutions (GELU between) followed by channel attention.

    Args:
        channel: Number of input (and output) channels.
    """

    def __init__(self, channel):
        super().__init__()
        stages = [
            nn.Conv2d(channel, channel, kernel_size=3, stride=1, padding=1),
            nn.GELU(),
            nn.Conv2d(channel, channel, kernel_size=3, stride=1, padding=1),
            # ChannelAttention is looked up in the notebook namespace when the
            # block is instantiated.
            ChannelAttention(channel),
        ]
        self.cab = nn.Sequential(*stages)

    def forward(self, x):
        return self.cab(x)
    

class  ResNet_CAM(nn.Module):
    """32-channel residual CNN that fuses a 2-value GPS vector before the head.

    Args:
        nOut: Number of output logits.

    Notes:
        ``conv2``, ``gap``, ``lr1`` and ``lr2`` are not used by ``forward``.
        They are kept so the module's ``state_dict`` keys stay unchanged.
    """

    def __init__(self,nOut):
        super(ResNet_CAM, self).__init__()
        self.cab1 = CAB(32)
        self.cab2 = CAB(34)  # 32 image channels + 2 GPS values
        self.nOut = nOut
        # NOTE(review): padding=3 with a 3x3 kernel enlarges the map by 4
        # pixels per dimension; kept as in the original, confirm it is intended.
        self.conv1 = nn.Conv2d(3, 32, kernel_size = 3, stride = 1, padding = 3,bias = False)
        self.conv2 = conv3x3(34, 34, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace = True)
        self.maxpool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.res1 = Residual(32,32)
        self.res2 = Residual(32,32)

        self.res3 = Residual(32,32)
        self.res4 = Residual(32,32)

        self.res5 = Residual(32,32)
        self.res6 = Residual(32,32)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Size the head from the network's own width and from `nOut`, not from
        # notebook-level globals that may be changed or missing.
        self.linear  = nn.Linear(32 + 2, nOut)

        self.gap = nn.AvgPool2d(kernel_size = 14, stride = 1)
        self.lr1 = nn.Linear(32,nOut)
        self.lr2 = nn.Linear(256,nOut)

    def forward(self, out, g):
        """Return logits for image batch ``out`` and GPS batch ``g``.

        Args:
            out: Image batch with 3 channels. The shape comments below assume
                224x224 inputs.
            g: GPS batch; must be 2-D with 2 values per sample to match the
                34-channel fusion layers.

        Returns:
            Logits of shape ``(batch, nOut)``.
        """
        out = self.conv1(out)        # B x 32 x 228 x 228
        out = self.cab1(out)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)      # B x 32 x 114 x 114
        out = self.res1(out)
        out = self.res2(out)
        out = self.maxpool(out)      # B x 32 x 57 x 57
        out = self.res3(out)
        out = self.res4(out)
        out = self.maxpool(out)      # B x 32 x 28 x 28
        out = self.res5(out)
        out = self.res6(out)
        out = self.maxpool(out)      # B x 32 x 14 x 14
        out = self.avgpool(out)      # B x 32 x 1 x 1
        # Turn the GPS vector into a (B, 2, 1, 1) map so it can be stacked on
        # the channel axis with the pooled image features.
        g = g.unsqueeze(-1).unsqueeze(-1).float()
        out = torch.cat((out, g), dim=1)   # B x 34 x 1 x 1
        out = self.cab2(out)
        out = torch.flatten(out, 1)
        out = self.linear(out)

        return out

In [26]:
def validation(model, criterion, device):
    """Evaluate ``model`` on the validation loaders.

    Reads the notebook-level ``RP_val_loader`` and ``Gps_val_loader``.

    Args:
        model: Network called as ``model(images)``.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(mean validation loss, micro-averaged F1)``.
    """
    model.eval()

    # These accumulators must be created here; without them the first
    # append / += raises NameError / UnboundLocalError.
    val_loss = []
    preds, trues = [], []

    with torch.no_grad():
        # The GPS batch is drawn to keep the two loaders in step, but the
        # model is called with images only, so it is not used.
        for (images, labels), _gps_batch in zip(tqdm(RP_val_loader), Gps_val_loader):
            images = images.to(device)
            labels = labels.to(device)

            logit = model(images)

            loss = criterion(logit, labels)

            val_loss.append(loss.item())

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()

        _val_loss = np.mean(val_loss)

    _val_score = f1_score(trues, preds, average='micro')
    return _val_loss, _val_score

In [27]:
def train(model, optimizer, scheduler, device):
    """Train ``model`` for ``CFG['EPOCHS']`` epochs and restore its best weights.

    Reads the notebook-level ``CFG``, ``RP_train_loader``, ``Gps_train_loader``,
    ``RP_val_loader`` and ``Gps_val_loader``. After every epoch the model is
    evaluated on the validation loaders; the weights of the epoch with the
    highest micro-F1 are snapshotted and loaded back before returning.

    NOTE: defining this function rebinds the notebook name ``train``, which
    earlier cells use for the training DataFrame. Re-running those cells after
    this one fails until the split cell is run again.

    Args:
        model: Network called as ``model(images)``.
        optimizer: Optimizer over ``model``'s parameters.
        scheduler: Optional scheduler stepped with the validation F1, or ``None``.
        device: Device used for the model and the batches.

    Returns:
        The same ``model`` object, carrying the best-scoring weights (or its
        final weights if no epoch scored above 0).
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    best_val_score = 0
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []

        # The GPS batch is drawn to keep the two loaders in step, but the
        # model is called with images only, so it is not used.
        for (images, labels), _gps_batch in zip(tqdm(RP_train_loader), Gps_train_loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        model.eval()
        val_loss = []
        preds, trues = [], []

        with torch.no_grad():
            for (images, labels), _gps_batch in zip(tqdm(RP_val_loader), Gps_val_loader):
                images = images.to(device)
                labels = labels.to(device)

                logit = model(images)

                loss = criterion(logit, labels)

                val_loss.append(loss.item())

                preds += logit.argmax(1).detach().cpu().numpy().tolist()
                trues += labels.detach().cpu().numpy().tolist()

            _val_loss = np.mean(val_loss)

        _val_score = f1_score(trues, preds, average='micro')

        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_val_score < _val_score:
            best_val_score = _val_score
            # Keeping a reference to `model` would not preserve anything: the
            # same object keeps training, so the "best" model would just be the
            # last epoch. Snapshot the weights instead, on the CPU so the copy
            # does not occupy accelerator memory.
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

In [28]:
# Backbone choice. The two ResNet18 constructors are left commented out as
# alternatives; the active line builds torchvision's VGG16 with pretrained=True.
# model = resnet.ResNet18(64, 15)
# model = ResNet18(64, 15)
model = models.vgg16(pretrained=True)
model



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [29]:
# Replace the last classifier layer (index 6) with a fresh 15-way head.
# Re-running this cell re-initialises that head.
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs, 15)
model = model.to(device)
# No model.eval() here: train() switches the mode itself at the start of each
# epoch, so setting eval mode before training has no effect and misleads.
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# mode='max' because the scheduler is stepped with the validation F1: the
# learning rate is halved when that score stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, scheduler, device)

100%|██████████| 1224/1224 [08:58<00:00,  2.27it/s]
100%|██████████| 136/136 [00:32<00:00,  4.25it/s]


Epoch [1], Train Loss : [2.55004] Val Loss : [2.49529] Val F1 : [0.17188]


100%|██████████| 1224/1224 [08:51<00:00,  2.30it/s]
100%|██████████| 136/136 [00:30<00:00,  4.50it/s]


Epoch [2], Train Loss : [2.45308] Val Loss : [2.38576] Val F1 : [0.23415]


100%|██████████| 1224/1224 [08:51<00:00,  2.30it/s]
100%|██████████| 136/136 [00:30<00:00,  4.51it/s]


Epoch [3], Train Loss : [2.39440] Val Loss : [2.36226] Val F1 : [0.24770]


100%|██████████| 1224/1224 [08:50<00:00,  2.31it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [4], Train Loss : [2.35308] Val Loss : [2.32530] Val F1 : [0.25574]


100%|██████████| 1224/1224 [08:52<00:00,  2.30it/s]
100%|██████████| 136/136 [00:30<00:00,  4.49it/s]


Epoch [5], Train Loss : [2.31848] Val Loss : [2.29389] Val F1 : [0.27275]


100%|██████████| 1224/1224 [08:58<00:00,  2.27it/s]
100%|██████████| 136/136 [00:30<00:00,  4.49it/s]


Epoch [6], Train Loss : [2.28199] Val Loss : [2.30926] Val F1 : [0.26792]


100%|██████████| 1224/1224 [08:55<00:00,  2.29it/s]
100%|██████████| 136/136 [00:30<00:00,  4.51it/s]


Epoch [7], Train Loss : [2.25384] Val Loss : [2.26352] Val F1 : [0.27872]


100%|██████████| 1224/1224 [08:52<00:00,  2.30it/s]
100%|██████████| 136/136 [00:30<00:00,  4.52it/s]


Epoch [8], Train Loss : [2.22272] Val Loss : [2.28078] Val F1 : [0.27734]


100%|██████████| 1224/1224 [08:50<00:00,  2.31it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [9], Train Loss : [2.19451] Val Loss : [2.25245] Val F1 : [0.28837]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:30<00:00,  4.51it/s]


Epoch [10], Train Loss : [2.16501] Val Loss : [2.24086] Val F1 : [0.29504]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [11], Train Loss : [2.12285] Val Loss : [2.23766] Val F1 : [0.29848]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [12], Train Loss : [2.08299] Val Loss : [2.26546] Val F1 : [0.29619]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [13], Train Loss : [2.04179] Val Loss : [2.32883] Val F1 : [0.27987]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [14], Train Loss : [1.98885] Val Loss : [2.35079] Val F1 : [0.29205]
Epoch 00014: reducing learning rate of group 0 to 1.5000e-04.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [15], Train Loss : [1.78022] Val Loss : [2.38558] Val F1 : [0.28745]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [16], Train Loss : [1.59726] Val Loss : [2.58589] Val F1 : [0.27091]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [17], Train Loss : [1.40544] Val Loss : [2.81788] Val F1 : [0.27413]
Epoch 00017: reducing learning rate of group 0 to 7.5000e-05.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [18], Train Loss : [1.05212] Val Loss : [3.32388] Val F1 : [0.25919]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [19], Train Loss : [0.81885] Val Loss : [3.80912] Val F1 : [0.26585]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [20], Train Loss : [0.62924] Val Loss : [4.47674] Val F1 : [0.25414]
Epoch 00020: reducing learning rate of group 0 to 3.7500e-05.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.54it/s]


Epoch [21], Train Loss : [0.42528] Val Loss : [5.26667] Val F1 : [0.24954]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [22], Train Loss : [0.32982] Val Loss : [5.87306] Val F1 : [0.25391]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [23], Train Loss : [0.27550] Val Loss : [6.36595] Val F1 : [0.25299]
Epoch 00023: reducing learning rate of group 0 to 1.8750e-05.


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [24], Train Loss : [0.21014] Val Loss : [6.81309] Val F1 : [0.25391]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [25], Train Loss : [0.18369] Val Loss : [7.20006] Val F1 : [0.25506]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [26], Train Loss : [0.16058] Val Loss : [7.62806] Val F1 : [0.25276]
Epoch 00026: reducing learning rate of group 0 to 9.3750e-06.


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [27], Train Loss : [0.14161] Val Loss : [7.93036] Val F1 : [0.25506]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [28], Train Loss : [0.13283] Val Loss : [8.10717] Val F1 : [0.25460]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.60it/s]


Epoch [29], Train Loss : [0.12333] Val Loss : [8.33465] Val F1 : [0.25551]
Epoch 00029: reducing learning rate of group 0 to 4.6875e-06.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [30], Train Loss : [0.11402] Val Loss : [8.50471] Val F1 : [0.25460]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [31], Train Loss : [0.11010] Val Loss : [8.54390] Val F1 : [0.25483]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [32], Train Loss : [0.10742] Val Loss : [8.69961] Val F1 : [0.25574]
Epoch 00032: reducing learning rate of group 0 to 2.3437e-06.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [33], Train Loss : [0.09992] Val Loss : [8.78452] Val F1 : [0.25666]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [34], Train Loss : [0.10015] Val Loss : [8.85126] Val F1 : [0.25735]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [35], Train Loss : [0.09692] Val Loss : [8.90442] Val F1 : [0.25551]
Epoch 00035: reducing learning rate of group 0 to 1.1719e-06.


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [36], Train Loss : [0.09733] Val Loss : [8.97364] Val F1 : [0.25574]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [37], Train Loss : [0.09734] Val Loss : [9.02421] Val F1 : [0.25368]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [38], Train Loss : [0.09542] Val Loss : [9.02155] Val F1 : [0.25620]
Epoch 00038: reducing learning rate of group 0 to 5.8594e-07.


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [39], Train Loss : [0.09312] Val Loss : [9.02971] Val F1 : [0.25528]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [40], Train Loss : [0.09401] Val Loss : [9.05835] Val F1 : [0.25551]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [41], Train Loss : [0.09369] Val Loss : [9.06950] Val F1 : [0.25620]
Epoch 00041: reducing learning rate of group 0 to 2.9297e-07.


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [42], Train Loss : [0.09335] Val Loss : [9.08128] Val F1 : [0.25551]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [43], Train Loss : [0.09271] Val Loss : [9.08607] Val F1 : [0.25666]


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]


Epoch [44], Train Loss : [0.09153] Val Loss : [9.09728] Val F1 : [0.25597]
Epoch 00044: reducing learning rate of group 0 to 1.4648e-07.


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.53it/s]


Epoch [45], Train Loss : [0.09277] Val Loss : [9.09566] Val F1 : [0.25597]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.55it/s]


Epoch [46], Train Loss : [0.09150] Val Loss : [9.10112] Val F1 : [0.25597]


100%|██████████| 1224/1224 [08:39<00:00,  2.36it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [47], Train Loss : [0.09180] Val Loss : [9.10698] Val F1 : [0.25597]
Epoch 00047: reducing learning rate of group 0 to 7.3242e-08.


100%|██████████| 1224/1224 [08:40<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.56it/s]


Epoch [48], Train Loss : [0.09250] Val Loss : [9.10872] Val F1 : [0.25666]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.58it/s]


Epoch [49], Train Loss : [0.09031] Val Loss : [9.11026] Val F1 : [0.25643]


100%|██████████| 1224/1224 [08:39<00:00,  2.35it/s]
100%|██████████| 136/136 [00:29<00:00,  4.57it/s]

Epoch [50], Train Loss : [0.09102] Val Loss : [9.11502] Val F1 : [0.25597]
Epoch 00050: reducing learning rate of group 0 to 3.6621e-08.





In [None]:
# Test loaders. Both must keep dataset order (shuffle=False): predictions are
# compared positionally with test_df['action'], and the two loaders are zipped,
# so a shuffled GPS loader would pair each image batch with unrelated rows.
RP_test_dataset = RPDataset(df=test_df, rp_path_list=test_df['img_path'].values, label_list=test_df['action'].values, tfms=RP_tfms)
RP_test_loader = DataLoader(RP_test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

Gps_test_dataset = GpsDataset(df=test_df, lat_path_list=test_df['lat'].values, lon_path_list=test_df['lon'].values, label_list=test_df['action'].values, tfms=Gps_tfms)
Gps_test_loader = DataLoader(Gps_test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
def inference(model, device):
    """Predict a class index for every sample of the test loaders.

    Reads the notebook-level ``RP_test_loader`` and ``Gps_test_loader``.

    Args:
        model: Network called as ``model(images)``.
        device: Device used for the model and the image batches.

    Returns:
        List of predicted class indices in loader order.
    """
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        # Labels and the GPS batch are unpacked but not needed: the model is
        # called with images only and only the predictions are returned.
        for (images, _labels), _gps_batch in zip(tqdm(RP_test_loader), Gps_test_loader):
            logit = model(images.to(device))
            preds.extend(logit.argmax(1).detach().cpu().numpy().tolist())
    return preds

In [None]:
preds = inference(model, device)

In [None]:
from sklearn.metrics import confusion_matrix

# The result is stored under the same name as the imported function; the import
# above restores the function every time this cell is re-run.
confusion_matrix = confusion_matrix(
    test_df['action'],
    preds,
    labels=list(range(0, 15)),
)
confusion_matrix

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# 25x25-inch figure; the title is set on the current axes, which the heatmap
# below then draws into.
plt.figure(figsize = (25,25))
plt.title('Confusion Matrix')

# `confusion_matrix` here is the array produced by the previous cell;
# annot=True writes each count into its cell.
sns.heatmap(confusion_matrix, annot=True)

In [None]:
from sklearn.metrics import f1_score

# Micro-averaged F1 of the test predictions against the encoded labels.
f1 = f1_score(y_true=test_df['action'], y_pred=preds, average='micro')
print('F1-score: {0:.4f}'.format(f1))

In [None]:
# torch.save does not create missing directories, so make sure the target exists.
os.makedirs('./save_model', exist_ok=True)
# Saves the whole module object (pickled), so loading it later needs the same
# model class definitions to be importable.
torch.save(model, './save_model/0425_RPmGps_lr3e4_pre.pt')

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test split; classes are named "0".."14".
y_true, y_pred = test_df['action'], preds
target_names = list(map(str, range(15)))
print(classification_report(y_true, y_pred, target_names=target_names))

In [None]:
# Repeats the classification report of the previous cell with the same inputs;
# relies on `classification_report` having been imported there.
y_true = test_df['action']
y_pred = preds
target_names = [str(x) for x in range(15)]
print(classification_report(y_true, y_pred, target_names=target_names))