In [None]:
!pip install torchsummary
!pip install scikit-learn
!pip install split-folders
!pip install pandas
!pip install opencv-python
!pip install albumentations

In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.models as models
import cv2
import sys
import math
import random
import splitfolders
import torchsummary
from tqdm.auto import tqdm
from resnet import ResNet18
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score

In [None]:
# Display whether PyTorch reports a usable CUDA device in this environment.
torch.cuda.is_available()

In [None]:
data_path = './userdata/user04'

# Every sub-directory <name> of data_path is read as <name>/<name>_label.csv.
# Hidden entries and plain files are skipped so a stray file or hidden folder
# does not turn into a failing read_csv call; sorting makes the load order
# independent of the filesystem's listing order.
user_dir_list = sorted(
    entry for entry in os.listdir(data_path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(data_path, entry))
)

user_label_list = []
for element in user_dir_list:
    label_data_path = data_path + '/' + element + '/' + element + '_label.csv'
    label_data = pd.read_csv(label_data_path)
    user_label_list.append(label_data)

df = pd.concat(user_label_list)
# Alias of the concatenated frame before sorting / column pruning.
# NOTE(review): not used by any cell visible here — confirm it is still needed.
tmp_df = df
# Order rows by timestamp and keep only the columns used downstream.
df = df.sort_values('ts')
df = df.drop(columns=['actionSub', 'actionSubOption', 'condition', 'conditionSub1Option', 'conditionSub2Option', 'place', 'emotionPositive', 'emotionTension'])

df.head()

In [None]:
# Number of label rows currently held in df.
len(df)

In [None]:
# Frequency of each actionOption value (the column later used as the class label).
df['actionOption'].value_counts()

In [None]:
# Number of distinct actionOption values that occur in df.
len(df['actionOption'].value_counts())

In [None]:
# Root of the RP image tree for this user.
rp_data_path = './RPdata/user04/'
# One hard-coded sub-folder, listed only to inspect how the image files are named.
# NOTE(review): rp_data_path already ends with '/', so this concatenation
# produces a double slash ('user04//1598828400').
tmp_path = rp_data_path + '/1598828400/RP/'
tmp_list_dir = os.listdir(tmp_path)

In [None]:
# Sort the sample listing in place, then display it.
tmp_list_dir.sort()
tmp_list_dir

In [None]:
# Drop the last four characters of the first file name
# (presumably a 3-letter extension such as '.png' — TODO confirm).
tmp_list_dir[0][:-4]

In [None]:
# Spot-check the label row(s) for one timestamp.
# The comparison is against an integer, so it only matches while `ts` is still
# numeric; a later cell rewrites `ts` as strings.
df[df['ts'] == 1598829240]

In [None]:
# Turn `ts` into a string key that can be joined against image file stems.
# Only a literal trailing '.0' (the float repr of a whole number) is removed,
# so integer-typed timestamps are not truncated and re-running this cell on
# the already-converted column leaves it unchanged.
df['ts'] = df['ts'].astype(str).str.replace(r'\.0$', '', regex=True)

In [None]:
# Display df to check the rewritten `ts` column.
df

In [None]:
# Build an index of every image under <rp_data_path>/<folder>/RP/ with
#   ts      : file name without its extension (join key against df['ts'])
#   rp_path : path to the image file
# Rows are collected in a list and converted once; growing a DataFrame with
# DataFrame.append inside a loop is quadratic and the method no longer exists
# in pandas >= 2.0.
rp_dir_list = sorted(os.listdir(rp_data_path))

rp_records = []
for element in rp_dir_list:
    rp_dir = rp_data_path + element + '/RP'
    if not os.path.isdir(rp_dir):
        # Skip entries that do not contain an RP folder (stray files etc.).
        continue
    for img in sorted(os.listdir(rp_dir)):
        rp_records.append({
            # splitext handles extensions of any length, unlike img[:-4].
            'ts': os.path.splitext(img)[0],
            'rp_path': rp_dir + '/' + img,
        })

# Passing `columns` keeps the expected schema even when no image was found.
rp_df = pd.DataFrame(rp_records, columns=['ts', 'rp_path'])

rp_df

In [None]:
# Attach the image path to each label row by matching on the `ts` key.
# The join type is left at the pandas default.
df = df.merge(rp_df, on='ts')

In [None]:
# Display df after the merge with rp_df (now includes the rp_path column).
df

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Replace the actionOption values with integer class ids.
# The fitted encoder stays available as `le` so ids can be mapped back later.
le = LabelEncoder().fit(df['actionOption'])
df['actionOption'] = le.transform(df['actionOption'])

In [None]:
# Display df after label encoding (actionOption now holds integer ids).
df

Train process

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Run configuration read by the seeding, transform, loader and training cells.
CFG = dict(
    IMG_SIZE=224,        # side length passed to the resize transform
    EPOCHS=30,           # number of training epochs
    LEARNING_RATE=3e-4,  # initial optimizer learning rate
    BATCH_SIZE=16,       # batch size for both data loaders
    SEED=42,             # seed for RNGs and the train/validation split
)

In [None]:
def seed_everything(seed):
    """Seed every RNG used by this notebook and request deterministic cuDNN.

    Args:
        seed: Integer seed applied to `random`, NumPy and PyTorch (CPU and
            all CUDA devices).
    """
    random.seed(seed)
    # Exported for any child process; it does not change hashing in the
    # interpreter that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also cover additional GPUs, if any
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN auto-select kernels per run, which can change
    # results between runs; it must be off for the determinism requested above.
    torch.backends.cudnn.benchmark = False

# Apply the configured seed before the split, loaders and model are created.
seed_everything(CFG['SEED'])

In [None]:
# Hold out 20% of the rows for validation; the split is seeded for repeatability.
# NOTE(review): the name `train` is rebound to the training function defined in
# a later cell, so cells that read this frame cannot be re-run after that one.
train, val = train_test_split(df, test_size=0.2, random_state=CFG['SEED'])

In [None]:
# Display the training split.
train

In [None]:
# Display the validation split.
val

In [None]:
# Preprocessing shared by the training and validation datasets:
# resize to a square of CFG['IMG_SIZE'], then normalize with the library defaults.
# No random augmentation is configured.
tfms = A.Compose(
    [
        A.Resize(height=CFG['IMG_SIZE'], width=CFG['IMG_SIZE']),
        A.Normalize(),
    ],
    p=1,
)

In [None]:
class CustomDataset(Dataset):
    """Image dataset backed by a sequence of image file paths.

    Args:
        df: Source DataFrame. Stored on the instance but not read by this class.
        rp_path_list: Indexable sequence of image file paths.
        label_list: Indexable sequence of labels aligned with ``rp_path_list``,
            or ``None`` for unlabeled data.
        tfms: Optional callable invoked as ``tfms(image=<RGB array>)`` that
            returns a mapping with an ``'image'`` entry. When ``None`` the
            decoded image is used unchanged.
    """

    def __init__(self, df, rp_path_list, label_list, tfms=None):
        super().__init__()
        self.df = df
        self.rp_path_list = rp_path_list
        self.label_list = label_list
        self.tfms = tfms

    def __len__(self):
        """Return the number of image paths."""
        return len(self.rp_path_list)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label)`` when labels were supplied, otherwise ``image``.
            ``image`` is a tensor with the channel axis moved to the front.

        Raises:
            FileNotFoundError: If the image at ``rp_path_list[idx]`` cannot be
                read.
        """
        path = self.rp_path_list[idx]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # The transform is optional (the constructor defaults it to None).
        if self.tfms is not None:
            img = self.tfms(image=img)['image']

        # (H, W, C) -> (C, H, W), the layout the model consumes.
        image = torch.tensor(np.array(img)).permute(2, 0, 1)

        if self.label_list is not None:
            label = self.label_list[idx]
            return image, label
        else:
            return image

In [None]:
# Wrap each split in a dataset; both use the same transform pipeline.
train_dataset = CustomDataset(
    df=df,
    rp_path_list=train['rp_path'].values,
    label_list=train['actionOption'].values,
    tfms=tfms,
)
val_dataset = CustomDataset(
    df=df,
    rp_path_list=val['rp_path'].values,
    label_list=val['actionOption'].values,
    tfms=tfms,
)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['BATCH_SIZE'],
    shuffle=False,
    num_workers=0,
)

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model`` for ``CFG['EPOCHS']`` epochs and return it with its best weights.

    After every epoch the model is scored with ``validation``. The weights of
    the epoch with the highest validation score are snapshotted and restored
    into ``model`` before returning.

    Args:
        model: Network to train; moved to ``device`` in place.
        optimizer: Optimizer built over ``model``'s parameters.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Validation batches, passed through to ``validation``.
        scheduler: Optional scheduler stepped with the validation score after
            each epoch, or ``None``.
        device: Device on which batches and the model are placed.

    Returns:
        ``model`` loaded with the best-scoring weights, or ``None`` when no
        epoch achieved a validation score above 0.
    """
    import copy  # local import keeps this cell self-contained

    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    best_val_score = 0
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        # Iterate the loader directly so tqdm can show the total batch count.
        for images, labels in tqdm(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(_val_score)

        if best_val_score < _val_score:
            best_val_score = _val_score
            # Snapshot a *copy* of the weights: keeping a reference to `model`
            # would silently track every later epoch's updates.
            best_state = copy.deepcopy(model.state_dict())

    if best_state is None:
        return None

    model.load_state_dict(best_state)
    return model

In [None]:
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: Network to evaluate; switched to eval mode.
        criterion: Loss callable applied to ``(logits, labels)``.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device on which each batch is placed.

    Returns:
        Tuple ``(mean batch loss, macro-averaged F1 score)``.
    """
    model.eval()
    batch_losses = []
    predicted, expected = [], []

    with torch.no_grad():
        for images, labels in tqdm(iter(val_loader)):
            images, labels = images.to(device), labels.to(device)

            logits = model(images)
            batch_losses.append(criterion(logits, labels).item())

            # Predicted class = index of the largest logit along dim 1.
            predicted.extend(logits.argmax(1).detach().cpu().numpy().tolist())
            expected.extend(labels.detach().cpu().numpy().tolist())

    mean_loss = np.mean(batch_losses)
    macro_f1 = f1_score(expected, predicted, average='macro')
    return mean_loss, macro_f1

In [None]:
# Build the network from the project-local `resnet` module.
# NOTE(review): 26 is presumably the number of output classes — confirm it
# matches the number of encoded actionOption ids.
model = ResNet18(64, 26)
# The training function switches the model back to train mode each epoch,
# so this only affects the model's state until training starts.
model.eval()

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=CFG["LEARNING_RATE"],
)

# Halve the learning rate when the monitored score (mode='max') has not
# improved for more than `patience` epochs.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=2,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)