# Copied from https://www.kaggle.com/code/mayukh18/pytorch-fog-end-to-end-baseline-lb-0-254

In [1]:
import os
import gc
import random
import time
import math
import json
from tqdm import tqdm
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from sklearn.model_selection import train_test_split, StratifiedGroupKFold
from sklearn.metrics import accuracy_score, average_precision_score

import warnings
warnings.filterwarnings(action='ignore')
print("Env set ok ..")

Env set ok ..


# Stratified Group K Fold

The competition data description states that the subjects in the training set differ from those in the test set, and that the public and private splits of the test data also contain different subjects. Validation therefore has to be split by subject, so we use Stratified Group K-Fold with the subject as the group. Because positive instances are very scarce in the sequences, we also inspect every fold and pick the one that gives the best balance of positive and negative instances between the train and validation parts.

### tdcsfog preprocessing

In [2]:
# Analysis of positive instances in each fold of our CV folds (tdcsfog recordings).
#
# The metadata is read from a copy saved at training time: this notebook runs
# again at submission time, and (per the original author's note) the metadata in
# the competition folder would then also list the test files.
TDCS_TRAIN_DIR = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog"
TDCS_FOLD = 2  # fold used for the actual train/validation split (see below)

metadata = pd.read_csv("/kaggle/input/copy-train-metadata/tdcsfog_metadata.csv")

# Per-recording number of positive rows for each event class, plus the row count.
n1_sum = []
n2_sum = []
n3_sum = []
count = []
for f in tqdm(metadata['Id']):
    df = pd.read_csv(f"{TDCS_TRAIN_DIR}/{f}.csv")

    n1_sum.append(np.sum(df['StartHesitation']))
    n2_sum.append(np.sum(df['Turn']))
    n3_sum.append(np.sum(df['Walking']))
    count.append(len(df))

# Computed from the data; the original printed a hard-coded "32".
n_all_three = sum(
    1 for a, b, c in zip(n1_sum, n2_sum, n3_sum) if a > 0 and b > 0 and c > 0
)
print(f"{n_all_three} files have positive values in all 3 classes")

metadata['n1_sum'] = n1_sum
metadata['n2_sum'] = n2_sum
metadata['n3_sum'] = n3_sum
metadata['count'] = count

# y is a constant label, so there is no class information to stratify on; the
# folds are formed from the subject groups. The splits are materialised once so
# the fold chosen below is guaranteed to be one of the folds summarised here.
sgkf = StratifiedGroupKFold(n_splits=5, random_state=42, shuffle=True)
folds = list(sgkf.split(X=metadata['Id'], y=[1]*len(metadata), groups=metadata['Subject']))

for i, (train_index, valid_index) in enumerate(folds):
    print(f"Fold = {i}")
    print(f"Length of Train = {len(train_index)}, Length of Valid = {len(valid_index)}")
    for part, part_index in (("Train", train_index), ("Valid", valid_index)):
        c1, c2, c3 = (
            metadata.loc[part_index, col].sum() for col in ('n1_sum', 'n2_sum', 'n3_sum')
        )
        print(f"{part} classes: {c1:,}, {c2:,}, {c3:,}")

# The actual train/validation split: fold 2, which the original author judged
# to be the most well balanced of the five folds summarised above.
train_index, valid_index = folds[TDCS_FOLD]
print(f"Fold = {TDCS_FOLD}")
train_ids = metadata.loc[train_index, 'Id']
valid_ids = metadata.loc[valid_index, 'Id']
print(f"Length of Train = {len(train_ids)}, Length of Valid = {len(valid_ids)}")

train_fpaths_tdcs = [f"{TDCS_TRAIN_DIR}/{_id}.csv" for _id in train_ids]
valid_fpaths_tdcs = [f"{TDCS_TRAIN_DIR}/{_id}.csv" for _id in valid_ids]

100%|██████████| 833/833 [00:18<00:00, 46.01it/s]

32 files have positive values in all 3 classes
Fold = 0
Length of Train = 672, Length of Valid = 161
Train classes: 287,832, 1,462,652, 175,633
Valid classes: 16,958, 216,130, 32,205
Fold = 1
Length of Train = 613, Length of Valid = 220
Train classes: 51,748, 909,505, 65,242
Valid classes: 253,042, 769,277, 142,596
Fold = 2
Length of Train = 703, Length of Valid = 130
Train classes: 271,881, 1,332,746, 183,673
Valid classes: 32,909, 346,036, 24,165
Fold = 3
Length of Train = 649, Length of Valid = 184
Train classes: 303,710, 1,517,147, 205,196
Valid classes: 1,080, 161,635, 2,642
Fold = 4
Length of Train = 695, Length of Valid = 138
Train classes: 303,989, 1,493,078, 201,608
Valid classes: 801, 185,704, 6,230
Fold = 2
Length of Train = 703, Length of Valid = 130





### defog preprocessing

In [3]:
# Analysis of positive instances in each fold of our CV folds (defog recordings).
#
# The metadata is read from a copy saved at training time: this notebook runs
# again at submission time, and (per the original author's note) the metadata in
# the competition folder would then also list the test files.
DEFOG_TRAIN_DIR = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/defog"
DEFOG_FOLD = 1  # fold used for the actual train/validation split (see below)

metadata = pd.read_csv("/kaggle/input/copy-train-metadata/defog_metadata.csv")

# Per-recording number of positive rows for each event class, plus the row count.
# Ids without a CSV in the train folder get zeros and are filtered out below.
n1_sum = []
n2_sum = []
n3_sum = []
count = []
for f in tqdm(metadata['Id']):
    fpath = f"{DEFOG_TRAIN_DIR}/{f}.csv"
    if not os.path.exists(fpath):
        n1_sum.append(0)
        n2_sum.append(0)
        n3_sum.append(0)
        count.append(0)
        continue

    df = pd.read_csv(fpath)
    n1_sum.append(np.sum(df['StartHesitation']))
    n2_sum.append(np.sum(df['Turn']))
    n3_sum.append(np.sum(df['Walking']))
    count.append(len(df))

metadata['n1_sum'] = n1_sum
metadata['n2_sum'] = n2_sum
metadata['n3_sum'] = n3_sum
metadata['count'] = count

# Keep only recordings that were actually read. reset_index() makes the labels
# positional again, which the .loc lookups with fold indices below rely on (it
# also leaves the old labels behind in an 'index' column, as before).
metadata = metadata[metadata['count'] > 0].reset_index()

# y is a constant label, so there is no class information to stratify on; the
# folds are formed from the subject groups. The splits are materialised once so
# the fold chosen below is guaranteed to be one of the folds summarised here.
sgkf = StratifiedGroupKFold(n_splits=5, random_state=42, shuffle=True)
folds = list(sgkf.split(X=metadata['Id'], y=[1]*len(metadata), groups=metadata['Subject']))

for i, (train_index, valid_index) in enumerate(folds):
    print(f"Fold = {i}")
    print(f"Length of Train = {len(train_index)}, Length of Valid = {len(valid_index)}")
    for part, part_index in (("Train", train_index), ("Valid", valid_index)):
        c1, c2, c3 = (
            metadata.loc[part_index, col].sum() for col in ('n1_sum', 'n2_sum', 'n3_sum')
        )
        print(f"{part} classes: {c1:,}, {c2:,}, {c3:,}")

# The actual train/validation split is based on FOLD 1.
# NOTE: the original comment here said "FOLD 2", but the code selected fold 1
# (`if i != 1: continue`) and its `if i == 2: break` could never run. The
# selection is kept and the comment corrected, so the split itself is unchanged.
train_index, valid_index = folds[DEFOG_FOLD]
print(f"Fold = {DEFOG_FOLD}")
train_ids = metadata.loc[train_index, 'Id']
valid_ids = metadata.loc[valid_index, 'Id']
print(f"Length of Train = {len(train_ids)}, Length of Valid = {len(valid_ids)}")

train_fpaths_de = [f"{DEFOG_TRAIN_DIR}/{_id}.csv" for _id in train_ids]
valid_fpaths_de = [f"{DEFOG_TRAIN_DIR}/{_id}.csv" for _id in valid_ids]

100%|██████████| 137/137 [00:23<00:00,  5.77it/s]

Fold = 0
Length of Train = 75, Length of Valid = 16
Train classes: 500, 428,683, 37,609
Valid classes: 0, 158,803, 60,910
Fold = 1
Length of Train = 65, Length of Valid = 26
Train classes: 216, 490,429, 84,955
Valid classes: 284, 97,057, 13,564
Fold = 2
Length of Train = 76, Length of Valid = 15
Train classes: 410, 488,634, 87,986
Valid classes: 90, 98,852, 10,533
Fold = 3
Length of Train = 70, Length of Valid = 21
Train classes: 435, 424,494, 88,800
Valid classes: 65, 162,992, 9,719
Fold = 4
Length of Train = 78, Length of Valid = 13
Train classes: 439, 517,704, 94,726
Valid classes: 61, 69,782, 3,793
Fold = 1
Length of Train = 65, Length of Valid = 26





In [4]:
# Pair every CSV path with the tag of the dataset it came from ('de' or 'tdcs');
# the dataset class uses the tag to decide which columns to add when reading.
# defog paths come first, then tdcsfog paths.
train_fpaths = [
    (path, tag)
    for tag, paths in (('de', train_fpaths_de), ('tdcs', train_fpaths_tdcs))
    for path in paths
]
valid_fpaths = [
    (path, tag)
    for tag, paths in (('de', valid_fpaths_de), ('tdcs', valid_fpaths_tdcs))
    for path in paths
]

# DataLoader

For each time step, the model input is a window of accelerometer (Acc) readings taken from both before and after that time step. Where part of the window falls outside the available data, the missing readings are padded with zeros.

In [5]:
class FOGDataset1(Dataset):
    """Windowed dataset over concatenated recording CSVs (earlier copy of ``FOGDataset``).

    All files are read up front, stacked row-wise into one array and padded with
    zero rows at both ends, so that a fixed-size window can be cut around any row.

    Args:
        fpaths: sequence of ``(csv_path, type_tag)`` pairs; ``type_tag == "tdcs"``
            selects the tdcsfog column handling in :meth:`read`.
        datacfg: object providing ``wx`` (row stride), ``window_past`` and
            ``window_future`` (window sizes counted in strided steps).
        scale: stored on the instance; not used by the visible code.
        split: ``"train"`` (random sampling), ``"test"`` (items carry a row id),
            or anything else for plain sequential access.
    """

    def __init__(self, fpaths, datacfg, scale=9.806, split="train"):
        # Fixed: the original called super(FOGDataset, self), naming a different
        # class, which fails as soon as FOGDataset1 is instantiated.
        super(FOGDataset1, self).__init__()
        tm = time.time()
        self.split = split
        self.scale = scale
        self.datacfg = datacfg
        self.fpaths = fpaths
        self.dfs = [self.read(f[0], f[1]) for f in fpaths]
        # File id = file name without its 4-character extension (".csv").
        self.f_ids = [os.path.basename(f[0])[:-4] for f in self.fpaths]
        
        # Cumulative end offset and row count of every file in the stacked array.
        self.end_indices = []
        self.shapes = []
        _length = 0
        for df in self.dfs:
            self.shapes.append(df.shape[0])
            _length += df.shape[0]
            self.end_indices.append(_length)
        
        self.dfs = np.concatenate(self.dfs, axis=0).astype(np.float16)
        self.length = self.dfs.shape[0]
        
        shape1 = self.dfs.shape[1]
        
        # Fixed: the padding sizes came from an unrelated global `cfg`; they are
        # now taken from the datacfg passed to this dataset.
        self.dfs = np.concatenate(
            [
                np.zeros((datacfg.wx*datacfg.window_past, shape1)),
                self.dfs,
                np.zeros((datacfg.wx*datacfg.window_future, shape1)),
            ],
            axis=0
        )
        print(f"Dataset initialized in {time.time() - tm} secs!")
        gc.collect()
        
    def read(self, f, _type):
        """Read one CSV into an array; non-test files get extra trailing columns.

        For ``"tdcs"`` files the columns ``Valid``, ``Task`` and ``tdcs`` are set
        to 1; for other files only ``tdcs`` (0) is added, so they are expected to
        already carry ``Valid``/``Task`` (assumption — verify against the data).
        """
        df = pd.read_csv(f)
        if self.split == "test":
            return np.array(df)
        
        if _type =="tdcs":
            df['Valid'] = 1
            df['Task'] = 1
            df['tdcs'] = 1
        else:
            df['tdcs'] = 0
        
        return np.array(df)
            
    def __getitem__(self, index):
        """Return ``(x, y, t)``, or ``(_id, x, t)`` when ``split == "test"``.

        ``x`` is the strided window around the chosen row (columns 1:4, newest
        row first), ``y`` holds columns 4:7 of that row and ``t`` is the product
        of the third-last and second-last columns of that row.
        """
        # Number of zero rows in front of the data; shifts data row -> padded row.
        pad_past = self.datacfg.wx*self.datacfg.window_past

        if self.split == "train":
            # The requested index is ignored: training draws a random row.
            row_idx = random.randint(0, self.length-1) + pad_past
        elif self.split == "test":
            # Find the file that contains this global row index.
            for i,e in enumerate(self.end_indices):
                if index >= e:
                    continue
                df_idx = i
                break

            row_idx_true = self.shapes[df_idx] - (self.end_indices[df_idx] - index)
            _id = self.f_ids[df_idx] + "_" + str(row_idx_true)
            row_idx = index + pad_past
        else:
            row_idx = index + pad_past
            
        #scale = 9.806 if self.dfs[row_idx, -1] == 1 else 1.0
        # Files are stacked, so a window near a file boundary also contains rows
        # of the neighbouring file.
        x = self.dfs[
            row_idx - self.datacfg.wx*self.datacfg.window_past : \
            row_idx + self.datacfg.wx*self.datacfg.window_future, 1:4
        ]
        x = x[::self.datacfg.wx, :][::-1, :]
        x = torch.tensor(x.astype('float'))#/scale
        
        t = self.dfs[row_idx, -3]*self.dfs[row_idx, -2]
        
        if self.split == "test":
            return _id, x, t
        
        y = self.dfs[row_idx, 4:7].astype('float')
        y = torch.tensor(y)
        
        return x, y, t
    
    def __len__(self):
        # return self.length
        # Training samples rows at random, so an epoch is a fixed number of draws.
        if self.split == "train":
            return 5_000_000
        return self.length

In [6]:
class FOGDataset(Dataset):
    """Windowed dataset over concatenated recording CSVs.

    All files are read up front, stacked row-wise into one float16 array and
    padded with zero rows at both ends, so that a fixed-size window can be cut
    around any row. Because the files are stacked, a window close to a file
    boundary also contains rows of the neighbouring file.

    Args:
        fpaths: sequence of ``(csv_path, type_tag)`` pairs; ``type_tag == "tdcs"``
            selects the tdcsfog column handling in :meth:`read`.
        datacfg: object providing ``wx`` (row stride), ``window_past`` and
            ``window_future`` (window sizes counted in strided steps).
        scale: stored on the instance; not used by the visible code.
        split: ``"train"`` (random sampling), ``"test"`` (items carry a row id),
            or anything else for plain sequential access.
    """

    def __init__(self, fpaths, datacfg, scale=9.806, split="train"):
        super(FOGDataset, self).__init__()
        tm = time.time()
        self.split = split
        self.scale = scale
        self.datacfg = datacfg
        self.fpaths = fpaths
        self.dfs = [self.read(f[0], f[1]) for f in fpaths]
        # File id = file name without its 4-character extension (".csv").
        self.f_ids = [os.path.basename(f[0])[:-4] for f in self.fpaths]

        # Row count of every file and its cumulative end offset in the stack.
        self.shapes = [arr.shape[0] for arr in self.dfs]
        self.end_indices = []
        _length = 0
        for n_rows in self.shapes:
            _length += n_rows
            self.end_indices.append(_length)
        # Array copy of the offsets for the binary search in __getitem__.
        self._end_offsets = np.asarray(self.end_indices, dtype=np.int64)

        data = np.concatenate(self.dfs, axis=0).astype(np.float16)
        self.length = data.shape[0]
        n_cols = data.shape[1]

        # The padding is created in the data's own dtype. With the default
        # float64 zeros, np.concatenate promoted the whole array back to
        # float64 and threw away the memory saved by the float16 cast; the
        # stored values are identical either way.
        self.dfs = np.concatenate(
            [
                np.zeros((datacfg.wx*datacfg.window_past, n_cols), dtype=data.dtype),
                data,
                np.zeros((datacfg.wx*datacfg.window_future, n_cols), dtype=data.dtype),
            ],
            axis=0
        )
        print(f"Dataset initialized in {time.time() - tm} secs!")
        gc.collect()

    def read(self, f, _type):
        """Read one CSV into an array; non-test files get extra trailing columns.

        For ``"tdcs"`` files the columns ``Valid``, ``Task`` and ``tdcs`` are set
        to 1; for other files only ``tdcs`` (0) is added, so they are expected to
        already carry ``Valid``/``Task`` (assumption — verify against the data).
        """
        df = pd.read_csv(f)
        if self.split == "test":
            return np.array(df)

        if _type =="tdcs":
            df['Valid'] = 1
            df['Task'] = 1
            df['tdcs'] = 1
        else:
            df['tdcs'] = 0

        return np.array(df)

    def __getitem__(self, index):
        """Return ``(x, y, t)``, or ``(_id, x, t)`` when ``split == "test"``.

        ``x`` is a float64 tensor with the strided window around the chosen row
        (columns 1:4, newest row first), ``y`` a float64 tensor with columns 4:7
        of that row, and ``t`` the float64 product of the third-last and
        second-last columns of that row. ``_id`` is ``"<file id>_<row in file>"``.

        Raises:
            IndexError: in the test split, when ``index`` is outside
                ``[0, len(self))``.
        """
        stride = self.datacfg.wx
        pad_past = stride*self.datacfg.window_past      # zero rows before the data
        pad_future = stride*self.datacfg.window_future

        if self.split == "train":
            # The requested index is ignored: training draws a random row.
            row_idx = random.randint(0, self.length-1) + pad_past
        elif self.split == "test":
            if not 0 <= index < self.length:
                raise IndexError(f"index {index} is out of range for {self.length} rows")
            # First file whose end offset lies beyond `index` (binary search
            # instead of a Python loop over all files for every item).
            df_idx = int(np.searchsorted(self._end_offsets, index, side="right"))
            row_idx_true = self.shapes[df_idx] - (self.end_indices[df_idx] - index)
            _id = self.f_ids[df_idx] + "_" + str(row_idx_true)
            row_idx = index + pad_past
        else:
            row_idx = index + pad_past

        #scale = 9.806 if self.dfs[row_idx, -1] == 1 else 1.0
        x = self.dfs[row_idx - pad_past : row_idx + pad_future, 1:4]
        # Keep every `stride`-th row, then reverse so the newest row comes first.
        x = x[::stride, :][::-1, :]
        x = torch.tensor(x.astype('float'))#/scale

        # Multiply in float64 so `t` keeps the type and value it had when the
        # stored array was float64.
        t = np.float64(self.dfs[row_idx, -3])*np.float64(self.dfs[row_idx, -2])

        if self.split == "test":
            return _id, x, t

        y = self.dfs[row_idx, 4:7].astype('float')
        y = torch.tensor(y)

        return x, y, t

    def __len__(self):
        # Training samples rows at random, so an epoch is a fixed number of draws.
        if self.split == "train":
            return 5_000_000
        return self.length

In [7]:

class DataConfig:
    """Settings for building the windowed datasets and their loaders."""

    # Folders holding the training CSVs of the two recording sets.
    train_dir1 = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/defog"
    train_dir2 = "/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/train/tdcsfog"

    # Samples per batch handed to the DataLoader.
    batch_size = 1024

    # Window layout, counted in (strided) time steps:
    #   window_size   - total length of the input sequence
    #   window_future - steps taken after the target time point
    #   window_past   - steps taken before the target time point
    window_size = 32
    window_future = 8
    window_past = window_size - window_future

    # Row stride: the dataset keeps every wx-th row of the raw window, and the
    # zero padding added around the data is wx * window_past / wx * window_future rows.
    wx = 8

    # Names of the input feature columns and of the label columns.
    feature_list = ['AccV', 'AccML', 'AccAP']
    label_list = ['StartHesitation', 'Turn', 'Walking']


datacfg = DataConfig()
    

In [8]:
# Build the datasets (every CSV is loaded eagerly, so this takes a while).
train_dataset = FOGDataset(train_fpaths, datacfg, split="train")
valid_dataset = FOGDataset(valid_fpaths, datacfg, split="valid")
print(f"lengths of datasets: train - {len(train_dataset)}, valid - {len(valid_dataset)}")

# Both loaders share batch size and worker count; only the training loader shuffles.
_loader_opts = dict(batch_size=datacfg.batch_size, num_workers=5)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_opts)
valid_loader = DataLoader(valid_dataset, **_loader_opts)


Dataset initialized in 42.650877237319946 secs!
Dataset initialized in 12.80724287033081 secs!
lengths of datasets: train - 5000000, valid - 4984740


# Training kits

In [9]:
from torch.cuda.amp import GradScaler

def count_parameters(model:nn.Module):
    """Return the total number of elements in the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        # Parameters with requires_grad=False are frozen and not counted.
        if param.requires_grad:
            total += param.numel()
    return total


def train_one_epoch(model, loader, optimizer, criterion, cfg):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        model: network called as ``model(x)``.
        loader: iterable of ``(x, y, t)`` batches; ``t`` weights each sample's
            loss (samples with ``t == 0`` contribute nothing).
        optimizer: optimizer stepped once per batch.
        criterion: element-wise loss, called as ``criterion(y_pred, y)``.
        cfg: object whose ``device`` attribute names the target device.

    Returns:
        Sum of the per-batch losses divided by ``len(loader)``.
    """
    grad_scaler = GradScaler()
    running_loss = 0.

    model.train()
    # Each iteration handles one batch drawn from the DataLoader.
    for inputs, targets, weights in tqdm(loader):
        inputs = inputs.to(cfg.device).float()
        targets = targets.to(cfg.device).float()
        weights = weights.to(cfg.device).float()

        # Forward pass, then the element-wise loss against the ground truth,
        # weighted per sample and averaged over the output columns.
        outputs = model(inputs)
        per_sample = torch.mean(criterion(outputs, targets)*weights.unsqueeze(-1), dim=1)

        # Normalise by the total weight; a batch with zero total weight yields
        # a zero loss that is still attached to the graph.
        weight_total = torch.sum(weights)
        batch_loss = (
            torch.sum(per_sample)/weight_total
            if weight_total > 0
            else torch.sum(per_sample)*0.
        )

        # Back-propagate the scaled loss, update the parameters through the
        # scaler, then clear the gradients for the next batch.
        grad_scaler.scale(batch_loss).backward()
        grad_scaler.step(optimizer)
        grad_scaler.update()
        optimizer.zero_grad()

        running_loss += batch_loss.item()

    total_loss = (running_loss/len(loader))
    print(f"Train Loss: {total_loss:.04f}")

    return total_loss

def validation_one_epoch(model, loader, criterion, cfg):
    """Evaluate ``model`` on ``loader`` and return the mean average precision.

    Args:
        model: network called as ``model(x)``; switched to eval mode.
        loader: iterable of ``(x, y, t)`` batches; samples with ``t > 0`` are
            the only ones that enter the score.
        criterion: element-wise loss, called as ``criterion(y_pred, y)``.
        cfg: object whose ``device`` attribute names the target device.

    Returns:
        Mean of the average-precision scores of the three output columns,
        computed from the raw model outputs.
    """
    running_loss = 0.
    true_batches, pred_batches, weight_batches = [], [], []

    model.eval()
    for inputs, targets, weights in tqdm(loader):
        inputs = inputs.to(cfg.device).float()
        targets = targets.to(cfg.device).float()
        weights = weights.to(cfg.device).float()

        # Evaluation only: no gradients are tracked, no back-propagation.
        with torch.no_grad():
            outputs = model(inputs)
            per_sample = torch.mean(criterion(outputs, targets)*weights.unsqueeze(-1), dim=1)

            weight_total = torch.sum(weights)
            batch_loss = (
                torch.sum(per_sample)/weight_total
                if weight_total > 0
                else torch.sum(per_sample)*0.
            )

        running_loss += batch_loss.item()
        true_batches.append(targets.cpu().numpy())
        pred_batches.append(outputs.cpu().numpy())
        weight_batches.append(weights.cpu().numpy())

    # Stack the batches and keep only the rows with a positive weight.
    keep = np.concatenate(weight_batches, axis=0) > 0
    y_true_epoch = np.concatenate(true_batches, axis=0)[keep, :]
    y_pred_epoch = np.concatenate(pred_batches, axis=0)[keep, :]

    scores = [
        average_precision_score(y_true_epoch[:, col], y_pred_epoch[:, col])
        for col in range(3)
    ]
    mean_score = np.mean(scores)
    print(f"Validation Loss: {(running_loss/len(loader)):.04f}, Validation Score: {mean_score:.03f}, ClassWise: {scores[0]:.03f},{scores[1]:.03f},{scores[2]:.03f}")

    return mean_score




In [10]:
# Run a full garbage-collection pass. As the last expression of the cell, its
# return value (the number of unreachable objects found) is shown as the output.
gc.collect()

21

### Build Model & Optr

In [11]:
class ResNet20(nn.Module):
    """1-D residual convolutional feature extractor.

    A stem convolution (``feature_channel`` -> 16 channels) is followed by three
    stages of three residual blocks each, with 16, 32 and 64 output channels.
    Every convolution has kernel size 3 and padding 1, so the sequence length
    is preserved.

    Args:
        feature_channel: number of input channels (last axis of the input).
        pooling_kernelsize: kernel of the average pooling applied across the
            64 output channels in ``forward``; values <= 1 disable pooling.
    """

    class ResidualBlock(nn.Module):
        """Two conv/batch-norm layers plus a shortcut, followed by a ReLU."""

        def __init__(self, in_channel, out_channel) -> None:
            super().__init__()
            self.seq = nn.Sequential(
                nn.Conv1d(in_channel, out_channel, 3, padding=1),
                nn.BatchNorm1d(out_channel),
                nn.ReLU(inplace=True),
                nn.Conv1d(out_channel, out_channel, 3, padding=1),
                nn.BatchNorm1d(out_channel)
            )
            if in_channel == out_channel:
                # Same width on both paths: the shortcut is the identity.
                self.neck = nn.Sequential()
            else:
                # Project the input to the new width so the two paths can be added.
                self.neck = nn.Sequential(
                    nn.Conv1d(in_channel, out_channel, 3, padding=1),
                    nn.BatchNorm1d(out_channel)
                )

        def forward(self, x)->torch.Tensor:
            residual = self.seq(x)
            shortcut = self.neck(x)
            return F.relu(residual + shortcut)

    def __init__(self, feature_channel, pooling_kernelsize = 3) -> None:
        super(ResNet20, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(feature_channel, 16, 3, padding=1),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True)
        )
        self.ResBlocks = nn.Sequential(
            self._make_stage(16, 16),
            self._make_stage(16, 32),
            self._make_stage(32, 64),
        )
        self.poolkernelsize = pooling_kernelsize

    def _make_stage(self, cin, cout):
        """Stack three residual blocks; only the first one changes the width."""
        blocks = [self.ResidualBlock(cin, cout)]
        blocks += [self.ResidualBlock(cout, cout) for _ in range(2)]
        return nn.Sequential(*blocks)

    def forward(self, x)->torch.Tensor :
        # Move the last axis of the input into the channel position for Conv1d.
        feats = self.conv1(x.permute(0, 2, 1))
        feats = self.ResBlocks(feats)
        # Channels back to the last axis, so the pooling below acts across them.
        feats = feats.permute(0, 2, 1)
        if self.poolkernelsize > 1:
            feats = F.avg_pool1d(feats, kernel_size=self.poolkernelsize)
        # Flatten everything except the batch axis.
        return feats.reshape(feats.shape[0], -1)


class ResConvClassifier(nn.Module):
    """ResNet20 feature extractor followed by a fully connected classifier head.

    Args:
        feature_channels: number of input channels passed to ``ResNet20``.
        seqlen: length of the input sequence; with the pooling kernel it
            determines the size of the flattened ResNet20 output.
        out_features: number of outputs of the final linear layer.
        classifier_layers: widths of the hidden Linear/BatchNorm/ReLU blocks
            placed before the output layer. Defaults to ``[128]``; an empty
            list gives a single linear layer.
        resnet_pooling_kernelsize: pooling kernel forwarded to ``ResNet20``;
            must be positive.
    """

    def __init__(
            self, feature_channels, seqlen, out_features, 
            classifier_layers=None, resnet_pooling_kernelsize = 1
        ) -> None:
        
        """
        ResNet20 produces floor(64/resnet_pooling_kernelsize) values per time
        step, i.e. seqlen*floor(64/resnet_pooling_kernelsize) after flattening.
        """
        
        super(ResConvClassifier, self).__init__()
        # Validate before the value is used to size any layer.
        assert resnet_pooling_kernelsize > 0
        # None stands for the former default [128]; copying also means the
        # caller's list is never shared with this module.
        hidden = [128] if classifier_layers is None else list(classifier_layers)

        self.emd = ResNet20(feature_channels,resnet_pooling_kernelsize)
        
        resnet_out = seqlen*int(math.floor(64.0/float(resnet_pooling_kernelsize)))
        
        layers = [resnet_out] + hidden + [out_features]
        if (layers[1] > layers[0]):
            print(f"Waring ! Resnet20 output are {layers[0]}")
            print(f"and you give {layers[1]}, try to lift dimension ????? ")
        # Fixed: this check indexed classifier_layers[-2], which raised
        # IndexError for fewer than two hidden layers (including the default)
        # and reported the wrong sizes. It now compares the output width with
        # the layer that feeds it.
        if (layers[-1] > layers[-2]):
            print(f"Waring ! output are {layers[-1]}")
            print(f"and you give {layers[-2]}, try to lift dimension ????? ")
        
        # One Linear/BatchNorm/ReLU block per hidden width, then a plain
        # Linear layer producing the outputs.
        blocks = [
            self.__LinBlock(layers[i], layers[i+1]) for i in range(len(hidden))
        ]
        blocks.append(nn.Linear(layers[-2], layers[-1]))
        self.classifier = nn.Sequential(*blocks)

    def __LinBlock(self, ind, outd)->nn.Sequential:
        """Linear layer followed by batch normalisation and ReLU."""
        return nn.Sequential(
            nn.Linear(ind, outd),
            nn.BatchNorm1d(outd),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Return the classifier outputs for the flattened ResNet20 features of ``x``."""
        x1 = self.emd(x)
        x1 = self.classifier(x1)
        return x1

class Config:
    """Training settings: optimizer, loss, schedule length, device and head sizes."""

    # Run on the first GPU when CUDA is available, otherwise on the CPU.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Looked up by name in torch.optim / torch.nn when the objects are created.
    optimizer_name = "Adam"
    loss_function = "BCEWithLogitsLoss"

    lr = 0.01
    num_epochs = 50

    # Hidden widths of the classifier head and the ResNet pooling kernel.
    classifier_layers = [1024,1024]
    pooling_kernel = 1


cfg = Config()
    

# Assemble the network from the two config objects and move it to the device.
model = ResConvClassifier(
    len(datacfg.feature_list),
    datacfg.window_size,
    len(datacfg.label_list),
    classifier_layers=cfg.classifier_layers,
    resnet_pooling_kernelsize=cfg.pooling_kernel,
)
model = model.to(cfg.device)
print(f"Number of parameters in model - {count_parameters(model):,}")

# Optimizer and loss classes are resolved by name from the config. The loss is
# left unreduced (reduction='none') because the training and validation
# functions weight it per sample before averaging.
_optimizer_cls = getattr(torch.optim, cfg.optimizer_name)
_criterion_cls = getattr(torch.nn, cfg.loss_function)
optimizer = _optimizer_cls(model.parameters(), lr=cfg.lr)
criterion = _criterion_cls(reduction='none').to(cfg.device)

Number of parameters in model - 3,254,211


### Training and Validation

In [12]:
# Train for cfg.num_epochs epochs, validating after each one and saving the
# weights whenever the validation score beats the best score seen so far.
loss_per_iter, valid_per_iter = [], []   # per-epoch train loss / validation score
max_score = 0.0
# A per-epoch StepLR schedule (step_size=1, gamma=0.85) is left disabled here.
for epoch in range(cfg.num_epochs):
    print(f"Epoch: {epoch}")

    loss = train_one_epoch(model, train_loader, optimizer, criterion, cfg)
    score = validation_one_epoch(model, valid_loader, criterion, cfg)

    loss_per_iter.append(loss)
    valid_per_iter.append(score)

    if score > max_score:
        max_score = score
        # Save the parameters of the best model so far.
        torch.save(model.state_dict(), "best_model_state_b1.h5")
        print("Saving Model ...")

    print("="*50)

gc.collect()

Epoch: 0


100%|██████████| 4883/4883 [06:26<00:00, 12.62it/s]


Train Loss: 0.1485


100%|██████████| 4868/4868 [02:37<00:00, 30.99it/s]


Validation Loss: 0.0871, Validation Score: 0.240, ClassWise: 0.053,0.609,0.058
Saving Model ...
Epoch: 1


100%|██████████| 4883/4883 [06:21<00:00, 12.80it/s]


Train Loss: 0.1093


100%|██████████| 4868/4868 [02:33<00:00, 31.70it/s]


Validation Loss: 0.1114, Validation Score: 0.250, ClassWise: 0.031,0.693,0.026
Saving Model ...
Epoch: 2


100%|██████████| 4883/4883 [06:24<00:00, 12.69it/s]


Train Loss: 0.0849


100%|██████████| 4868/4868 [02:35<00:00, 31.25it/s]


Validation Loss: 0.1320, Validation Score: 0.258, ClassWise: 0.034,0.690,0.049
Saving Model ...
Epoch: 3


100%|██████████| 4883/4883 [06:24<00:00, 12.68it/s]


Train Loss: 0.0678


100%|██████████| 4868/4868 [02:37<00:00, 30.90it/s]


Validation Loss: 0.1797, Validation Score: 0.245, ClassWise: 0.020,0.678,0.039
Epoch: 4


100%|██████████| 4883/4883 [06:26<00:00, 12.64it/s]


Train Loss: 0.0562


100%|██████████| 4868/4868 [02:38<00:00, 30.75it/s]


Validation Loss: 0.1789, Validation Score: 0.215, ClassWise: 0.034,0.573,0.037
Epoch: 5


100%|██████████| 4883/4883 [06:28<00:00, 12.55it/s]


Train Loss: 0.0479


100%|██████████| 4868/4868 [02:39<00:00, 30.50it/s]


Validation Loss: 0.1799, Validation Score: 0.245, ClassWise: 0.032,0.672,0.033
Epoch: 6


100%|██████████| 4883/4883 [06:22<00:00, 12.76it/s]


Train Loss: 0.0418


100%|██████████| 4868/4868 [02:35<00:00, 31.39it/s]


Validation Loss: 0.1822, Validation Score: 0.249, ClassWise: 0.065,0.647,0.034
Epoch: 7


100%|██████████| 4883/4883 [06:25<00:00, 12.68it/s]


Train Loss: 0.0372


100%|██████████| 4868/4868 [02:37<00:00, 30.93it/s]


Validation Loss: 0.1788, Validation Score: 0.244, ClassWise: 0.052,0.642,0.036
Epoch: 8


100%|██████████| 4883/4883 [06:28<00:00, 12.57it/s]


Train Loss: 0.0337


100%|██████████| 4868/4868 [02:37<00:00, 30.98it/s]


Validation Loss: 0.2065, Validation Score: 0.242, ClassWise: 0.040,0.641,0.045
Epoch: 9


100%|██████████| 4883/4883 [06:27<00:00, 12.61it/s]


Train Loss: 0.0309


100%|██████████| 4868/4868 [02:40<00:00, 30.36it/s]


Validation Loss: 0.2521, Validation Score: 0.225, ClassWise: 0.027,0.614,0.035
Epoch: 10


100%|██████████| 4883/4883 [06:25<00:00, 12.66it/s]


Train Loss: 0.0283


100%|██████████| 4868/4868 [02:40<00:00, 30.41it/s]


Validation Loss: 0.2439, Validation Score: 0.228, ClassWise: 0.036,0.611,0.037
Epoch: 11


100%|██████████| 4883/4883 [06:28<00:00, 12.58it/s]


Train Loss: 0.0263


100%|██████████| 4868/4868 [02:39<00:00, 30.60it/s]


Validation Loss: 0.2619, Validation Score: 0.236, ClassWise: 0.026,0.640,0.043
Epoch: 12


100%|██████████| 4883/4883 [06:25<00:00, 12.66it/s]


Train Loss: 0.0246


100%|██████████| 4868/4868 [02:38<00:00, 30.80it/s]


Validation Loss: 0.2659, Validation Score: 0.246, ClassWise: 0.031,0.656,0.051
Epoch: 13


100%|██████████| 4883/4883 [06:27<00:00, 12.61it/s]


Train Loss: 0.0231


100%|██████████| 4868/4868 [02:36<00:00, 31.05it/s]


Validation Loss: 0.2988, Validation Score: 0.221, ClassWise: 0.034,0.589,0.040
Epoch: 14


100%|██████████| 4883/4883 [06:25<00:00, 12.66it/s]


Train Loss: 0.0216


100%|██████████| 4868/4868 [02:36<00:00, 31.19it/s]


Validation Loss: 0.2783, Validation Score: 0.248, ClassWise: 0.035,0.663,0.045
Epoch: 15


100%|██████████| 4883/4883 [06:26<00:00, 12.63it/s]


Train Loss: 0.0206


100%|██████████| 4868/4868 [02:37<00:00, 30.97it/s]


Validation Loss: 0.2897, Validation Score: 0.236, ClassWise: 0.033,0.629,0.046
Epoch: 16


100%|██████████| 4883/4883 [06:28<00:00, 12.58it/s]


Train Loss: 0.0194


100%|██████████| 4868/4868 [02:35<00:00, 31.40it/s]


Validation Loss: 0.2554, Validation Score: 0.244, ClassWise: 0.050,0.640,0.041
Epoch: 17


100%|██████████| 4883/4883 [06:23<00:00, 12.72it/s]


Train Loss: 0.0184


100%|██████████| 4868/4868 [02:35<00:00, 31.28it/s]


Validation Loss: 0.3163, Validation Score: 0.239, ClassWise: 0.027,0.643,0.048
Epoch: 18


100%|██████████| 4883/4883 [06:23<00:00, 12.72it/s]


Train Loss: 0.0178


100%|██████████| 4868/4868 [02:37<00:00, 30.85it/s]


Validation Loss: 0.3223, Validation Score: 0.241, ClassWise: 0.029,0.654,0.040
Epoch: 19


100%|██████████| 4883/4883 [06:23<00:00, 12.74it/s]


Train Loss: 0.0170


100%|██████████| 4868/4868 [02:37<00:00, 30.88it/s]


Validation Loss: 0.3056, Validation Score: 0.239, ClassWise: 0.037,0.639,0.040
Epoch: 20


100%|██████████| 4883/4883 [06:23<00:00, 12.75it/s]


Train Loss: 0.0162


100%|██████████| 4868/4868 [02:39<00:00, 30.49it/s]


Validation Loss: 0.3565, Validation Score: 0.241, ClassWise: 0.026,0.655,0.041
Epoch: 21


100%|██████████| 4883/4883 [06:23<00:00, 12.73it/s]


Train Loss: 0.0155


100%|██████████| 4868/4868 [02:37<00:00, 31.00it/s]


Validation Loss: 0.3346, Validation Score: 0.239, ClassWise: 0.041,0.637,0.038
Epoch: 22


100%|██████████| 4883/4883 [06:25<00:00, 12.67it/s]


Train Loss: 0.0150


100%|██████████| 4868/4868 [02:36<00:00, 31.03it/s]


Validation Loss: 0.3381, Validation Score: 0.238, ClassWise: 0.028,0.636,0.049
Epoch: 23


100%|██████████| 4883/4883 [06:26<00:00, 12.62it/s]


Train Loss: 0.0144


100%|██████████| 4868/4868 [02:33<00:00, 31.63it/s]


Validation Loss: 0.3344, Validation Score: 0.234, ClassWise: 0.035,0.618,0.048
Epoch: 24


100%|██████████| 4883/4883 [06:24<00:00, 12.71it/s]


Train Loss: 0.0139


100%|██████████| 4868/4868 [02:31<00:00, 32.24it/s]


Validation Loss: 0.3282, Validation Score: 0.247, ClassWise: 0.034,0.663,0.044
Epoch: 25


100%|██████████| 4883/4883 [06:22<00:00, 12.75it/s]


Train Loss: 0.0134


100%|██████████| 4868/4868 [02:32<00:00, 31.83it/s]


Validation Loss: 0.3210, Validation Score: 0.231, ClassWise: 0.033,0.617,0.041
Epoch: 26


100%|██████████| 4883/4883 [06:24<00:00, 12.70it/s]


Train Loss: 0.0130


100%|██████████| 4868/4868 [02:35<00:00, 31.23it/s]


Validation Loss: 0.3498, Validation Score: 0.227, ClassWise: 0.049,0.587,0.045
Epoch: 27


100%|██████████| 4883/4883 [06:22<00:00, 12.76it/s]


Train Loss: 0.0127


100%|██████████| 4868/4868 [02:38<00:00, 30.77it/s]


Validation Loss: 0.3948, Validation Score: 0.235, ClassWise: 0.040,0.616,0.048
Epoch: 28


100%|██████████| 4883/4883 [06:22<00:00, 12.76it/s]


Train Loss: 0.0123


100%|██████████| 4868/4868 [02:38<00:00, 30.78it/s]


Validation Loss: 0.3866, Validation Score: 0.241, ClassWise: 0.029,0.647,0.045
Epoch: 29


100%|██████████| 4883/4883 [06:22<00:00, 12.77it/s]


Train Loss: 0.0118


100%|██████████| 4868/4868 [02:37<00:00, 30.88it/s]


Validation Loss: 0.3923, Validation Score: 0.233, ClassWise: 0.028,0.622,0.049
Epoch: 30


100%|██████████| 4883/4883 [06:20<00:00, 12.82it/s]


Train Loss: 0.0116


100%|██████████| 4868/4868 [02:37<00:00, 30.84it/s]


Validation Loss: 0.3832, Validation Score: 0.242, ClassWise: 0.030,0.652,0.044
Epoch: 31


100%|██████████| 4883/4883 [06:20<00:00, 12.85it/s]


Train Loss: 0.0112


100%|██████████| 4868/4868 [02:38<00:00, 30.80it/s]


Validation Loss: 0.3837, Validation Score: 0.239, ClassWise: 0.032,0.639,0.045
Epoch: 32


100%|██████████| 4883/4883 [06:20<00:00, 12.83it/s]


Train Loss: 0.0110


100%|██████████| 4868/4868 [02:39<00:00, 30.53it/s]


Validation Loss: 0.3923, Validation Score: 0.241, ClassWise: 0.031,0.646,0.046
Epoch: 33


100%|██████████| 4883/4883 [06:20<00:00, 12.83it/s]


Train Loss: 0.0107


100%|██████████| 4868/4868 [02:37<00:00, 30.87it/s]


Validation Loss: 0.3880, Validation Score: 0.238, ClassWise: 0.034,0.636,0.044
Epoch: 34


100%|██████████| 4883/4883 [06:20<00:00, 12.85it/s]


Train Loss: 0.0104


100%|██████████| 4868/4868 [02:38<00:00, 30.65it/s]


Validation Loss: 0.4145, Validation Score: 0.249, ClassWise: 0.033,0.666,0.049
Epoch: 35


100%|██████████| 4883/4883 [06:20<00:00, 12.82it/s]


Train Loss: 0.0102


100%|██████████| 4868/4868 [02:37<00:00, 30.95it/s]


Validation Loss: 0.4666, Validation Score: 0.236, ClassWise: 0.025,0.639,0.044
Epoch: 36


100%|██████████| 4883/4883 [06:19<00:00, 12.87it/s]


Train Loss: 0.0100


100%|██████████| 4868/4868 [02:37<00:00, 30.93it/s]


Validation Loss: 0.4225, Validation Score: 0.241, ClassWise: 0.032,0.638,0.053
Epoch: 37


100%|██████████| 4883/4883 [06:19<00:00, 12.86it/s]


Train Loss: 0.0096


100%|██████████| 4868/4868 [02:36<00:00, 31.14it/s]


Validation Loss: 0.4527, Validation Score: 0.230, ClassWise: 0.028,0.611,0.052
Epoch: 38


100%|██████████| 4883/4883 [06:20<00:00, 12.84it/s]


Train Loss: 0.0094


100%|██████████| 4868/4868 [02:36<00:00, 31.01it/s]


Validation Loss: 0.4574, Validation Score: 0.234, ClassWise: 0.030,0.619,0.053
Epoch: 39


100%|██████████| 4883/4883 [06:25<00:00, 12.67it/s]


Train Loss: 0.0091


100%|██████████| 4868/4868 [02:39<00:00, 30.58it/s]


Validation Loss: 0.4098, Validation Score: 0.243, ClassWise: 0.032,0.640,0.057
Epoch: 40


100%|██████████| 4883/4883 [06:24<00:00, 12.70it/s]


Train Loss: 0.0090


100%|██████████| 4868/4868 [02:39<00:00, 30.47it/s]


Validation Loss: 0.4567, Validation Score: 0.242, ClassWise: 0.029,0.646,0.051
Epoch: 41


100%|██████████| 4883/4883 [06:27<00:00, 12.60it/s]


Train Loss: 0.0089


100%|██████████| 4868/4868 [02:40<00:00, 30.33it/s]


Validation Loss: 0.4333, Validation Score: 0.244, ClassWise: 0.028,0.651,0.051
Epoch: 42


100%|██████████| 4883/4883 [06:26<00:00, 12.63it/s]


Train Loss: 0.0087


100%|██████████| 4868/4868 [02:39<00:00, 30.49it/s]


Validation Loss: 0.4220, Validation Score: 0.242, ClassWise: 0.043,0.633,0.050
Epoch: 43


100%|██████████| 4883/4883 [06:22<00:00, 12.76it/s]


Train Loss: 0.0085


100%|██████████| 4868/4868 [02:36<00:00, 31.19it/s]


Validation Loss: 0.4758, Validation Score: 0.236, ClassWise: 0.029,0.633,0.046
Epoch: 44


100%|██████████| 4883/4883 [06:22<00:00, 12.76it/s]


Train Loss: 0.0083


100%|██████████| 4868/4868 [02:35<00:00, 31.38it/s]


Validation Loss: 0.4509, Validation Score: 0.245, ClassWise: 0.033,0.657,0.047
Epoch: 45


100%|██████████| 4883/4883 [06:24<00:00, 12.70it/s]


Train Loss: 0.0082


100%|██████████| 4868/4868 [02:38<00:00, 30.76it/s]


Validation Loss: 0.4803, Validation Score: 0.241, ClassWise: 0.032,0.638,0.053
Epoch: 46


100%|██████████| 4883/4883 [06:27<00:00, 12.60it/s]


Train Loss: 0.0081


100%|██████████| 4868/4868 [02:40<00:00, 30.33it/s]


Validation Loss: 0.4793, Validation Score: 0.239, ClassWise: 0.030,0.639,0.047
Epoch: 47


100%|██████████| 4883/4883 [06:28<00:00, 12.56it/s]


Train Loss: 0.0079


100%|██████████| 4868/4868 [02:43<00:00, 29.80it/s]


Validation Loss: 0.4688, Validation Score: 0.246, ClassWise: 0.027,0.651,0.059
Epoch: 48


100%|██████████| 4883/4883 [06:28<00:00, 12.57it/s]


Train Loss: 0.0077


100%|██████████| 4868/4868 [02:39<00:00, 30.48it/s]


Validation Loss: 0.4707, Validation Score: 0.243, ClassWise: 0.034,0.648,0.049
Epoch: 49


100%|██████████| 4883/4883 [06:28<00:00, 12.57it/s]


Train Loss: 0.0076


100%|██████████| 4868/4868 [02:39<00:00, 30.61it/s]


Validation Loss: 0.4810, Validation Score: 0.242, ClassWise: 0.032,0.643,0.052


0

# Submission

In [13]:
# Rebuild the classifier from the data/model configs and restore the saved weights.
# NOTE(review): presumably these are the same settings used during training --
# the state dict will only load if the architectures match.
model = ResConvClassifier(
    feature_channels=len(datacfg.feature_list),
    seqlen=datacfg.window_size,
    out_features=len(datacfg.label_list),
    resnet_pooling_kernelsize=cfg.pooling_kernel,
    classifier_layers=cfg.classifier_layers,
).to(cfg.device)

# Load the best model parameters. `map_location` remaps the stored tensors onto
# cfg.device, so a checkpoint written on GPU still loads on a CPU-only session
# (and the other way round).
model.load_state_dict(
    torch.load("/kaggle/working/best_model_state_b1.h5", map_location=cfg.device)
)
model.eval()

# Collect every test file and tag it with the sub-dataset it came from.
test_defog_paths = glob.glob("/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/test/defog/*.csv")
test_tdcsfog_paths = glob.glob("/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/test/tdcsfog/*.csv")
test_fpaths = [(f, 'de') for f in test_defog_paths] + [(f, 'tdcs') for f in test_tdcsfog_paths]

test_dataset = FOGDataset(test_fpaths, datacfg, split="test")
test_loader = DataLoader(test_dataset, batch_size=datacfg.batch_size, num_workers=5)

ids = []
preds = []

# Inference only: disable autograd once for the whole loop.
with torch.no_grad():
    for _id, x, _ in tqdm(test_loader):
        x = x.to(cfg.device).float()
        # NOTE(review): the 0.1 factor rescales the raw model outputs (no sigmoid
        # is applied here); its purpose is not visible in this cell -- confirm.
        y_pred = model(x) * 0.1

        ids.extend(_id)
        # nan_to_num replaces any NaN in the batch predictions with 0.0.
        preds.extend(list(np.nan_to_num(y_pred.cpu().numpy())))

Dataset initialized in 0.4898190498352051 secs!


100%|██████████| 280/280 [00:06<00:00, 43.78it/s]


In [14]:
# Read the competition's sample submission; the cell's last expression shows
# its (rows, columns) shape.
sample_submission = pd.read_csv(
    filepath_or_buffer="/kaggle/input/tlvmc-parkinsons-freezing-gait-prediction/sample_submission.csv"
)
sample_submission.shape

(286370, 4)

In [15]:
# Stack the per-row predictions into one 2-D array: one column per label.
preds = np.array(preds)

# One frame row per predicted Id, with each label column rounded to 5 decimals.
submission = pd.DataFrame(
    {
        'Id': ids,
        **{
            label: np.round(preds[:, col_idx], 5)
            for col_idx, label in enumerate(('StartHesitation', 'Turn', 'Walking'))
        },
    }
)

# Align to the Ids of the sample submission; Ids without a prediction get 0.0.
# NOTE(review): the scores shown below are negative, so a 0.0 fill would rank
# above every real prediction -- confirm this is the intended default.
submission = sample_submission[['Id']].merge(submission, how='left', on='Id').fillna(0.0)
submission.to_csv("submission.csv", index=False)
print(submission.shape)
submission.head()

(286370, 4)


Unnamed: 0,Id,StartHesitation,Turn,Walking
0,003f117e14_0,-2.70163,-1.68212,-2.13139
1,003f117e14_1,-2.71,-1.68383,-2.14018
2,003f117e14_2,-2.70809,-1.68727,-2.14271
3,003f117e14_3,-2.70233,-1.68228,-2.11908
4,003f117e14_4,-2.70912,-1.68913,-2.13474
