## Pytorchによる分析

In [1]:
import os
os.chdir('C:/Users/Takanori/Desktop/Kaggle/SIGNATE2205')

In [2]:
# Import Lib

import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.metrics import accuracy_score
import time, gc
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable
from torch.optim import lr_scheduler
import math

In [3]:
class CFG:
    target = 'charges'
    batch_size = 128
    apex=True
    seed=42
    n_fold=5
    max_grad_norm=1000
    batch_scheduler=True
    print_freq=100000000
    num_workers=0
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    lr = 1e-2
    lr_gamma = 0.9
    epochs = 200

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Import csv

In [5]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sub = pd.read_csv('sample_submit.csv')

In [6]:
train.head()

Unnamed: 0,id,age,sex,bmi,children,smoker,region,charges
0,1,45,male,46.561704,5,no,southeast,0
1,2,18,female,23.572081,3,no,southwest,0
2,4,28,female,38.670352,0,no,southeast,0
3,5,46,female,45.614196,0,no,southwest,0
4,10,27,male,38.76961,0,no,northeast,0


In [7]:
test[CFG.target] = 0

## FE

In [8]:
train.info()
# nullは存在しない。

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1200 entries, 0 to 1199
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        1200 non-null   int64  
 1   age       1200 non-null   int64  
 2   sex       1200 non-null   object 
 3   bmi       1200 non-null   float64
 4   children  1200 non-null   int64  
 5   smoker    1200 non-null   object 
 6   region    1200 non-null   object 
 7   charges   1200 non-null   int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 75.1+ KB


In [9]:
train.describe()

Unnamed: 0,id,age,bmi,children,charges
count,1200.0,1200.0,1200.0,1200.0,1200.0
mean,1009.728333,38.238333,33.665249,0.884167,0.255833
std,581.366414,12.708571,5.86687,1.093959,0.586517
min,1.0,18.0,22.997608,0.0,0.0
25%,503.75,28.0,29.741881,0.0,0.0
50%,1011.0,38.0,33.441095,0.0,0.0
75%,1522.25,49.0,38.57539,2.0,0.0
max,1999.0,63.0,46.75501,5.0,2.0


In [10]:
train.nunique()

id          1200
age           45
sex            2
bmi         1200
children       6
smoker         2
region         4
charges        3
dtype: int64

In [11]:
df_all = pd.concat([train, test])

In [12]:
# 文字列を数値に変換
for col in df_all.columns:
    if df_all[col].dtype == 'object':
        l_enc = LabelEncoder()
        df_all[col] = l_enc.fit_transform(df_all[col].values)

In [13]:
train = df_all.iloc[:len(train)]
test = df_all.iloc[len(train):]

In [14]:
features = [c for c in train.columns if c not in [CFG.target, 'id']]

In [15]:
# CV Split
skf = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(skf.split(train, train[CFG.target])):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
display(train.groupby('fold').size())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train.loc[val_index, 'fold'] = int(n)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['fold'] = train['fold'].astype(int)


fold
0    240
1    240
2    240
3    240
4    240
dtype: int64

## Pytorch

In [16]:
# Define Model
class Net1(nn.Module):
    def __init__(self, input_size):
        self.model_name = 'Net1'
        # 4層
        super(Net1, self).__init__()
        self.input_size = input_size
        self.first_bn = nn.BatchNorm1d(self.input_size, momentum=0.01)  # とりあえず入れてみた。momentumが小さいといいことあるんかな・・？
        self.fc1 = nn.Linear(self.input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 384)
        self.bn2 = nn.BatchNorm1d(384)
        self.fc3 = nn.Linear(384, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, 128)
        self.bn4 = nn.BatchNorm1d(128)
        self.fc5 = nn.Linear(128, 64)
        self.bn5 = nn.BatchNorm1d(64)
        self.fc6 = nn.Linear(64, 32)
        self.bn6 = nn.BatchNorm1d(32)
        self.fc7 = nn.Linear(32, 3)
        self.bn7 = nn.BatchNorm1d(3)

    def forward(self, x):
        # dropoutの後にbnを置いてはならない
        # nbは、活性化関数の前に置く
        x = self.first_bn(x)
        x = F.silu(self.bn1((self.fc1(x))))
        x = F.silu(self.bn2((self.fc2(x))))
        x = F.silu(self.bn3((self.fc3(x))))
        x = F.silu(self.bn4((self.fc4(x))))
        x = F.silu(self.bn5((self.fc5(x))))
        x = F.silu(self.bn6((self.fc6(x))))
        x = self.bn7((self.fc7(x)))
        x = F.softmax(x, dim=1)
        return x

In [17]:
from torch.utils.data import DataLoader, Dataset

In [18]:
# Define DataSet
class TrainDataset(Dataset):
    def __init__(self, df, features, target):
        self.target = df[target].values
        self.data = df[features].values

    def __len__(self):
        return len(self.target)

    def __getitem__(self, item):
        inputs = torch.tensor(self.data[item], dtype=torch.float32)
        label = torch.tensor(self.target[item], dtype=torch.long)
        return inputs, label


In [19]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


In [20]:
def get_score(y_true, y_pred):
    score = accuracy_score(y_true, y_pred)
    return score

In [21]:
def train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    scaler = torch.cuda.amp.GradScaler()  # これが何か調べる
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):  # これが何か調べる
            y_preds = model(inputs)
        loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        grad_norm =torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()
        global_step += 1
        if CFG.batch_scheduler:
            scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg

In [22]:
def valid_fn(valid_loader, model, epoch, criterion, device):
    model.eval()
    scaler = torch.cuda.amp.GradScaler()  # これが何か調べる
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    preds = []

    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds, labels)
        losses.update(loss.item(), batch_size)
        grad_norm =torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  .format(epoch+1, step, len(valid_loader), 
                          remain=timeSince(start, float(step+1)/len(valid_loader)),
                          loss=losses,
                          grad_norm=grad_norm,))

        _, predicted = torch.max(y_preds.data, 1)
        predicted = predicted.cpu().numpy()
        preds.append(predicted)
    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [23]:
from tqdm.auto import tqdm

In [24]:
def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        _, predicted = torch.max(y_preds.data, 1)
        predicted = predicted.cpu().numpy()
        preds.append(predictions)
    predictions = np.concatenate(preds)
    return predictions

In [31]:
# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    print(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['fold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['fold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target].values
    
    train_dataset = TrainDataset(train_folds, features, CFG.target)
    valid_dataset = TrainDataset(valid_folds, features, CFG.target)

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = Net1(len(features)).to(device)

    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    # optimizer = AdamW(lr=CFG.lr, eps=CFG.eps, betas=CFG.betas)
    optimizer = AdamW(model.parameters(), lr=CFG.lr)

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        # lr_gammna**n = 1e-3
        # n = log(lr_gammna) / log(1e-3)
        n = math.log(1e-3) / math.log(cfg.lr_gamma)
        step_size = int(num_train_steps / n)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=cfg.lr_gamma)
        return scheduler

    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.CrossEntropyLoss()

    best_score = -float('inf')

    model_file_name = model.model_name + f"_fold{fold}_best.pth"

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, epoch, criterion, device)

        # scoring
        score = get_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        print(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        print(f'Epoch {epoch+1} - Score: {score:.4f}')

        if best_score < score:
            best_score = score
            print(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')

            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        model_file_name)

    predictions = torch.load(model_file_name, 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [26]:
from torch.optim import Adam, SGD, AdamW

In [32]:
def get_result(oof_df):
    labels = oof_df[CFG.target].values
    preds = oof_df['pred'].values
    score = get_score(labels, preds)
    print(f'Score: {score:<.4f}')

oof_df = pd.DataFrame()
for fold in range(CFG.n_fold):
    if fold in CFG.trn_fold:
        _oof_df = train_loop(train, fold)
        oof_df = pd.concat([oof_df, _oof_df])
        print(f"========== fold: {fold} result ==========")
        get_result(_oof_df)
oof_df = oof_df.reset_index(drop=True)
print(f"========== CV ==========")
get_result(oof_df)
oof_df.to_pickle('oof_df.pkl')

Epoch: [1][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 1.1429(1.1429) Grad: 197728.2656  LR: 0.01000000  




Epoch: [1][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8898(0.9776) Grad: 15101.3955  LR: 0.01000000  
Epoch: [1][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4968(1.4968) Grad: 0.0000  
Epoch: [1][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4443(1.4723) Grad: 0.0000  
Epoch 1 - avg_train_loss: 0.9776  avg_val_loss: 1.4723  time: 0s
Epoch 1 - Score: 0.0792
Epoch 1 - Save Best Score: 0.0792 Model
Epoch: [2][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8914(0.8914) Grad: 14361.1318  LR: 0.01000000  
Epoch: [2][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8374(0.8707) Grad: 14571.2705  LR: 0.01000000  
Epoch: [2][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4968(1.4968) Grad: 0.0000  
Epoch: [2][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4443(1.4723) Grad: 0.0000  
Epoch 2 - avg_train_loss: 0.8707  avg_val_loss: 1.4723  time: 0s
Epoch 2 - Score: 0.0792
Epoch: [3][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8400(0.8400) Grad: 15233.6172  LR: 0.01000000  
Epoch: [3][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7



Epoch: [1][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 1.1536(1.1536) Grad: 199458.5312  LR: 0.01000000  
Epoch: [1][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.9083(0.9504) Grad: 14598.5488  LR: 0.01000000  
Epoch: [1][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4655(1.4655) Grad: 0.0000  
Epoch: [1][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4800(1.4723) Grad: 0.0000  
Epoch 1 - avg_train_loss: 0.9504  avg_val_loss: 1.4723  time: 0s
Epoch 1 - Score: 0.0792
Epoch 1 - Save Best Score: 0.0792 Model
Epoch: [2][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8592(0.8592) Grad: 15208.9688  LR: 0.01000000  
Epoch: [2][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8456(0.8694) Grad: 12540.3164  LR: 0.01000000  
Epoch: [2][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4655(1.4655) Grad: 0.0000  
Epoch: [2][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4800(1.4723) Grad: 0.0000  
Epoch 2 - avg_train_loss: 0.8694  avg_val_loss: 1.4723  time: 0s
Epoch 2 - Score: 0.0792
Epoch: [3][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.



Epoch: [1][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 1.0903(1.0903) Grad: 175920.7969  LR: 0.01000000  
Epoch: [1][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.9042(0.9390) Grad: 15812.9600  LR: 0.01000000  
Epoch: [1][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4653(1.4653) Grad: 0.0000  
Epoch: [1][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4888(1.4763) Grad: 0.0000  
Epoch 1 - avg_train_loss: 0.9390  avg_val_loss: 1.4763  time: 0s
Epoch 1 - Score: 0.0750
Epoch 1 - Save Best Score: 0.0750 Model
Epoch: [2][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8786(0.8786) Grad: 15538.4219  LR: 0.01000000  
Epoch: [2][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8193(0.8622) Grad: 12960.6562  LR: 0.01000000  
Epoch: [2][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4379(1.4379) Grad: 0.0000  
Epoch: [2][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4599(1.4481) Grad: 0.0000  
Epoch 2 - avg_train_loss: 0.8622  avg_val_loss: 1.4481  time: 0s
Epoch 2 - Score: 0.0792
Epoch 2 - Save Best Score: 0.0792 Model
Epoch: [3][0/



Epoch: [1][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4220(1.3877) Grad: 0.0000  
Epoch 1 - avg_train_loss: 0.9550  avg_val_loss: 1.3877  time: 0s
Epoch 1 - Score: 0.0875
Epoch 1 - Save Best Score: 0.0875 Model
Epoch: [2][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8576(0.8576) Grad: 15362.8984  LR: 0.01000000  
Epoch: [2][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8601(0.8676) Grad: 15113.0430  LR: 0.01000000  
Epoch: [2][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4412(1.4412) Grad: 0.0000  
Epoch: [2][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.5148(1.4756) Grad: 0.0000  
Epoch 2 - avg_train_loss: 0.8676  avg_val_loss: 1.4756  time: 0s
Epoch 2 - Score: 0.0750
Epoch: [3][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8277(0.8277) Grad: 14232.0439  LR: 0.01000000  
Epoch: [3][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.7801(0.8273) Grad: 12308.4238  LR: 0.01000000  
Epoch: [3][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.0105(1.0105) Grad: 0.0000  
Epoch: [3][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 0.9



Epoch: [1][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 1.0999(1.0999) Grad: 140039.4688  LR: 0.01000000  
Epoch: [1][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8573(0.9452) Grad: 14236.0137  LR: 0.01000000  
Epoch: [1][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4493(1.4493) Grad: 0.0000  
Epoch: [1][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.5059(1.4757) Grad: 0.0000  
Epoch 1 - avg_train_loss: 0.9452  avg_val_loss: 1.4757  time: 0s
Epoch 1 - Score: 0.0750
Epoch 1 - Save Best Score: 0.0750 Model
Epoch: [2][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8783(0.8783) Grad: 15570.6152  LR: 0.01000000  
Epoch: [2][6/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.8392(0.8718) Grad: 14979.4590  LR: 0.01000000  
Epoch: [2][0/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.4453(1.4453) Grad: 0.0000  
Epoch: [2][1/2] Elapsed 0m 0s (remain 0m 0s) Loss: 1.5027(1.4721) Grad: 0.0000  
Epoch 2 - avg_train_loss: 0.8718  avg_val_loss: 1.4721  time: 0s
Epoch 2 - Score: 0.0750
Epoch: [3][0/7] Elapsed 0m 0s (remain 0m 0s) Loss: 0.

In [33]:
predictions = []

In [34]:
import glob

In [35]:
def inference_fn(test_loader, model, device):
    ret = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs, labels in tk0:
        inputs = inputs.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        _, preds = torch.max(y_preds, dim=1)
        ret.append(preds.to('cpu').numpy())
    predictions = np.concatenate(ret)
    return predictions

In [36]:
model_path = ''
models = np.sort(glob.glob(f"*best.pth"))
print(models)


['Net1_fold0_best.pth' 'Net1_fold1_best.pth' 'Net1_fold2_best.pth'
 'Net1_fold3_best.pth' 'Net1_fold4_best.pth' 'Net1_fold5_best.pth'
 'Net1_fold6_best.pth' 'Net1_fold7_best.pth' 'Net1_fold8_best.pth'
 'Net1_fold9_best.pth']


In [37]:
test_dataset = TrainDataset(test, features, CFG.target)
test_loader = DataLoader(test_dataset,
                            batch_size=CFG.batch_size,
                            shuffle=False,
                            num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

In [38]:
df = pd.DataFrame()
for model_name in models:
    model = Net1(len(features)).to(device)
    state = torch.load(model_name, map_location=torch.device('cpu'))
    model.load_state_dict(state['model'])
    prediction = inference_fn(test_loader, model, device)
    df = pd.concat([df, pd.DataFrame(prediction)], axis=1)
    del model, state, prediction; gc.collect()
    torch.cuda.empty_cache()

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

In [39]:
from scipy.stats import mode

In [40]:
df

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,2,2,2,2,2,2,2,2,2,2
3,0,0,0,0,0,0,0,0,0,0
4,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...
795,0,0,0,0,0,0,0,0,0,0
796,0,0,0,0,0,0,0,0,0,0
797,0,0,0,0,0,0,0,0,0,0
798,1,1,1,1,1,1,1,1,2,1


In [41]:
df['pred'] = df.mode(axis=1)[0].astype(int)

In [42]:
df['id'] = test['id'].values

In [43]:
df[['id', 'pred']].to_csv('submission.csv', index=False, header=False)