# Overview
- yukiさんのノートブックを参考にする

In [1]:
import subprocess
cmd = "git rev-parse --short HEAD"
hash = subprocess.check_output(cmd.split()).strip().decode('utf-8')
print(hash)

ed81633


# Const

In [2]:
NB = '001'
DIR_TRAIN = './../data_ignore/input/train/'
DIR_TEST = './../data_ignore/input/test/'
DIR_WIFI = './../data_ignore/input/wifi/'
PATH_SUB = './../data_ignore/input/sample_submission.csv'

In [3]:
config_str = '''
globals:
    seed: 5713
    n_label: 24
    n_splits: 5
    random_sate: 42
    lr: 0.001
    patience: 10
    epoch: 100
    batch_size: 512
    skip_evaluate_num: 5
    num_feats: 20
'''

# Import everything I need:)

In [4]:
import os
import yaml
import types
import random
import pickle
import builtins
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from loguru import logger
from collections import OrderedDict

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold

# pytorch
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

# Function

In [5]:
def imports():
    for name, val in globals().items():
        # module imports
        if isinstance(val, types.ModuleType):
            yield name, val

            # functions / callables
        if hasattr(val, '__call__'):
            yield name, val


def noglobal(f):
    '''
    ref: https://gist.github.com/raven38/4e4c3c7a179283c441f575d6e375510c
    '''
    return types.FunctionType(f.__code__,
                              dict(imports()),
                              f.__name__,
                              f.__defaults__,
                              f.__closure__
                              )


def comp_metric(xhat, yhat, fhat, x, y, f):
    intermediate = np.sqrt(np.power(xhat-x, 2) + np.power(yhat-y, 2)) + 15 * np.abs(fhat-f)
    return intermediate.sum()/xhat.shape[0]

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


# Preparation

load config

In [6]:
config = yaml.safe_load(config_str)
config

{'globals': {'seed': 5713,
  'n_label': 24,
  'n_splits': 5,
  'random_sate': 42,
  'lr': 0.001,
  'patience': 10,
  'epoch': 100,
  'batch_size': 512,
  'skip_evaluate_num': 5,
  'num_feats': 20}}

<br>

set

In [7]:
seed_everything(config['globals']['seed'])

<br>

load dataset

In [8]:
with open(f'{DIR_WIFI}train_all.pkl', 'rb') as f:
    df_train = pickle.load( f)
with open(f'{DIR_WIFI}test_all.pkl', 'rb') as f:
    df_test = pickle.load( f)

<br>

preprocessing

In [9]:
bssid_feats = [f'bssid_{i}' for i in range(config['globals']['num_feats'])]
rssi_feats  = [f'rssi_{i}' for i in range(config['globals']['num_feats'])]

In [10]:
# bssidの一覧作成
# wifi_bassidにはtrainとtest両方のbssidの一覧が含まれる

wifi_bssids = []
for i in range(100):
    wifi_bssids.extend(df_train.iloc[:,i].values.tolist())
wifi_bssids = list(set(wifi_bssids))

wifi_bssids_size = len(wifi_bssids)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids_test = []
for i in range(100):
    wifi_bssids_test.extend(df_test.iloc[:,i].values.tolist())
wifi_bssids_test = list(set(wifi_bssids_test))

wifi_bssids_size = len(wifi_bssids_test)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids.extend(wifi_bssids_test)
wifi_bssids_size = len(wifi_bssids)
print(f'BSSID TYPES: {wifi_bssids_size}')

BSSID TYPES: 61206
BSSID TYPES: 33042
BSSID TYPES: 94248


In [11]:
# LabelEncoding & StandardScaler

le = LabelEncoder()
le.fit(wifi_bssids)
le_site = LabelEncoder()
le_site.fit(df_train['site_id'])

ss = StandardScaler()
ss.fit(df_train.loc[:, rssi_feats])

StandardScaler()

In [12]:
df_train.loc[:, rssi_feats] = ss.transform(df_train.loc[:,rssi_feats])
df_test.loc[:, rssi_feats] = ss.transform(df_test.loc[:, rssi_feats])
for feat in bssid_feats:
    df_train.loc[:, feat] = le.transform(df_train.loc[:, feat])
    df_test.loc[:, feat] = le.transform(df_test.loc[:, feat])
    
    df_train.loc[:, feat] = df_train.loc[:, feat] + 1
    df_test.loc[:, feat] = df_test.loc[:, feat] + 1
    
df_train.loc[:, 'site_id'] = le_site.transform(df_train.loc[:, 'site_id'])
df_test.loc[:, 'site_id'] = le_site.transform(df_test.loc[:, 'site_id'])

df_train.loc[:, rssi_feats] = ss.transform(df_train.loc[:, rssi_feats])
df_test.loc[:, rssi_feats] = ss.transform(df_test.loc[:, rssi_feats])

In [13]:
site_count = len(df_train['site_id'].unique())
df_train.reset_index(drop=True, inplace=True)

In [14]:
df_train.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99,x,y,floor,path,site_id
0,52393,35871,2765,34898,52710,35260,42720,33510,23417,15249,...,-79,-79,-79,-79,-79,107.85044,161.89262,-1,5e1580adf4c3420006d520d4,0
1,35871,52393,7487,34898,52710,35260,21971,15249,17025,5351,...,-79,-79,-79,-80,-80,107.85044,161.89262,-1,5e1580adf4c3420006d520d4,0
2,35871,52393,52710,34898,35260,23417,49408,6673,7487,48501,...,-77,-78,-78,-78,-78,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0
3,23417,34898,35260,52393,35871,3707,49408,15613,10167,4978,...,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0
4,35871,35260,23417,19473,52393,3707,49408,18306,21410,52795,...,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0


# Execute

In [15]:
class EarlyStopping:
    """
    Early stops the training if validation loss doesn't improve after a given patience.
    based on: https://github.com/Bjarten/early-stopping-pytorch
    """
    def __init__(self, patience=7, verbose=False, delta=0, fold=''):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            logger.info(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
            
#         torch.save(model.state_dict(), ROOT_DIR / Path('output') / f'checkpoint_{NB}_{fold}.pt')
#         torch.save(model.state_dict(),  f'checkpoint_{NB}_{fold}.pt')
        self.val_loss_min = val_loss

In [16]:
class IndoorWiFiNet(nn.Module):
    def __init__(self, bssid_size, site_size, num_feats):
        super(IndoorWiFiNet, self).__init__()


        self.bssid_embedding = nn.Embedding(bssid_size, 64, max_norm=True)
        
        self.site_embedding = nn.Embedding(site_size, 64, max_norm=True)

        self.rssi = nn.Sequential(
            nn.BatchNorm1d(20),
            nn.Linear(20, num_feats * 64)
        )


        concat_size = 64 + (num_feats * 64) + (num_feats * 64)
        self.bn1 = nn.BatchNorm1d(concat_size)

        self.flatten = nn.Flatten()

        self.dropout1 = nn.Dropout(0.3)
        self.linear1 = nn.Linear(in_features=concat_size, out_features=256)#, bias=False)
        self.bn2 = nn.BatchNorm1d(256)

        self.linear2 = nn.Linear(in_features=256, out_features=128)#, bias=False)
        self.linear3 = nn.Linear(in_features=128, out_features=16)#, bias=False)

        self.bn3 = nn.BatchNorm1d(128)
        self.bn4 = nn.BatchNorm1d(16)


        self.xy = nn.Linear(in_features=16, out_features=2)#, bias=False)
        self.floor = nn.Linear(in_features=16, out_features=1)#, bias=False)
        

    def forward(self, bssid, rssi, site):

        site = torch.reshape(site, (-1, 1))

        bssid_out = self.bssid_embedding(bssid)
        site_out = self.site_embedding(site)

        rssi_out = self.rssi(rssi)


        bssid_out = self.flatten(bssid_out)
        site_out = self.flatten(site_out)

        x = torch.cat([bssid_out, rssi_out, site_out], dim=1)
        x = self.bn1(x)
        x = self.dropout1(x)
        x = F.relu(self.linear1(x))
        x = self.bn2(x)

        x = F.relu(self.linear2(x))
        x = self.bn3(x)

        x = F.relu(self.linear3(x))
        x = self.bn4(x)

        #x = self.lstm(x)

        xy = self.xy(x)
        floor = self.floor(x)

        return xy, floor

In [17]:
class IndoorWiFiDataSet(Dataset):
    def __init__(self, wifi_df, data_type='test'):
        self.wifi_df = wifi_df
        self.data_type = data_type
        self.bssids = wifi_df[bssid_feats].to_numpy().astype(np.int)    # <----- リファクタ
        self.rssis = wifi_df[rssi_feats].to_numpy().astype(np.float)    # <----- リファクタ
        self.site_ids = wifi_df['site_id'].to_numpy()
        if data_type in ['train', 'valid']:
            self.floors = wifi_df['floor'].to_numpy().astype(np.int)
            self.xs = wifi_df['x'].to_numpy()
            self.ys = wifi_df['y'].to_numpy()
        
    def __len__(self):
        return len(self.wifi_df)

    def __getitem__(self, idx):

        #wifi_row = self.wifi_df.iloc[idx]

        bssid = self.bssids[idx,:]
        rssi = self.rssis[idx,:]
        site_id = self.site_ids[idx]

        if self.data_type in ['test', 'valid']:
            return bssid, rssi, site_id

        elif self.data_type == 'train':
            x = self.xs[idx]
            y = self.ys[idx]
            floor = self.floors[idx]

            return bssid, rssi, site_id, x, y, floor


In [18]:
class IndoorLoss(nn.Module):
    def __init__(self):
        super().__init__()

        self.mse = nn.MSELoss()

    def forward(self, x, y, floor, x_pred, y_pred, floor_pred):

        x_loss = self.mse(x, x_pred)
        y_loss = self.mse(y, y_pred)
        floor_loss = self.mse(floor, floor_pred)

        return x_loss + y_loss + floor_loss

In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [20]:
%%time

oof_x = np.zeros((len(df_train)))
oof_y = np.zeros((len(df_train)))
oof_floor = np.zeros((len(df_train)))
cv_scores = {}

kf = KFold(n_splits=config['globals']['n_splits'], random_state=42, shuffle=True)
for fold, (train_index, valid_index) in enumerate(kf.split(df_train)):
    
    print(f'\r\n====== {fold} ======')
    
    net = IndoorWiFiNet(wifi_bssids_size, site_count, config['globals']['num_feats'])
    net = net.to(device)

    #criterion = IndoorLoss()
    criterion = nn.MSELoss()
    criterion = criterion.to(device)
    #optimizer = optim.SGD(net.parameters(), lr=Config.LR, weight_decay=0.0001, momentum=0.9)
    optimizer = torch.optim.Adam(net.parameters(), lr=config['globals']['lr'])
    
    train, valid = df_train.iloc[train_index], df_train.iloc[valid_index]

    # TODO oof用のdataloaderはいらないはず
    train_dataset = IndoorWiFiDataSet(train.reset_index(drop=True), data_type='train')
    valid_dataset = IndoorWiFiDataSet(valid.reset_index(drop=True), data_type='valid')

    trainloader = DataLoader(train_dataset, batch_size=config['globals']['batch_size'], shuffle=True, drop_last=True, num_workers=4)
    validloader = DataLoader(valid_dataset, batch_size=config['globals']['batch_size'], num_workers=4)

    early_stopping = EarlyStopping(patience=config['globals']['patience'], verbose=True, fold=fold)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['globals']['epoch'])


    ############# TRAIN #############
    val_losses = []
    val_metrics = []
    for epoch in range(config['globals']['epoch']):

        running_loss = 0.0
        #train_lwlrap = 0.0
        n_iter = len(trainloader)
        with tqdm(enumerate(trainloader), total=n_iter) as pbar:
            for i, (bssid, rssi, site_id, x, y, floor) in pbar:
                net.train()
                # zero the parameter gradients
                optimizer.zero_grad()

                bssid, rssi, site_id, x, y, floor = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long(), x.to(device).float(), y.to(device).float(), floor.to(device)

                xy_pred, floor_pred = net(bssid, rssi, site_id)
                xy_pred = xy_pred.transpose(0, 1).squeeze(-2).reshape(-1)
                label = torch.cat([x, y], dim=-1)
                loss = criterion(xy_pred, label)
                #loss = criterion(x, y, floor, x_pred, y_pred, floor_pred)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

                pbar.set_postfix(OrderedDict(
                    epoch="{:>10}".format(epoch), loss="{:.4f}".format(loss.item())
                ))
            #scheduler.step()
            
        #if epoch < Config.SKIP_EVALUATE_NUM:
        #    continue

        ############# VALID #############
        val_loss = 0.0
        val_lwlrap = 0.0
        x_preds = np.array([])
        y_preds = np.array([])
        val_preds_frame = []
        n_iter_val = len(validloader)
        for i, (bssid, rssi, site_id) in tqdm(enumerate(validloader), total=len(validloader)):
            net.eval()

            with torch.no_grad():
                
                #melspec, labels = melspec.to(device).float(), labels.to(device).float()
                bssid, rssi, site_id = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long()
                xy_pred, floor_pred = net(bssid, rssi, site_id)

                xy_pred = xy_pred.to('cpu').detach().numpy().copy()
                x_pred, y_pred = np.hsplit(xy_pred, 2)
                x_preds = np.concatenate([x_preds, x_pred.reshape(-1)])
                y_preds = np.concatenate([y_preds, y_pred.reshape(-1)])

        score = comp_metric(x_preds, y_preds, 0, valid['x'], valid['y'], 0)

        early_stopping(score, net)

        if early_stopping.early_stop:
            logger.info("Early stopping")
            cv_scores[f'cv{fold}'] = early_stopping.best_score
            break
                
    _x_oof = np.array([])
    _y_oof = np.array([])
    ############# OOF #############
    oof_preds = []
    for i, (bssid, rssi, site_id) in tqdm(enumerate(validloader), total=len(validloader)):
#         net.load_state_dict(torch.load(ROOT_DIR / Path('output') / f'checkpoint_{NB}_{fold}.pt'))
#         net.load_state_dict(f'checkpoint_{NB}_{fold}.pt')
        net.eval()

        with torch.no_grad():

            bssid, rssi, site_id, x, y, floor = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long(), x.to(device).float(), y.to(device).float(), floor.to(device)

            xy_pred, floor_pred = net(bssid, rssi, site_id)

            xy_pred = xy_pred.to('cpu').detach().numpy().copy()
            x_pred, y_pred = np.hsplit(xy_pred, 2)
            _x_oof = np.concatenate([_x_oof, x_pred.reshape(-1)])
            _y_oof = np.concatenate([_y_oof, y_pred.reshape(-1)])

    print(comp_metric(_x_oof, _y_oof, 0, valid['x'], valid['y'], 0))

    oof_x[valid_index] = _x_oof
    oof_y[valid_index] = _y_oof

    del net
    torch.cuda.empty_cache()
    




100%|██████████| 403/403 [00:03<00:00, 119.34it/s, epoch=0, loss=14679.4971]
100%|██████████| 101/101 [00:00<00:00, 301.02it/s]
2021-03-25 07:42:41.089 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (inf --> 158.836516).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 120.14it/s, epoch=1, loss=10459.9863]
100%|██████████| 101/101 [00:00<00:00, 300.80it/s]
2021-03-25 07:42:45.458 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (158.836516 --> 138.702419).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 119.24it/s, epoch=2, loss=6340.1836]
100%|██████████| 101/101 [00:00<00:00, 285.59it/s]
2021-03-25 07:42:49.870 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (138.702419 --> 109.953360).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 121.05it/s, epoch=3, loss=2928.5596]
100%|██████████| 101/101 [00:00<00:00, 297.54it/s]
2021-03-25 07:42:54.219 | INFO     | __main__:save_checkpoint:44 - Validation

5.680477417855777



100%|██████████| 403/403 [00:03<00:00, 119.75it/s, epoch=0, loss=14995.0781]
100%|██████████| 101/101 [00:00<00:00, 319.48it/s]
2021-03-25 07:49:57.232 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (inf --> 159.575007).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 120.31it/s, epoch=1, loss=10150.5244]
100%|██████████| 101/101 [00:00<00:00, 302.06it/s]
2021-03-25 07:50:01.625 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (159.575007 --> 139.748262).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 117.88it/s, epoch=2, loss=5944.5630]
100%|██████████| 101/101 [00:00<00:00, 298.41it/s]
2021-03-25 07:50:06.091 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (139.748262 --> 109.345645).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 119.82it/s, epoch=3, loss=2941.1626]
100%|██████████| 101/101 [00:00<00:00, 299.86it/s]
2021-03-25 07:50:10.500 | INFO     | __main__:save_checkpoint:44 - Validation

5.593017968161495



100%|██████████| 403/403 [00:04<00:00, 86.62it/s, epoch=0, loss=14457.9795] 
100%|██████████| 101/101 [00:02<00:00, 41.64it/s]
2021-03-25 07:59:19.863 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (inf --> 159.429516).  Saving model ...
100%|██████████| 403/403 [00:04<00:00, 86.47it/s, epoch=1, loss=10400.2627] 
100%|██████████| 101/101 [00:02<00:00, 42.72it/s]
2021-03-25 07:59:27.544 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (159.429516 --> 140.243735).  Saving model ...
100%|██████████| 403/403 [00:05<00:00, 75.53it/s, epoch=2, loss=5653.4536] 
100%|██████████| 101/101 [00:02<00:00, 42.74it/s]
2021-03-25 07:59:35.901 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (140.243735 --> 109.148388).  Saving model ...
100%|██████████| 403/403 [00:05<00:00, 73.95it/s, epoch=3, loss=3356.3323] 
100%|██████████| 101/101 [00:01<00:00, 61.07it/s]
2021-03-25 07:59:43.665 | INFO     | __main__:save_checkpoint:44 - Validation los

5.576261121563591



100%|██████████| 403/403 [00:04<00:00, 87.48it/s, epoch=0, loss=13810.2090] 
100%|██████████| 101/101 [00:00<00:00, 284.03it/s]
2021-03-25 08:06:55.719 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (inf --> 158.414161).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 122.31it/s, epoch=1, loss=9798.1016] 
100%|██████████| 101/101 [00:00<00:00, 295.37it/s]
2021-03-25 08:07:00.103 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (158.414161 --> 138.380990).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 121.08it/s, epoch=2, loss=6043.1499]
100%|██████████| 101/101 [00:00<00:00, 298.51it/s]
2021-03-25 08:07:04.513 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (138.380990 --> 108.880944).  Saving model ...
100%|██████████| 403/403 [00:03<00:00, 119.69it/s, epoch=3, loss=2944.4368]
100%|██████████| 101/101 [00:00<00:00, 307.27it/s]
2021-03-25 08:07:08.947 | INFO     | __main__:save_checkpoint:44 - Validation

5.532555534152686



100%|██████████| 403/403 [00:05<00:00, 73.05it/s, epoch=0, loss=13468.9639] 
100%|██████████| 101/101 [00:03<00:00, 33.39it/s]
2021-03-25 08:17:19.214 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (inf --> 159.797346).  Saving model ...
100%|██████████| 403/403 [00:06<00:00, 67.05it/s, epoch=1, loss=9900.6211]  
100%|██████████| 101/101 [00:03<00:00, 33.40it/s]
2021-03-25 08:17:29.028 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (159.797346 --> 138.899086).  Saving model ...
100%|██████████| 403/403 [00:06<00:00, 66.23it/s, epoch=2, loss=6574.1660] 
100%|██████████| 101/101 [00:03<00:00, 33.29it/s]
2021-03-25 08:17:38.930 | INFO     | __main__:save_checkpoint:44 - Validation loss decreased (138.899086 --> 110.352820).  Saving model ...
100%|██████████| 403/403 [00:04<00:00, 88.22it/s, epoch=3, loss=3355.8237] 
100%|██████████| 101/101 [00:00<00:00, 321.05it/s]
2021-03-25 08:17:47.034 | INFO     | __main__:save_checkpoint:44 - Validation lo

6.056071572541472
CPU times: user 23min 49s, sys: 7min 29s, total: 31min 18s
Wall time: 38min 15s





In [21]:
oof_score = comp_metric(oof_x, oof_y, np.zeros(len(df_train)), df_train['x'], df_train['y'], np.zeros(len(df_train)))
oof_score

5.687676722855005