# Overview
- yukiさんのノートブックを参考にする

In [1]:
import subprocess
cmd = "git rev-parse --short HEAD"
hash = subprocess.check_output(cmd.split()).strip().decode('utf-8')
print(hash)

1c30b36


# Const

In [2]:
NB = '002'
DIR_TRAIN = './../data_ignore/input/train/'
DIR_TEST = './../data_ignore/input/test/'
DIR_WIFI = './../data_ignore/input/wifi/'
PATH_SUB = './../data_ignore/input/sample_submission.csv'
PATH_99_SUB = './../data/input/floor_99per_acc_sub.csv'
DIR_SAVE_IGNORE = f'./../data_ignore/nb/{NB}/'
DIR_SAVE = f'./../data/nb/{NB}/'

In [3]:
config_str = '''
globals:
    seed: 5713
    device: cuda
    n_label: 24
    n_splits: 5
    random_sate: 42
    lr: 0.001
    patience: 10
    epoch: 100
    batch_size: 512
    skip_evaluate_num: 5
    num_feats: 20
    t_mux: 10
'''

# Import everything I need:)

In [4]:
import os
import yaml
import types
import random
import pickle
import builtins
import numpy as np
import pandas as pd
import pytablewriter
from icecream import ic
# from tqdm import tqdm
from fastprogress import progress_bar, master_bar
from glob import glob
from loguru import logger
from collections import OrderedDict

# sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold

# pytorch
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

# Function

In [5]:
def imports():
    for name, val in globals().items():
        # module imports
        if isinstance(val, types.ModuleType):
            yield name, val

            # functions / callables
        if hasattr(val, '__call__'):
            yield name, val


def noglobal(f):
    '''
    ref: https://gist.github.com/raven38/4e4c3c7a179283c441f575d6e375510c
    '''
    return types.FunctionType(f.__code__,
                              dict(imports()),
                              f.__name__,
                              f.__defaults__,
                              f.__closure__
                              )


def comp_metric(xhat, yhat, fhat, x, y, f):
    intermediate = np.sqrt(np.power(xhat-x, 2) + np.power(yhat-y, 2)) + 15 * np.abs(fhat-f)
    return intermediate.sum()/xhat.shape[0]

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


@noglobal
def calc_metrics_site(x_pred, y_pred, f_pred, x_true, y_true, f_true, site_arr):
    '''
    site_arrは、intじゃなくて、"B1", "F2"みたいな感じで入力
    site_arr = le_site.inverse_transform(df_train.site_id)
    '''
    site_unique = sorted(np.unique(site_arr))
    score_list = []
    n_sample = []
    for site in site_unique:
        logics = site_arr == site
        x_pred_, y_pred_, f_pred_ = x_pred[logics], y_pred[logics], f_pred[logics]
        x_true_, y_true_, f_true_, = x_true[logics], y_true[logics], f_true[logics]
        len_x = len(x_pred_)
        
        intermediate = np.sqrt(np.power(x_pred_-x_true_, 2) + np.power(y_pred_-y_true_, 2)) + 15 * np.abs(f_pred_-f_true_)
        
        score_list.append(intermediate.sum()/len_x)
        n_sample.append(len_x)
    
    df_result = pd.DataFrame({
        'site': site_unique,
        'n_sample': n_sample,
        'score': score_list,
    })
    return df_result


@noglobal
def site_eval_report(df_result_site):
    df_result_site.insert(loc=0, column='No', value=df_result_site.index)
    writer = pytablewriter.MarkdownTableWriter()
    writer.from_dataframe(df_result_site)
    writer.write_table()

# Preparation

load config

In [6]:
config = yaml.safe_load(config_str)
config

{'globals': {'seed': 5713,
  'device': 'cuda',
  'n_label': 24,
  'n_splits': 5,
  'random_sate': 42,
  'lr': 0.001,
  'patience': 10,
  'epoch': 100,
  'batch_size': 512,
  'skip_evaluate_num': 5,
  'num_feats': 20,
  't_mux': 10}}

<br>

set

In [7]:
seed_everything(config['globals']['seed'])

if not os.path.exists(DIR_SAVE_IGNORE):
    os.makedirs(DIR_SAVE_IGNORE)
if not os.path.exists(DIR_SAVE):
    os.makedirs(DIR_SAVE)

<br>

load dataset

In [8]:
with open(f'{DIR_WIFI}train_all.pkl', 'rb') as f:
    df_train = pickle.load( f)
with open(f'{DIR_WIFI}test_all.pkl', 'rb') as f:
    df_test = pickle.load( f)

<br>

preprocessing

In [9]:
bssid_feats = [f'bssid_{i}' for i in range(config['globals']['num_feats'])]
rssi_feats  = [f'rssi_{i}' for i in range(config['globals']['num_feats'])]

In [10]:
# bssidの一覧作成
# wifi_bassidにはtrainとtest両方のbssidの一覧が含まれる

wifi_bssids = []
for i in range(100):
    wifi_bssids.extend(df_train.iloc[:,i].values.tolist())
wifi_bssids = list(set(wifi_bssids))

wifi_bssids_size = len(wifi_bssids)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids_test = []
for i in range(100):
    wifi_bssids_test.extend(df_test.iloc[:,i].values.tolist())
wifi_bssids_test = list(set(wifi_bssids_test))

wifi_bssids_size = len(wifi_bssids_test)
print(f'BSSID TYPES: {wifi_bssids_size}')

wifi_bssids.extend(wifi_bssids_test)
wifi_bssids_size = len(wifi_bssids)
print(f'BSSID TYPES: {wifi_bssids_size}')

BSSID TYPES: 61206
BSSID TYPES: 33042
BSSID TYPES: 94248


In [11]:
# LabelEncoding & StandardScaler

le = LabelEncoder()
le.fit(wifi_bssids)
le_site = LabelEncoder()
le_site.fit(df_train['site_id'])

ss = StandardScaler()
ss.fit(df_train.loc[:, rssi_feats])

StandardScaler()

In [12]:
df_train.loc[:, rssi_feats] = ss.transform(df_train.loc[:, rssi_feats])
df_test.loc[:, rssi_feats] = ss.transform(df_test.loc[:, rssi_feats])
for feat in bssid_feats:
    df_train.loc[:, feat] = le.transform(df_train.loc[:, feat])
    df_test.loc[:, feat] = le.transform(df_test.loc[:, feat])
    
    df_train.loc[:, feat] = df_train.loc[:, feat] + 1
    df_test.loc[:, feat] = df_test.loc[:, feat] + 1
    
df_train.loc[:, 'site_id'] = le_site.transform(df_train.loc[:, 'site_id'])
df_test.loc[:, 'site_id'] = le_site.transform(df_test.loc[:, 'site_id'])

df_train.loc[:, rssi_feats] = ss.transform(df_train.loc[:, rssi_feats])
df_test.loc[:, rssi_feats] = ss.transform(df_test.loc[:, rssi_feats])

In [13]:
site_count = len(df_train['site_id'].unique())
df_train.reset_index(drop=True, inplace=True)

In [14]:
df_train.head()

Unnamed: 0,bssid_0,bssid_1,bssid_2,bssid_3,bssid_4,bssid_5,bssid_6,bssid_7,bssid_8,bssid_9,...,rssi_95,rssi_96,rssi_97,rssi_98,rssi_99,x,y,floor,path,site_id
0,52393,35871,2765,34898,52710,35260,42720,33510,23417,15249,...,-79,-79,-79,-79,-79,107.85044,161.89262,-1,5e1580adf4c3420006d520d4,0
1,35871,52393,7487,34898,52710,35260,21971,15249,17025,5351,...,-79,-79,-79,-80,-80,107.85044,161.89262,-1,5e1580adf4c3420006d520d4,0
2,35871,52393,52710,34898,35260,23417,49408,6673,7487,48501,...,-77,-78,-78,-78,-78,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0
3,23417,34898,35260,52393,35871,3707,49408,15613,10167,4978,...,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0
4,35871,35260,23417,19473,52393,3707,49408,18306,21410,52795,...,-75,-76,-76,-77,-77,98.33065,163.34334,-1,5e1580adf4c3420006d520d4,0


# Execute

In [15]:
class EarlyStopping:
    """
    Early stops the training if validation loss doesn't improve after a given patience.
    based on: https://github.com/Bjarten/early-stopping-pytorch
    """
    def __init__(self, save_name, fold, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.fold = fold
        self.save_name = save_name

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            logger.info(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            logger.info(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
            
        torch.save(model.state_dict(),  f'{self.save_name}_{self.fold}.pt')
        self.val_loss_min = val_loss

In [16]:
class IndoorWiFiNet(nn.Module):
    def __init__(self, bssid_size, site_size, num_feats):
        super(IndoorWiFiNet, self).__init__()
        self.bssid_embedding = nn.Embedding(bssid_size, 64, max_norm=True)
        self.site_embedding = nn.Embedding(site_size, 64, max_norm=True)

        self.rssi = nn.Sequential(
            nn.BatchNorm1d(20),
            nn.Linear(20, num_feats * 64)
        )

        concat_size = 64 + (num_feats * 64) + (num_feats * 64)
        self.bn1 = nn.BatchNorm1d(concat_size)

        self.flatten = nn.Flatten()

        self.dropout1 = nn.Dropout(0.3)
        self.linear1 = nn.Linear(in_features=concat_size, out_features=256)#, bias=False)
        self.bn2 = nn.BatchNorm1d(256)

        self.linear2 = nn.Linear(in_features=256, out_features=128)#, bias=False)
        self.linear3 = nn.Linear(in_features=128, out_features=16)#, bias=False)

        self.bn3 = nn.BatchNorm1d(128)
        self.bn4 = nn.BatchNorm1d(16)


        self.xy = nn.Linear(in_features=16, out_features=2)#, bias=False)
        self.floor = nn.Linear(in_features=16, out_features=1)#, bias=False)
        

    def forward(self, bssid, rssi, site):

        site = torch.reshape(site, (-1, 1))

        bssid_out = self.bssid_embedding(bssid)
        site_out = self.site_embedding(site)

        rssi_out = self.rssi(rssi)

        bssid_out = self.flatten(bssid_out)
        site_out = self.flatten(site_out)

        x = torch.cat([bssid_out, rssi_out, site_out], dim=1)
        x = self.bn1(x)
        x = self.dropout1(x)
        x = F.relu(self.linear1(x))
        x = self.bn2(x)

        x = F.relu(self.linear2(x))
        x = self.bn3(x)

        x = F.relu(self.linear3(x))
        x = self.bn4(x)

        #x = self.lstm(x)

        xy = self.xy(x)
        floor = self.floor(x)

        return xy, floor

In [17]:
class IndoorWiFiDataSet(Dataset):
    def __init__(self, wifi_df, bssid_feats_, rssi_feats_, data_type='test'):
        self.wifi_df = wifi_df
        self.data_type = data_type
        self.bssids = wifi_df[bssid_feats_].to_numpy().astype(np.int)    # <----- リファクタ
        self.rssis = wifi_df[rssi_feats_].to_numpy().astype(np.float)    # <----- リファクタ
        self.site_ids = wifi_df['site_id'].to_numpy()
        if data_type in ['train', 'valid']:
            self.floors = wifi_df['floor'].to_numpy().astype(np.int)
            self.xs = wifi_df['x'].to_numpy()
            self.ys = wifi_df['y'].to_numpy()
        
    def __len__(self):
        return len(self.wifi_df)

    def __getitem__(self, idx):

        #wifi_row = self.wifi_df.iloc[idx]

        bssid = self.bssids[idx,:]
        rssi = self.rssis[idx,:]
        site_id = self.site_ids[idx]

        if self.data_type in ['test', 'valid']:
            return bssid, rssi, site_id

        elif self.data_type == 'train':
            x = self.xs[idx]
            y = self.ys[idx]
            floor = self.floors[idx]

            return bssid, rssi, site_id, x, y, floor


In [18]:
class IndoorLoss(nn.Module):
    def __init__(self):
        super().__init__()

        self.mse = nn.MSELoss()

    def forward(self, x, y, floor, x_pred, y_pred, floor_pred):

        x_loss = self.mse(x, x_pred)
        y_loss = self.mse(y, y_pred)
        floor_loss = self.mse(floor, floor_pred)

        return x_loss + y_loss + floor_loss

In [19]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device(config['globals']['device'])

In [20]:
%%time

oof_x = np.zeros((len(df_train)))
oof_y = np.zeros((len(df_train)))
oof_floor = np.zeros((len(df_train)))
cv_scores = {}

kf = KFold(n_splits=config['globals']['n_splits'], random_state=42, shuffle=True)
for fold, (train_index, valid_index) in enumerate(kf.split(df_train)):
    print(f'\r\n====== {fold + 1} ======')
    net = IndoorWiFiNet(wifi_bssids_size, site_count, config['globals']['num_feats'])
    net = net.to(device)

    #criterion = IndoorLoss()
    criterion = nn.MSELoss()
    criterion = criterion.to(device)
    #optimizer = optim.SGD(net.parameters(), lr=Config.LR, weight_decay=0.0001, momentum=0.9)
    optimizer = torch.optim.Adam(net.parameters(), lr=config['globals']['lr'])
    
    train, valid = df_train.iloc[train_index], df_train.iloc[valid_index]

    # TODO oof用のdataloaderはいらないはず
    train_dataset = IndoorWiFiDataSet(train.reset_index(drop=True), bssid_feats, rssi_feats, data_type='train')
    valid_dataset = IndoorWiFiDataSet(valid.reset_index(drop=True), bssid_feats, rssi_feats, data_type='valid')

    trainloader = DataLoader(train_dataset, batch_size=config['globals']['batch_size'], shuffle=True, drop_last=True, num_workers=4)
    validloader = DataLoader(valid_dataset, batch_size=config['globals']['batch_size'], num_workers=4)

    early_stopping = EarlyStopping(save_name=f'{DIR_SAVE_IGNORE}checkpoint_{NB}', fold=fold+1, patience=config['globals']['patience'], verbose=True)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config['globals']['t_mux'])


    ############# TRAIN #############
    val_losses = []
    val_metrics = []
    mb = master_bar(range(1, config['globals']['epoch']+1))
    for epoch in mb:
        mb.main_bar.comment = f'epoch = {epoch}'
        running_loss = 0.0
        #train_lwlrap = 0.0
#         n_iter = len(trainloader)
#         with tqdm(enumerate(trainloader), total=n_iter) as pbar:
        for i, (bssid, rssi, site_id, x, y, floor) in enumerate(progress_bar(trainloader, parent=mb)):
            net.train()
            # zero the parameter gradients
            optimizer.zero_grad()

            bssid, rssi, site_id, x, y, floor = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long(), x.to(device).float(), y.to(device).float(), floor.to(device)

            xy_pred, floor_pred = net(bssid, rssi, site_id)
            xy_pred = xy_pred.transpose(0, 1).squeeze(-2).reshape(-1)
            label = torch.cat([x, y], dim=-1)
            loss = criterion(xy_pred, label)
            #loss = criterion(x, y, floor, x_pred, y_pred, floor_pred)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
#             mb.child.comment(OrderedDict(
#                 epoch="{:>10}".format(epoch), loss="{:.4f}".format(loss.item())
#             ))
            mb.child.comment = f'loss={loss.item():.4f}'
        scheduler.step()
            
        #if epoch < Config.SKIP_EVALUATE_NUM:
        #    continue

        ############# VALID #############
        val_loss = 0.0
        val_lwlrap = 0.0
        x_preds = np.array([])
        y_preds = np.array([])
        val_preds_frame = []
        n_iter_val = len(validloader)
#         for i, (bssid, rssi, site_id) in tqdm(enumerate(validloader), total=len(validloader)):
        for i, (bssid, rssi, site_id) in enumerate(progress_bar(validloader, parent=mb)):
            mb.child.comment = 'calc valid'
            net.eval()

            with torch.no_grad():
                
                #melspec, labels = melspec.to(device).float(), labels.to(device).float()
                bssid, rssi, site_id = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long()
                xy_pred, floor_pred = net(bssid, rssi, site_id)

                xy_pred = xy_pred.to('cpu').detach().numpy().copy()
                x_pred, y_pred = np.hsplit(xy_pred, 2)
                x_preds = np.concatenate([x_preds, x_pred.reshape(-1)])
                y_preds = np.concatenate([y_preds, y_pred.reshape(-1)])

        score = comp_metric(x_preds, y_preds, 0, valid['x'], valid['y'], 0)

        early_stopping(score, net)

        if early_stopping.early_stop:
            logger.info("Early stopping")
            cv_scores[f'cv{fold + 1}'] = early_stopping.best_score
            break
                
    _x_oof = np.array([])
    _y_oof = np.array([])
    ############# OOF #############
    oof_preds = []
#     for i, (bssid, rssi, site_id) in tqdm(enumerate(validloader), total=len(validloader)):
    for i, (bssid, rssi, site_id) in enumerate(progress_bar(validloader, parent=mb)):
#         net.load_state_dict(torch.load(ROOT_DIR / Path('output') / f'checkpoint_{NB}_{fold}.pt'))
        net.load_state_dict(torch.load(f'{DIR_SAVE_IGNORE}checkpoint_{NB}_{fold + 1}.pt'))
        net.eval()

        with torch.no_grad():

            bssid, rssi, site_id, x, y, floor = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long(), x.to(device).float(), y.to(device).float(), floor.to(device)

            xy_pred, floor_pred = net(bssid, rssi, site_id)

            xy_pred = xy_pred.to('cpu').detach().numpy().copy()
            x_pred, y_pred = np.hsplit(xy_pred, 2)
            _x_oof = np.concatenate([_x_oof, x_pred.reshape(-1)])
            _y_oof = np.concatenate([_y_oof, y_pred.reshape(-1)])

    print(comp_metric(_x_oof, _y_oof, 0, valid['x'], valid['y'], 0))

    oof_x[valid_index] = _x_oof
    oof_y[valid_index] = _y_oof

    del net
    torch.cuda.empty_cache()




2021-04-09 09:47:28.247 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (inf --> 158.660448).  Saving model ...
2021-04-09 09:47:32.260 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (158.660448 --> 139.490426).  Saving model ...
2021-04-09 09:47:36.213 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (139.490426 --> 114.101066).  Saving model ...
2021-04-09 09:47:40.280 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (114.101066 --> 89.173849).  Saving model ...
2021-04-09 09:47:44.256 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (89.173849 --> 66.712030).  Saving model ...
2021-04-09 09:47:48.237 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (66.712030 --> 53.446055).  Saving model ...
2021-04-09 09:47:52.216 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (53.446055 --> 42.796336).  Saving model ...
2021-04-09 09:47:56.325 | INFO    

6.005799526193263



2021-04-09 09:50:25.562 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (inf --> 158.995700).  Saving model ...
2021-04-09 09:50:32.406 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (158.995700 --> 140.084158).  Saving model ...
2021-04-09 09:50:39.201 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (140.084158 --> 113.370383).  Saving model ...
2021-04-09 09:50:45.975 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (113.370383 --> 87.392728).  Saving model ...
2021-04-09 09:50:52.832 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (87.392728 --> 67.323958).  Saving model ...
2021-04-09 09:50:59.549 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (67.323958 --> 54.520849).  Saving model ...
2021-04-09 09:51:06.352 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (54.520849 --> 43.584443).  Saving model ...
2021-04-09 09:51:13.113 | INFO    

6.011297022485053



2021-04-09 09:55:09.924 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (inf --> 159.641658).  Saving model ...
2021-04-09 09:55:16.974 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (159.641658 --> 139.531998).  Saving model ...
2021-04-09 09:55:24.805 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (139.531998 --> 113.458663).  Saving model ...
2021-04-09 09:55:31.799 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (113.458663 --> 90.565456).  Saving model ...
2021-04-09 09:55:38.824 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (90.565456 --> 67.869719).  Saving model ...
2021-04-09 09:55:45.839 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (67.869719 --> 53.929203).  Saving model ...
2021-04-09 09:55:52.952 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (53.929203 --> 43.059372).  Saving model ...
2021-04-09 09:56:00.122 | INFO    

6.071137844910877



2021-04-09 10:00:24.334 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (inf --> 158.695697).  Saving model ...
2021-04-09 10:00:31.630 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (158.695697 --> 139.134984).  Saving model ...
2021-04-09 10:00:38.883 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (139.134984 --> 114.372478).  Saving model ...
2021-04-09 10:00:46.879 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (114.372478 --> 89.867350).  Saving model ...
2021-04-09 10:00:54.186 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (89.867350 --> 68.984971).  Saving model ...
2021-04-09 10:01:01.516 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (68.984971 --> 55.048646).  Saving model ...
2021-04-09 10:01:08.881 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (55.048646 --> 45.088213).  Saving model ...
2021-04-09 10:01:16.162 | INFO    

5.998708651509524



2021-04-09 10:05:44.709 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (inf --> 159.829914).  Saving model ...
2021-04-09 10:05:52.403 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (159.829914 --> 139.165347).  Saving model ...
2021-04-09 10:06:01.581 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (139.165347 --> 114.202391).  Saving model ...
2021-04-09 10:06:10.001 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (114.202391 --> 87.820906).  Saving model ...
2021-04-09 10:06:17.601 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (87.820906 --> 67.976062).  Saving model ...
2021-04-09 10:06:25.285 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (67.976062 --> 53.725723).  Saving model ...
2021-04-09 10:06:33.000 | INFO     | __main__:save_checkpoint:46 - Validation loss decreased (53.725723 --> 44.749210).  Saving model ...
2021-04-09 10:06:40.627 | INFO    

In [21]:
oof_score = comp_metric(oof_x, oof_y, np.zeros(len(df_train)), df_train['x'], df_train['y'], np.zeros(len(df_train)))
oof_score

6.014108136133077

# test

In [22]:
%%time
x_preds_mean = np.zeros(len(df_test))
y_preds_mean = np.zeros(len(df_test))

for fold in range(config['globals']['n_splits']):

    x_preds = np.array([])
    y_preds = np.array([])

    net = IndoorWiFiNet(wifi_bssids_size, site_count, config['globals']['num_feats'])
    net.to(device)
#     print(ROOT_DIR / Path('output') / f'load checkpoint_{NB}_{fold}.pt')
#     net.load_state_dict(torch.load(ROOT_DIR / Path('output') / f'checkpoint_{NB}_{fold}.pt'))
    net.load_state_dict(torch.load(f'{DIR_SAVE_IGNORE}checkpoint_{NB}_{fold + 1}.pt'))

    test_dataset = IndoorWiFiDataSet(df_test, bssid_feats, rssi_feats, data_type='test')
#     valid_dataset = IndoorWiFiDataSet(valid.reset_index(drop=True), bssid_feats, rssi_feats, data_type='valid')
    testloader = DataLoader(test_dataset, batch_size=config['globals']['batch_size'], num_workers=4)
    
    test_preds = []
    for i, (bssid, rssi, site_id) in enumerate(progress_bar(testloader)):
        net.eval()

        with torch.no_grad():

            bssid, rssi, site_id = bssid.to(device).long(), rssi.to(device).float(), site_id.to(device).long()

            xy_pred, floor_pred = net(bssid, rssi, site_id)
            xy_pred = xy_pred.to('cpu').detach().numpy().copy()

            x_pred, y_pred = np.hsplit(xy_pred, 2)
            x_preds = np.concatenate([x_preds, x_pred.reshape(-1)])
            y_preds = np.concatenate([y_preds, y_pred.reshape(-1)])

    x_preds_mean += x_preds / config['globals']['n_splits']
    y_preds_mean += y_preds / config['globals']['n_splits']

CPU times: user 559 ms, sys: 1.54 s, total: 2.1 s
Wall time: 11.6 s


In [23]:
x_preds_mean

array([ 86.4803299 ,  78.9365448 ,  78.03311462, ..., 204.62711792,
       202.65791626, 202.10221863])

In [24]:
df_xy_oof = pd.DataFrame()
df_xy_oof['oof_x'] = oof_x
df_xy_oof['oof_y'] = oof_y
df_xy_oof.to_csv(f'{DIR_SAVE}nb{NB}_oof_xy.csv', index=False)

In [25]:
sample_submission = pd.read_csv(PATH_SUB)
submission_99per_acc = pd.read_csv(PATH_99_SUB)

pred_df = df_test[['site_path_timestamp']].copy()
pred_df['x'] = x_preds_mean
pred_df['y'] = y_preds_mean
sub = pred_df.merge(submission_99per_acc[['site_path_timestamp', 'floor']], how='left', on='site_path_timestamp')[['site_path_timestamp', 'floor', 'x', 'y']]

In [26]:
assert len(sample_submission.merge(sub, how='inner', on='site_path_timestamp')) == len(sample_submission)

In [27]:
sub_file_path = f'{DIR_SAVE}nb{NB}_sub.csv'
sub.to_csv(sub_file_path, index=False)

# Analysis

In [28]:
site_arr = le_site.inverse_transform(df_train.site_id)
df_result_site = calc_metrics_site(oof_x, oof_y, np.zeros(len(df_train)), df_train['x'], df_train['y'], np.zeros(len(df_train)), site_arr)
site_eval_report(df_result_site)

|No |          site          |n_sample|score|
|--:|------------------------|-------:|----:|
|  0|5a0546857ecc773753327266|    9296|5.532|
|  1|5c3c44b80379370013e0fd2b|    9737|7.422|
|  2|5d27075f03f801723c2e360f|   23666|5.832|
|  3|5d27096c03f801723c31e5e0|    9100|4.452|
|  4|5d27097f03f801723c320d97|   10507|7.720|
|  5|5d27099f03f801723c32511d|    4251|5.136|
|  6|5d2709a003f801723c3251bf|    3940|4.584|
|  7|5d2709b303f801723c327472|   15358|5.497|
|  8|5d2709bb03f801723c32852c|   17203|6.578|
|  9|5d2709c303f801723c3299ee|   10083|5.469|
| 10|5d2709d403f801723c32bd39|   10027|5.558|
| 11|5d2709e003f801723c32d896|   11042|6.515|
| 12|5da138274db8ce0c98bbd3d2|    2662|5.101|
| 13|5da1382d4db8ce0c98bbe92e|    8999|6.456|
| 14|5da138314db8ce0c98bbf3a0|    9012|4.372|
| 15|5da138364db8ce0c98bc00f1|    2767|3.919|
| 16|5da1383b4db8ce0c98bc11ab|   13196|6.497|
| 17|5da138754db8ce0c98bca82f|    7188|5.655|
| 18|5da138764db8ce0c98bcaa46|    9420|5.485|
| 19|5da1389e4db8ce0c98bd0547|    