In [1]:
#https://drive.google.com/file/d/1EVwP6MUQAeSnGM-ywBRHspCn_OWEfzJN/view?usp=sharing
!curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=1EVwP6MUQAeSnGM-ywBRHspCn_OWEfzJN" > /dev/null
!curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=1EVwP6MUQAeSnGM-ywBRHspCn_OWEfzJN" -o train.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0   1136      0 --:--:-- --:--:-- --:--:--  1139
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
100  955k  100  955k    0     0   766k      0  0:00:01  0:00:01 --:--:--  766k


In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,id,time,x,y
0,1,0,800,0
1,1,780,780,0
2,1,1572,792,0
3,1,2392,820,0
4,1,3196,804,0


In [4]:
df['marker'] = np.multiply(df[['id','y']].groupby('id').agg(np.cumsum).values.ravel(), df['y'].values)
df['start'] = ((df['y'].shift(1, fill_value=0) == 0).values & (df['y'] == 1)).values
df['end'] = ((df['y'].shift(-1, fill_value=0) == 0).values & (df['y'] == 1)).values

q = df.loc[df['end'],'marker'] - df.loc[df['end'],'start']
print('аномалий: ', q.shape[0], 'средняя длина: ', int(q.mean()*100.0)*0.01, 'min: ', q.min(), 'max: ', q.max())

аномалий:  791 средняя длина:  25.060000000000002 min:  6 max:  81


In [5]:
import plotly.express as px
import plotly.graph_objects as go

for q in [111]:
    t = df.loc[df['id'] == q].sort_values('time').reset_index(drop=True)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t['time'], y=t['x'] - t['x'].mean(),
                        mode='lines',
                        name='lines'))
    qt = t.loc[t.y==1].reset_index(drop=True)
    fig.add_trace(go.Scatter(x=qt['time'], y=qt['x'] - t['x'].mean(),
                        mode='markers', name='markers'))
    print(t.x.mean())
    fig.show()


650.987012987013


In [6]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch
from torch.utils.data import Dataset

def scale_ts(vls):
    sc = StandardScaler()
    return sc.fit_transform(vls.reshape(-1,1)).ravel()
def scale_time_ts(vls):
    sc = MinMaxScaler()
    return sc.fit_transform(vls.reshape(-1,1)).ravel()

class CardioDataset(Dataset):
    def __init__(self, df, win_size=32):
        self.df = df.sort_values(['id','time']).reset_index(drop=True).copy()
        self.df['time'] = df.groupby('id')['time'].agg('diff').fillna(0).values
        self.df['time'] = scale_time_ts(self.df['time'].values)
        self.win_size = win_size

        self.point_indexes = []
        self.win_lens = []
        dfs = []
        total_len = 0
        for q,qdf in self.df.groupby('id'):
            qdf['x'] = scale_ts(qdf['x'].values)
            for i in range(max(1, qdf.shape[0] - win_size + 1)):
                self.point_indexes.append(i + total_len)
                if i + win_size > qdf.shape[0]:
                    self.win_lens.append(qdf.shape[0] - i)
                else:
                    self.win_lens.append(win_size)
            total_len += qdf.shape[0]
            dfs.append(qdf)
        self.df = pd.concat(dfs, ignore_index=True).reset_index(drop=True)

    def __len__(self):
        return len(self.point_indexes)

    def __getitem__(self, idx):
        i0 = self.point_indexes[idx]
        i1 = i0 + self.win_lens[idx]

        x_mat = np.zeros((self.win_size,2))
        y_mat = np.zeros(self.win_size)
        x_mat[-self.win_lens[idx]:,0] = self.df.iloc[i0:i1].x.values
        x_mat[-self.win_lens[idx]:,1] = self.df.iloc[i0:i1].time.values
        y_mat[-self.win_lens[idx]:] = self.df.iloc[i0:i1].y.values
        
        return {"x": x_mat,
                "y": y_mat,
                "start": i0,
                "end": i1
               }

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
class CardioRnn(nn.Module):
    def __init__(self, win_size, output_size, rnn_units = 32):
        super().__init__()
        self._gru = nn.GRU(input_size=2, 
                           hidden_size=rnn_units, 
                           batch_first=True, 
                           bidirectional=True)

        self._head = nn.Linear(in_features = 4 * rnn_units, 
                               out_features=output_size)
        
        self.fcdrop2 = nn.Dropout(p=0.3)
        
    def forward(self, x_feats):
        encoded, _ = self._gru(x_feats)
        x2 = torch.max(encoded, axis=1).values
        x2 = torch.squeeze(x2)

        x3 = torch.mean(encoded, axis=1)
        x3 = torch.squeeze(x3)

        x = torch.cat([x2,x3],-1)
        x = F.relu(x)
        x = self.fcdrop2(x)
        x = self._head(x)
        return x

In [12]:
from torch.utils.data import DataLoader
from torch.utils.data import RandomSampler, SequentialSampler

model = CardioRnn(32, 32).to(device)

ds = CardioDataset(df, 32)
print(len(ds))
train_sampler = SequentialSampler(ds)
batch_size = 1
train_dl = DataLoader(ds, sampler=train_sampler, batch_size=batch_size, num_workers=2)
for x in train_dl:
    print(model(x['x'].float().to(device)))
    break

53388
tensor([ 0.0927, -0.0028, -0.1356,  0.2113,  0.1264, -0.1228, -0.0356, -0.2024,
         0.1881, -0.3333, -0.0603, -0.1007,  0.1015, -0.1638, -0.1082,  0.1072,
        -0.0653, -0.2886,  0.0441,  0.1270,  0.0874, -0.1646, -0.0966, -0.2795,
         0.0826, -0.1080, -0.2170,  0.0131,  0.1661, -0.0356, -0.0217,  0.2447],
       device='cuda:0', grad_fn=<AddBackward0>)


In [13]:
from sklearn.metrics import roc_curve, precision_recall_curve, f1_score

def threshold_search(y_true, y_proba):
    precision , recall, thresholds = precision_recall_curve(y_true, y_proba)
    thresholds = np.append(thresholds, 1.001) 
    F = 2 / (1/precision + 1/recall)
    best_score = np.max(F)
    best_th = thresholds[np.argmax(F)]
    return best_th , best_score

In [14]:
%%writefile lookahead.py
# Lookahead implementation from https://github.com/rwightman/pytorch-image-models/blob/master/timm/optim/lookahead.py

""" Lookahead Optimizer Wrapper.
Implementation modified from: https://github.com/alphadl/lookahead.pytorch
Paper: `Lookahead Optimizer: k steps forward, 1 step back` - https://arxiv.org/abs/1907.08610
"""
import torch
from torch.optim.optimizer import Optimizer
from collections import defaultdict

class Lookahead(Optimizer):
    def __init__(self, base_optimizer, alpha=0.5, k=6):
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        defaults = dict(lookahead_alpha=alpha, lookahead_k=k, lookahead_step=0)
        self.base_optimizer = base_optimizer
        self.param_groups = self.base_optimizer.param_groups
        self.defaults = base_optimizer.defaults
        self.defaults.update(defaults)
        self.state = defaultdict(dict)
        # manually add our defaults to the param groups
        for name, default in defaults.items():
            for group in self.param_groups:
                group.setdefault(name, default)

    def update_slow(self, group):
        for fast_p in group["params"]:
            if fast_p.grad is None:
                continue
            param_state = self.state[fast_p]
            if 'slow_buffer' not in param_state:
                param_state['slow_buffer'] = torch.empty_like(fast_p.data)
                param_state['slow_buffer'].copy_(fast_p.data)
            slow = param_state['slow_buffer']
            slow.add_(group['lookahead_alpha'], fast_p.data - slow)
            fast_p.data.copy_(slow)

    def sync_lookahead(self):
        for group in self.param_groups:
            self.update_slow(group)

    def step(self, closure=None):
        # print(self.k)
        #assert id(self.param_groups) == id(self.base_optimizer.param_groups)
        loss = self.base_optimizer.step(closure)
        for group in self.param_groups:
            group['lookahead_step'] += 1
            if group['lookahead_step'] % group['lookahead_k'] == 0:
                self.update_slow(group)
        return loss

    def state_dict(self):
        fast_state_dict = self.base_optimizer.state_dict()
        slow_state = {
            (id(k) if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }
        fast_state = fast_state_dict['state']
        param_groups = fast_state_dict['param_groups']
        return {
            'state': fast_state,
            'slow_state': slow_state,
            'param_groups': param_groups,
        }

    def load_state_dict(self, state_dict):
        fast_state_dict = {
            'state': state_dict['state'],
            'param_groups': state_dict['param_groups'],
        }
        self.base_optimizer.load_state_dict(fast_state_dict)

        # We want to restore the slow state, but share param_groups reference
        # with base_optimizer. This is a bit redundant but least code
        slow_state_new = False
        if 'slow_state' not in state_dict:
            print('Loading state_dict from optimizer without Lookahead applied.')
            state_dict['slow_state'] = defaultdict(dict)
            slow_state_new = True
        slow_state_dict = {
            'state': state_dict['slow_state'],
            'param_groups': state_dict['param_groups'],  # this is pointless but saves code
        }
        super(Lookahead, self).load_state_dict(slow_state_dict)
        self.param_groups = self.base_optimizer.param_groups  # make both ref same container
        if slow_state_new:
            # reapply defaults to catch missing lookahead specific ones
            for name, default in self.defaults.items():
                for group in self.param_groups:
                    group.setdefault(name, default)

Writing lookahead.py


In [15]:
%%writefile ralamb.py
import torch, math
from torch.optim.optimizer import Optimizer

# RAdam + LARS
class Ralamb(Optimizer):

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        self.buffer = [[None, None, None] for ind in range(10)]
        super(Ralamb, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(Ralamb, self).__setstate__(state)

    def step(self, closure=None):

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data.float()
                if grad.is_sparse:
                    raise RuntimeError('Ralamb does not support sparse gradients')

                p_data_fp32 = p.data.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # Decay the first and second moment running average coefficient
                # m_t
                exp_avg.mul_(beta1).add_(1 - beta1, grad)
                # v_t
                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)

                state['step'] += 1
                buffered = self.buffer[int(state['step'] % 10)]

                if state['step'] == buffered[0]:
                    N_sma, radam_step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    N_sma_max = 2 / (1 - beta2) - 1
                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = N_sma

                    # more conservative since it's an approximated value
                    if N_sma >= 5:
                        radam_step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        radam_step_size = 1.0 / (1 - beta1 ** state['step'])
                    buffered[2] = radam_step_size

                if group['weight_decay'] != 0:
                    p_data_fp32.add_(-group['weight_decay'] * group['lr'], p_data_fp32)

                # more conservative since it's an approximated value
                radam_step = p_data_fp32.clone()
                if N_sma >= 5:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    radam_step.addcdiv_(-radam_step_size * group['lr'], exp_avg, denom)
                else:
                    radam_step.add_(-radam_step_size * group['lr'], exp_avg)

                radam_norm = radam_step.pow(2).sum().sqrt()
                weight_norm = p.data.pow(2).sum().sqrt().clamp(0, 10)
                if weight_norm == 0 or radam_norm == 0:
                    trust_ratio = 1
                else:
                    trust_ratio = weight_norm / radam_norm

                state['weight_norm'] = weight_norm
                state['adam_norm'] = radam_norm
                state['trust_ratio'] = trust_ratio

                if N_sma >= 5:
                    p_data_fp32.addcdiv_(-radam_step_size * group['lr'] * trust_ratio, exp_avg, denom)
                else:
                    p_data_fp32.add_(-radam_step_size * group['lr'] * trust_ratio, exp_avg)

                p.data.copy_(p_data_fp32)

        return loss

Writing ralamb.py


In [16]:
from lookahead import *
from ralamb import * 

def Over9000(params, alpha=0.5, k=6, *args, **kwargs):
     ralamb = Ralamb(params, *args, **kwargs)
     return Lookahead(ralamb, alpha, k)

RangerLars = Over9000

In [26]:
import joblib 
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
import tqdm

WIN_SIZE = 32
RNN_UNITS = 32

kf = KFold(n_splits=5, shuffle=True, random_state=42)
ifold = 0
for tr_id, va_id in kf.split(df['id'].unique()):
    train_df = df.loc[df['id'].isin(tr_id)].reset_index(drop=True)
    valid_df = df.loc[df['id'].isin(va_id)].reset_index(drop=True)

    train_ds = CardioDataset(train_df, WIN_SIZE)
    valid_ds = CardioDataset(valid_df, WIN_SIZE)

    train_sampler = RandomSampler(train_ds)
    valid_sampler = SequentialSampler(valid_ds)
    batch_size = 256
    train_dl = DataLoader(train_ds, sampler=train_sampler, batch_size=batch_size, num_workers=2)
    valid_dl = DataLoader(valid_ds, sampler=valid_sampler, batch_size=batch_size, num_workers=2)

    model = CardioRnn(WIN_SIZE, WIN_SIZE, rnn_units=RNN_UNITS).to(device)

    nepochs = 5
    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.AdamW(model.parameters(),
                       lr = 1e-4, eps=1e-6                           
                       )
    #optimizer = RangerLars(model.parameters(), lr=3e-4)
    for epoch in range(nepochs):
        model.train();
        optimizer.zero_grad()
        i = 0
        for x in train_dl:
            out = model(x['x'].float().to(device))
            loss = criterion(out, x['y'].float().to(device)) 
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 10.0)
            optimizer.step() 
            optimizer.zero_grad()
            i += 1
        #torch.save(model.state_dict(), 'model_'+str(epoch) + '.pth')
    
        valid_df_predict = np.zeros((valid_df.shape[0],WIN_SIZE))
        model.eval();
        for x in valid_dl:
            out = model(x['x'].float().to(device))
            out = torch.nn.Softmax(dim=1)(out)
            for j in range(x['x'].shape[0]):
                i0,i1 = x["start"][j],x["end"][j]
                s = i1 - i0
                valid_df_predict[i0:i1, i0 % WIN_SIZE] = out[j].detach().cpu().numpy()[-s:]
        y_proba = np.amax(valid_df_predict, 1)
        y_true = valid_df.y.values

        best_th , best_score = threshold_search(y_true, y_proba)
        print('fold', ifold, 'epoch', epoch, 'best_th', best_th, 'f1-score', best_score)

    ifold += 1

fold 0 epoch 0 best_th 0.035379305481910706 f1-score 0.24848382749326148



divide by zero encountered in true_divide



fold 0 epoch 1 best_th 0.031115170568227768 f1-score 0.2492894164855375
fold 0 epoch 2 best_th 0.029670758172869682 f1-score 0.2500834724540902
fold 0 epoch 3 best_th 0.031222067773342133 f1-score 0.251602023608769
fold 0 epoch 4 best_th 0.031076762825250626 f1-score 0.2514131443516409
fold 1 epoch 0 best_th 0.04119512066245079 f1-score 0.19968635650810246



divide by zero encountered in true_divide



fold 1 epoch 1 best_th 0.03765447437763214 f1-score 0.1966414224563714
fold 1 epoch 2 best_th 0.03930889815092087 f1-score 0.2086720867208672
fold 1 epoch 3 best_th 0.03362691029906273 f1-score 0.20515695067264575
fold 1 epoch 4 best_th 0.031361132860183716 f1-score 0.19757203842049095
fold 2 epoch 0 best_th 0.032578855752944946 f1-score 0.20641520641520641



divide by zero encountered in true_divide



fold 2 epoch 1 best_th 0.032595112919807434 f1-score 0.20651204281891172
fold 2 epoch 2 best_th 0.03218702971935272 f1-score 0.20651441407712468
fold 2 epoch 3 best_th 0.03173034265637398 f1-score 0.20621192750244416
fold 2 epoch 4 best_th 0.031410206109285355 f1-score 0.20606788825473119
fold 3 epoch 0 best_th 0.03376809135079384 f1-score 0.4406779661016949



divide by zero encountered in true_divide



fold 3 epoch 1 best_th 0.0397295206785202 f1-score 0.45027322404371595
fold 3 epoch 2 best_th 0.03178536519408226 f1-score 0.4468500977296647
fold 3 epoch 3 best_th 0.03098348341882229 f1-score 0.44311556286440423
fold 3 epoch 4 best_th 0.0308694988489151 f1-score 0.4470059880239521
fold 4 epoch 0 best_th 0.036863062530756 f1-score 0.267772148011669



divide by zero encountered in true_divide



fold 4 epoch 1 best_th 0.03777872398495674 f1-score 0.26151136458944896
fold 4 epoch 2 best_th 0.03258337825536728 f1-score 0.2604992903064206
fold 4 epoch 3 best_th 0.030974939465522766 f1-score 0.26006655574043264
fold 4 epoch 4 best_th 0.03108399361371994 f1-score 0.26012044161927067
