In [1]:
import os

# Expose only GPU index 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np


def _load_xy(features_file, targets_file):
    """Load one data split from disk.

    Features are cast to float32; targets are cast to int64 and only their
    first column is kept.
    """
    features = np.load(features_file).astype(np.float32)
    targets = np.load(targets_file).astype(np.int64)[:, 0]
    return features, targets


traval_tra_features, traval_tra_targets = _load_xy(
    'traval_tra_features.npy', 'traval_tra_targets.npy')
traval_val_features, traval_val_targets = _load_xy(
    'traval_val_features.npy', 'traval_val_targets.npy')
traval_tra_features.shape, traval_val_features.shape

((687636, 2600), (76405, 2600))

In [2]:
import torch
from torch import optim
from torch import nn
import torch.nn.functional as F
from skorch import NeuralNetClassifier
import sklearn
from skorch.callbacks import EpochScoring, LRScheduler, Checkpoint
from torch.optim import Adam, SGD
import adamod



In [3]:
from skorch.dataset import Dataset
from skorch.helper import predefined_split
# Wrap the held-out split so it can be handed to skorch as a fixed validation set.
valid_ds = Dataset(X=traval_val_features, y=traval_val_targets)

In [4]:
class DownSample2x(nn.Sequential):
    """Stride-2 convolution followed by ReLU.

    The convolution uses kernel size 2 and stride 2, so every output step
    covers one non-overlapping pair of input steps and the sequence length
    is halved (rounded down).

    Args:
        _in: number of input channels.
        _out: number of output channels.
    """

    def __init__(self, _in, _out):
        strided_conv = nn.Conv1d(
            in_channels=_in, out_channels=_out, kernel_size=2, stride=2
        )
        activation = nn.ReLU(inplace=True)
        # Positional registration keeps the sub-module keys "0" and "1".
        super().__init__(strided_conv, activation)

class SELayer(nn.Module):
    """Channel-wise gating for (batch, channels, length) feature maps.

    Each channel is averaged over its length, the per-channel means go
    through a two-layer bottleneck (Linear -> PReLU -> Linear -> Sigmoid),
    and the resulting values in (0, 1) rescale the input channel by channel.

    Args:
        _in: number of channels of the input (and of the output).
        _hidden: width of the bottleneck layer.
    """

    def __init__(self, _in, _hidden=64):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        bottleneck = [
            nn.Linear(_in, _hidden),
            nn.PReLU(),
            nn.Linear(_hidden, _in),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*bottleneck)

    def forward(self, x):
        """Return ``x`` scaled per channel; the output has the shape of ``x``."""
        batch, channels, _ = x.size()
        # (batch, channels, 1) -> (batch, channels): one mean per channel.
        channel_means = self.avg_pool(x).view(batch, channels)
        gates = self.fc(channel_means)
        # Restore the trailing axis so the gates broadcast along the length.
        return x * gates.view(batch, channels, 1)
    
class ResConv1d(nn.Module):
    """Residual 1-D convolution block with a gated main branch.

    Main branch: Conv(k=3) -> BatchNorm -> ReLU -> Conv(k=3) -> BatchNorm,
    then an ``SELayer`` gate. Shortcut branch: Conv(k=1) -> BatchNorm.
    The two branches are added and passed through ReLU. All convolutions
    use stride 1 with padding that keeps the sequence length unchanged.

    Args:
        _in: number of input channels.
        _out: number of output channels.
    """

    def __init__(self, _in, _out):
        super().__init__()

        def conv3(channels_in):
            # Length-preserving 3-tap convolution producing `_out` channels.
            return nn.Conv1d(channels_in, _out, kernel_size=3, padding=1, stride=1)

        self.cal = nn.Sequential(
            conv3(_in),
            nn.BatchNorm1d(_out),
            nn.ReLU(),
            conv3(_out),
            nn.BatchNorm1d(_out),
        )
        self.se = SELayer(_out, _out)
        self.conv = nn.Conv1d(_in, _out, kernel_size=1, padding=0, stride=1)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm1d(_out)

    def forward(self, x):
        """Return ReLU(gated main branch + projected shortcut)."""
        residual = self.se(self.cal(x))
        shortcut = self.bn(self.conv(x))
        return self.relu(residual + shortcut)
        
class MyModule(nn.Module):
    """1-D convolutional classifier producing probabilities over 3 classes.

    The input is a 2-D batch ``(batch, n_features)``. Each row is normalised
    to unit norm and treated as a single-channel sequence, then passed
    through six stages of ``DownSample2x`` (halves the length) followed by
    ``ResConv1d`` (64 channels). The flattened result goes through dropout
    and a linear layer, and softmax turns the 3 logits into probabilities.

    The classifier expects ``64 * 40`` flattened values, i.e. a length of 40
    after six halvings; that is what 2600 input features reduce to
    (2600 -> 1300 -> 650 -> 325 -> 162 -> 81 -> 40).

    Args:
        nonlin: accepted for interface compatibility; not used by the module.
    """

    def __init__(self, nonlin=F.relu):
        super(MyModule, self).__init__()

        # Attribute names d1..d6 / c1..c6 are part of the state-dict keys of
        # saved checkpoints, so they are spelled out rather than generated.
        self.d1 = DownSample2x(1, 64)
        self.c1 = ResConv1d(64, 64)

        self.d2 = DownSample2x(64, 64)
        self.c2 = ResConv1d(64, 64)

        self.d3 = DownSample2x(64, 64)
        self.c3 = ResConv1d(64, 64)

        self.d4 = DownSample2x(64, 64)
        self.c4 = ResConv1d(64, 64)

        self.d5 = DownSample2x(64, 64)
        self.c5 = ResConv1d(64, 64)

        self.d6 = DownSample2x(64, 64)
        self.c6 = ResConv1d(64, 64)

        self.dropout = nn.Dropout(0.5)
        self.cls = nn.Linear(64 * 40, 3)

    def preprocess(self, x, p=2, eps=1e-8):
        """Scale every row of ``x`` to unit p-norm and add a channel axis.

        Args:
            x: tensor of shape ``(batch, n_features)``.
            p: order of the norm used for scaling.
            eps: added to the norm so an all-zero row does not divide by zero.

        Returns:
            Tensor of shape ``(batch, 1, n_features)``.
        """
        x = x / (x.norm(p=p, dim=1, keepdim=True) + eps)
        x = x.unsqueeze(1)
        return x

    def forward(self, x):
        """Return class probabilities of shape ``(batch, 3)``."""
        bs = x.size(0)
        x = self.preprocess(x)

        stages = (
            (self.d1, self.c1),
            (self.d2, self.c2),
            (self.d3, self.c3),
            (self.d4, self.c4),
            (self.d5, self.c5),
            (self.d6, self.c6),
        )
        for downsample, block in stages:
            x = block(downsample(x))

        x = x.reshape(bs, -1)
        x = self.dropout(x)

        # The logits are (batch, 3); name the class axis explicitly. Calling
        # softmax without `dim` is deprecated and warns on every forward pass;
        # for a 2-D input the implicit choice was this same axis.
        return F.softmax(self.cls(x), dim=1)

    
def _f1_on_dataset(net, ds, y, average):
    """Score ``net`` on ``ds`` with scikit-learn's F1 using ``average``.

    Args:
        net: object exposing ``predict(ds)`` that returns predicted labels.
        ds: dataset yielding ``(features, target)`` pairs.
        y: true labels for ``ds`` if the caller already has them; when
            ``None`` they are collected by iterating over ``ds``.
        average: averaging mode forwarded to ``f1_score``.

    Returns:
        The F1 score as returned by ``sklearn.metrics.f1_score``.
    """
    # Import the submodule explicitly: a bare `import sklearn` only exposes
    # `sklearn.metrics` if some other package happened to load it first.
    from sklearn.metrics import f1_score

    # Reuse the labels supplied by the caller; walking the dataset item by
    # item is only needed as a fallback.
    y_true = [target for _, target in ds] if y is None else y
    y_pred = net.predict(ds)
    return f1_score(y_true, y_pred, average=average)


def microf1(net, ds, y=None):
    """Micro-averaged F1 of ``net`` on ``ds`` (scorer signature: net, X, y)."""
    return _f1_on_dataset(net, ds, y, average='micro')


def macrof1(net, ds, y=None):
    """Macro-averaged F1 of ``net`` on ``ds`` (scorer signature: net, X, y)."""
    return _f1_on_dataset(net, ds, y, average='macro')

class F1Loss(nn.Module):
    """Differentiable F1-style loss computed from class probabilities.

    For every class ``c`` with probability column ``p`` and 0/1 indicator
    ``t = (target == c)`` it accumulates

        0.5 * sum(t * p) / sum(t + p + eps)

    and returns ``-log(total + eps)``. The computation runs on whatever
    device ``input`` lives on, and the number of classes is taken from the
    last dimension of ``input``.
    """

    def __init__(self):
        super(F1Loss, self).__init__()

    def forward(self, input, target, eps=1e-10):
        """Compute the loss.

        Args:
            input: class probabilities, classes along the last dimension
                (``(batch, n_classes)`` in this notebook).
            target: integer class index per sample.
            eps: small constant guarding the division and the logarithm.

        Returns:
            Scalar tensor holding the loss.
        """
        n_classes = input.size(-1)
        loss = 0
        for idx in range(n_classes):
            # Column `idx` of the probabilities; indexing keeps the tensor on
            # the input's device instead of forcing a CUDA one-hot matrix.
            y_pred_ = input[..., idx]
            # Explicit cast so the indicator mixes with floats on any torch version.
            y_true_ = (target == idx).to(input.dtype)
            loss = loss + 0.5 * (y_true_ * y_pred_).sum() / (y_true_ + y_pred_ + eps).sum()
        return -torch.log(loss + eps)

In [6]:
# skorch wrapper around MyModule, trained with Adam on F1Loss and validated
# on the predefined hold-out split. Macro- and micro-F1 are logged each epoch
# from cached predictions, with larger values counted as better.
net = NeuralNetClassifier(
    MyModule,
    criterion=F1Loss,
    optimizer=Adam,
    lr=0.0001,
    max_epochs=10,
    batch_size=128,
    iterator_train__shuffle=True,
    train_split=predefined_split(valid_ds),
    callbacks=[
        EpochScoring(scorer, use_caching=True, lower_is_better=False)
        for scorer in (macrof1, microf1)
    ],
    device='cuda',
)

In [31]:
import numpy as np

# One checkpoint per trained model; each is loaded into `net` in turn.
files = ['03_model' + str(model_id) + '/params.pt'
         for model_id in (1, 2, 3, 4, 11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34)]

preds = []
for params_file in files:
    # Rebuild the module before loading so every checkpoint starts from a fresh net.
    net.initialize()
    net.load_params(f_params=params_file)
    print(params_file)
    proba = net.predict_proba(traval_val_features)
    preds.append(proba)
    print(proba.shape)

# Insert a model axis: (n_samples, n_models, n_classes).
preds_np = np.stack(preds, axis=1)
preds_np.shape

Re-initializing module.
Re-initializing optimizer.
03_model1/params.pt




(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model2/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model3/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model4/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model11/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model12/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model13/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model14/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model21/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model22/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model23/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model24/params.pt
(76405, 3)
Re-initializing module.
Re-initializing optimizer.
03_model31/params.pt
(76405,

(76405, 16, 3)

In [33]:
import sklearn

# Macro-F1 of each individual model on the validation targets.
for member in range(len(files)):
    member_labels = preds_np[:, member, :].argmax(axis=-1)
    print(sklearn.metrics.f1_score(traval_val_targets, member_labels, average='macro'))

# Equal-weight ensemble: sum the probabilities of all models, then take argmax.
print('mean:')
w = [1] * 16
preds_w = np.zeros_like(preds_np[:, 0, :])
for idx, weight in enumerate(w):
    preds_w += preds_np[:, idx, :] * weight
print(sklearn.metrics.f1_score(traval_val_targets, preds_w.argmax(axis=-1), average='macro'))

# Hand-weighted ensemble: the models at positions 2 and 9 count twice.
print('weight:')
w = [1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]
preds_w = np.zeros_like(preds_np[:, 0, :])
for idx, weight in enumerate(w):
    preds_w += preds_np[:, idx, :] * weight
print(sklearn.metrics.f1_score(traval_val_targets, preds_w.argmax(axis=-1), average='macro'))

0.9837454286579623
0.9837277031205023
0.9841201980752211
0.9835818670398346
0.9836657697537247
0.9836394989584393
0.9837269174613841
0.9838323049060932
0.9832873964942296
0.9840603134691754
0.9833274136502211
0.9839531619190827
0.9834960956279978
0.9838624442945975
0.982914339936784
0.983713373774072
mean:
0.9854693211835676
weight:
0.9852281087088728


In [None]:
"""
without f1:
0.9837454286579623
0.9837277031205023
0.9841201980752211
0.9835818670398346
0.9836657697537247
0.9836394989584393
0.9837269174613841
0.9838323049060932
0.9832873964942296
0.9840603134691754
0.9833274136502211
0.9839531619190827
0.9834960956279978
0.9838624442945975
0.982914339936784
0.983713373774072
mean:
0.9854693211835676
weight:
0.9852281087088728

with f1:
0.9847379927372794
0.9850451497498632
0.9850169348505221
0.9850028755295784
0.9845799964396283
0.9849547437168384
0.9850696446139503
0.9848078173306157
0.9845703118789038
0.9851591709256707
0.9848471043024284
0.9852402076448908
0.9845585545753571
0.9848449739433897
0.9849713606291938
0.9848816478458112
mean:
0.9856239374676373
weight:
0.9854902328933562
"""

In [29]:
# Test split: row identifiers as stored, features cast to float32 like the
# training and validation features.
test_rowids = np.load('test_rowids.npy')
test_features = np.load('test_features.npy').astype(np.float32)

In [34]:
import numpy as np

# One `f1params.pt` checkpoint per trained model; each is loaded into `net` in turn.
files = ['03_model' + str(model_id) + '/f1params.pt'
         for model_id in (1, 2, 3, 4, 11, 12, 13, 14, 21, 22, 23, 24, 31, 32, 33, 34)]

preds = []
for params_file in files:
    # Rebuild the module before loading so every checkpoint starts from a fresh net.
    net.initialize()
    net.load_params(f_params=params_file)
    print(params_file)
    proba = net.predict_proba(test_features)
    preds.append(proba)
    print(proba.shape)

# Insert a model axis: (n_samples, n_models, n_classes).
preds_np = np.stack(preds, axis=1)
preds_np.shape

Re-initializing module.
Re-initializing optimizer.
03_model1/f1params.pt




(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model2/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model3/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model4/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model11/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model12/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model13/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model14/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model21/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model22/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model23/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optimizer.
03_model24/f1params.pt
(190624, 3)
Re-initializing module.
Re-initializing optim

(190624, 16, 3)

In [35]:
# Equal-weight ensemble over the 16 models: accumulate the per-model
# probabilities, then pick the class with the largest total.
w = [1] * 16
preds_w = np.zeros_like(preds_np[:, 0, :])
for idx, weight in enumerate(w):
    preds_w += preds_np[:, idx, :] * weight
ans = preds_w.argmax(axis=-1)

In [37]:
import pandas as pd

# Class index -> label name written to the output file.
id2name = {
    0: 'star',
    1: 'galaxy',
    2: 'qso',
}

result_df = pd.DataFrame({'id': test_rowids, 'label': ans})
# Strip surrounding whitespace from the row ids and replace class indices
# by their names before writing the CSV (without the index column).
result_df['id'] = result_df['id'].map(lambda rowid: rowid.strip())
result_df['label'] = result_df['label'].map(lambda cls: id2name[cls])
result_df.to_csv('r4.csv', index=None)

In [38]:
# Number of test rows predicted as class 0, 1 and 2 respectively.
tuple(sum(ans == label) for label in (0, 1, 2))

(160058, 23330, 7236)