In [9]:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from scipy.special import softmax
import torch
def get_score(logits, labels):
    """Return the multi-class log loss of raw logits against integer labels.

    The logits are turned into probabilities with a softmax along axis 1,
    clipped to the open interval (0, 1) so the logarithm stays finite, and
    scored with sklearn's ``log_loss`` over the fixed class set [0, 1, 2].
    """
    eps = 1e-15
    clipped = np.clip(softmax(logits, axis=1), eps, 1 - eps)
    # Passing labels= explicitly keeps the column order fixed even if some
    # class is absent from `labels`.
    return log_loss(labels, clipped, labels=[0, 1, 2])
# Integer class id for each discourse_effectiveness label (0, 1, 2 in this order).
LABEL_MAPPING = {
    name: idx
    for idx, name in enumerate(("Ineffective", "Adequate", "Effective"))
}

# Alternate OOF set (train2 run), currently disabled:
# oof_paths = ['../ckpt/train2/exp2/oof.gz', '../ckpt/train2/exp3/oof.gz',
#              '../ckpt/train2/exp4/oof.gz', '../ckpt/train2/exp5/oof.gz',
#              '../ckpt/train2/exp6/oof.gz']

# Out-of-fold prediction files to ensemble, one per experiment.
oof_paths = ['../ckpt/train5/exp2/oof.gz',
             '../ckpt/train5/exp3/oof.gz',
             '../ckpt/train5/exp4/oof.gz']

In [10]:
# Load the processed training table and keep only what scoring needs:
# the row id and the integer-encoded target.
train = pd.read_csv('../data/train_processed.csv')
train['label'] = train['discourse_effectiveness'].map(LABEL_MAPPING)
# Series.map yields NaN for any value absent from LABEL_MAPPING, which would
# silently turn `label` into floats; fail here rather than inside the metric.
unmapped = train.loc[train['label'].isna(), 'discourse_effectiveness'].unique()
assert len(unmapped) == 0, f'unmapped discourse_effectiveness values: {unmapped}'
train = train[['discourse_id', 'label']]
train

Unnamed: 0,discourse_id,label
0,0013cc385424,1
1,9704a709b505,1
2,c22adee811b6,1
3,a10d361e54e4,1
4,db3e453ec4e2,1
...,...,...
36760,9f63b687e76a,1
36761,9d5bd7d86212,1
36762,f1b78becd573,1
36763,cc184624ca8e,0


In [11]:
# Attach each experiment's out-of-fold logits to `train` as logits0, logits1, ...
# Start from the id/label columns only so that re-running this cell does not
# merge a second copy of the logits columns (pandas would suffix them _x/_y).
train = train[['discourse_id', 'label']]
n_rows = len(train)
for i, oof_path in enumerate(oof_paths):
    # NOTE: read_pickle can execute arbitrary code; only load trusted files.
    oof = pd.read_pickle(oof_path)
    assert len(oof) == n_rows, f'{oof_path}: {len(oof)} rows, expected {n_rows}'
    oof = oof.rename(columns={'logits': f'logits{i}'})
    # validate= makes pandas raise if discourse_id is duplicated on either
    # side instead of silently multiplying rows.
    train = train.merge(oof, on='discourse_id', how='inner',
                        validate='one_to_one')
    # An inner join drops ids missing from `oof`; equal lengths alone do not
    # prove the id sets match, so re-check the row count after every merge.
    assert len(train) == n_rows, f'{oof_path}: discourse_id mismatch with train'
train

Unnamed: 0,discourse_id,label,logits0,logits1,logits2
0,0013cc385424,1,"[-0.065736614, 2.109253, -1.1399033]","[0.99210924, 2.407321, -1.0702925]","[-0.47663593, 2.1981802, 0.40335074]"
1,9704a709b505,1,"[-0.49648172, 2.5704458, -0.7708018]","[0.45190388, 2.871887, -0.64158386]","[-0.312719, 2.6083314, 0.24635275]"
2,c22adee811b6,1,"[-0.07207063, 2.5864592, -1.2251159]","[1.3263549, 2.697746, -1.3674811]","[0.49606887, 2.6142902, -0.5667403]"
3,a10d361e54e4,1,"[0.61130714, 2.1531703, -1.8563699]","[2.1603744, 2.2281132, -2.068038]","[1.6152817, 2.117956, -1.6766869]"
4,db3e453ec4e2,1,"[1.4018849, 1.9339651, -2.6617582]","[2.0052373, 2.384714, -2.2006426]","[1.5351976, 2.1482847, -1.7772292]"
...,...,...,...,...,...
36760,9f63b687e76a,1,"[-0.1617219, 1.977808, -1.1096228]","[-1.0977044, 2.1887925, 0.04002488]","[-1.259913, 2.082045, 0.8110388]"
36761,9d5bd7d86212,1,"[0.4799295, 1.775178, -1.6695095]","[0.13648616, 2.0000517, -1.1262543]","[0.53358126, 1.6555904, -0.9122732]"
36762,f1b78becd573,1,"[-0.27862364, 1.7382803, -0.8383986]","[-0.19035743, 1.5058018, -0.38897157]","[0.18011151, 1.5651275, -0.5547949]"
36763,cc184624ca8e,0,"[0.44017765, 1.6714985, -1.5087857]","[0.9619142, 1.57134, -1.8059206]","[0.70432526, 1.733619, -1.2330422]"


In [12]:
# Score every model's out-of-fold logits individually and keep the per-model
# logit matrices for the ensemble step.
labels = train['label'].to_numpy()
all_logits = []
for i, _ in enumerate(oof_paths):
    # Each cell of the column holds one row's logit vector; vstack turns the
    # column into a single 2-D array with one row per sample.
    logits = np.vstack(train[f'logits{i}'].to_numpy())
    score = get_score(logits, labels)
    all_logits.append(logits)
    print(i, score)

0 0.6312894161383654
1 0.6399274641953246
2 0.6343677029917978


In [13]:
# Stack the per-model matrices along a new leading axis (one slice per model).
all_logits = np.stack(all_logits)
# All-ones weights: every model contributes equally to the averaged logits.
model_weights = np.ones(len(oof_paths))
avg_logits = np.average(all_logits, axis=0, weights=model_weights)
avg_logits.shape

(36765, 3)

In [14]:
# Log loss of the equally weighted logit average across all models.
get_score(avg_logits, labels)

0.6213799740439903

In [15]:
def get_score_pt(logits, labels):
    """Mean cross-entropy of raw logits against class-index labels, via PyTorch.

    Args:
        logits: array-like or tensor of unnormalised scores, handed to
            ``torch.nn.functional.cross_entropy`` as its input.
        labels: array-like or tensor of integer class indices, one per row
            of ``logits``. Any integer dtype is accepted.

    Returns:
        The loss as a Python float (cross_entropy's default mean reduction).
    """
    # Reuse existing tensors (detached) rather than re-wrapping them:
    # torch.tensor() on a tensor emits a copy-construct warning.
    if isinstance(logits, torch.Tensor):
        logits_t = logits.detach()
    else:
        logits_t = torch.tensor(logits)
    if isinstance(labels, torch.Tensor):
        labels_t = labels.detach()
    else:
        labels_t = torch.tensor(labels)
    # cross_entropy requires int64 class indices; label arrays with another
    # integer dtype (e.g. int32) would otherwise raise.
    labels_t = labels_t.to(torch.long)
    return torch.nn.functional.cross_entropy(logits_t, labels_t).item()

In [16]:
# Score the same averaged logits with the torch implementation; the recorded
# output matches the value get_score returned for them.
get_score_pt(avg_logits, labels)

0.6213799740439903