In [53]:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from scipy.special import softmax
import torch
def get_score(logits, labels):
    """Multi-class log loss of raw logits against integer class labels.

    Args:
        logits: 2-D array-like of unnormalised scores; softmax is taken along
            axis 1, so each row is turned into a probability vector.
        labels: true class ids, passed to ``log_loss`` as ``y_true``.

    Returns:
        Whatever ``sklearn.metrics.log_loss`` returns for the clipped
        probabilities, with the class set fixed to ``[0, 1, 2]``.
    """
    eps = 1e-15
    # Keep probabilities strictly inside (0, 1) so log() never sees 0 or 1.
    clipped_probs = np.clip(softmax(logits, axis=1), eps, 1 - eps)
    return log_loss(labels, clipped_probs, labels=[0, 1, 2])
# Class ids are assigned by position: Ineffective -> 0, Adequate -> 1, Effective -> 2.
LABEL_MAPPING = {
    name: class_id
    for class_id, name in enumerate(("Ineffective", "Adequate", "Effective"))
}

# Out-of-fold prediction files to ensemble. The position in this list becomes
# the `i` in the `logits{i}` column name when the files are merged.
oof_paths = [
    '../ckpt/train2/exp2/oof.gz',
    '../ckpt/train2/exp3/oof.gz',
    '../ckpt/train2/exp4/oof.gz',
    '../ckpt/train2/exp5/oof.gz',
    '../ckpt/train2/exp6/oof.gz',
]

In [47]:
# Load the processed training table and reduce it to (discourse_id, label).
train = pd.read_csv('../data/train_processed.csv')
train['label'] = train['discourse_effectiveness'].map(LABEL_MAPPING)

# Series.map leaves NaN for any value that is not a key of LABEL_MAPPING, which
# would silently turn `label` into a float column; fail loudly instead.
unmapped = train.loc[train['label'].isna(), 'discourse_effectiveness'].unique()
assert len(unmapped) == 0, f'unmapped discourse_effectiveness values: {list(unmapped)}'

# The out-of-fold files are joined on discourse_id, so ids must be unique here.
assert train['discourse_id'].is_unique, 'duplicate discourse_id values in train'

# Explicit integer dtype: the label column is used as class indices downstream.
train = train[['discourse_id', 'label']].astype({'label': 'int64'})
train

Unnamed: 0,discourse_id,label
0,0013cc385424,1
1,9704a709b505,1
2,c22adee811b6,1
3,a10d361e54e4,1
4,db3e453ec4e2,1
...,...,...
36760,9f63b687e76a,1
36761,9d5bd7d86212,1
36762,f1b78becd573,1
36763,cc184624ca8e,0


In [48]:
# Attach each model's out-of-fold logits to `train` as a `logits{i}` column,
# matching rows on discourse_id.
#
# The merge starts from the two base columns so that re-running this cell does
# not merge on top of `logits{i}` columns left by a previous run (pandas would
# then emit `logits{i}_x` / `logits{i}_y` instead).
n_rows = len(train)
merged = train[['discourse_id', 'label']]
for i, oof_path in enumerate(oof_paths):
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    oof = pd.read_pickle(oof_path)
    assert len(oof) == n_rows, f'{oof_path}: {len(oof)} rows, expected {n_rows}'
    oof = oof.rename(columns={'logits': f'logits{i}'})
    # validate='one_to_one' raises if discourse_id is duplicated on either side,
    # which would otherwise multiply rows.
    merged = merged.merge(oof, on='discourse_id', how='inner', validate='one_to_one')
    # An inner join silently drops ids that are missing from `oof`; equal lengths
    # before the merge do not guarantee that the ids actually match.
    assert len(merged) == n_rows, f'{oof_path}: only {len(merged)} of {n_rows} ids matched'
train = merged
train

Unnamed: 0,discourse_id,label,logits0,logits1,logits2,logits3,logits4
0,0013cc385424,1,"[-0.46031857, 1.7813711, -1.9664644]","[-0.6140231, 1.5066998, -2.2523015]","[-0.56019115, 2.2195306, -2.1535082]","[-0.021195952, 1.9591146, -2.007581]","[-0.7049733, 1.8275588, -1.8245045]"
1,9704a709b505,1,"[-0.42723534, 1.8592818, -2.2128015]","[-0.95859843, 1.6789515, -2.0493398]","[-0.04099991, 2.047031, -2.6756704]","[-0.014969771, 2.0407436, -2.3343713]","[-0.1493536, 2.0983508, -2.3136837]"
2,c22adee811b6,1,"[0.15867966, 1.7871516, -2.8910418]","[-0.15520751, 1.7600977, -2.8947525]","[0.68558234, 2.114406, -3.2444067]","[0.7452612, 1.7884086, -2.8813481]","[0.2764221, 2.0177443, -2.7083738]"
3,a10d361e54e4,1,"[0.63520634, 1.427369, -3.139961]","[0.3037146, 1.3286613, -3.2531557]","[1.9015044, 1.4797994, -4.0157447]","[1.2194382, 1.5418339, -3.1413999]","[0.53818876, 1.6855001, -3.0761151]"
4,db3e453ec4e2,1,"[0.4794284, 1.6968659, -3.1383736]","[0.13721484, 1.5346375, -3.130892]","[1.4675037, 1.6795334, -3.638989]","[0.7247659, 1.885518, -2.9172187]","[0.39932084, 1.8995979, -2.8832703]"
...,...,...,...,...,...,...,...
36760,9f63b687e76a,1,"[-1.9952922, 1.0980514, 0.54361457]","[0.19212024, 1.6193676, -2.4935746]","[0.31115654, 1.8274095, -2.2691782]","[0.3398771, 1.995057, -2.6529632]","[-2.2207813, 1.6704456, 0.92481893]"
36761,9d5bd7d86212,1,"[-1.4401857, 0.8391793, 0.22449546]","[0.7023564, 1.4870162, -2.8392453]","[1.4873837, 1.4813598, -3.1726115]","[1.1575893, 1.8049259, -2.9496005]","[-1.3575106, 1.5550723, 0.025750408]"
36762,f1b78becd573,1,"[-2.0051303, 0.6368906, 1.0647606]","[-0.121050134, 1.5590851, -2.0845437]","[0.1300626, 1.9361672, -2.0523477]","[0.69150925, 1.7345768, -2.7000914]","[-0.9334012, 1.117895, 0.117199875]"
36763,cc184624ca8e,0,"[-1.1840022, 0.75686187, 0.004210035]","[1.2925433, 1.2450668, -3.1974397]","[1.4883661, 1.5710609, -3.0251384]","[1.6175629, 1.380533, -3.1996403]","[-0.5641607, 1.4128256, -0.8317688]"


In [49]:
# Per-model log loss: one (n_rows, n_classes) logit matrix per OOF file.
labels = train['label'].values
logit_columns = [f'logits{idx}' for idx in range(len(oof_paths))]
# Each cell of a logits column holds one row's logit vector; vstack turns the
# column into a 2-D array.
all_logits = [np.vstack(train[column].values) for column in logit_columns]
for idx, model_logits in enumerate(all_logits):
    print(idx, get_score(model_logits, labels))

0 0.6764085818030449
1 0.6809523467299282
2 0.6747764379669976
3 0.6702194637654867
4 0.6714596813677053


In [50]:
# Stack the per-model logit matrices along a new leading axis, then take the
# weighted mean over that axis. All weights are 1, i.e. a plain average.
all_logits = np.stack(all_logits)
ensemble_weights = np.ones(len(oof_paths))
avg_logits = np.average(all_logits, axis=0, weights=ensemble_weights)
avg_logits.shape

(36765, 3)

In [52]:
# Log loss of the equally weighted logit average.
get_score(logits=avg_logits, labels=labels)

0.6428060037208623

In [60]:
def get_score_pt(logits, labels):
    """Mean cross-entropy of raw logits against class-index labels, via PyTorch.

    Args:
        logits: array-like or tensor of unnormalised scores, one row per sample
            and one column per class.
        labels: array-like or tensor of class indices, one per sample. Any
            integer (or bool) dtype is accepted and converted to int64.

    Returns:
        The value of ``torch.nn.functional.cross_entropy`` (default mean
        reduction) as a Python float.
    """
    def _to_tensor(value):
        # torch.tensor(existing_tensor) emits a copy-construct UserWarning, so
        # tensors are detached instead; everything else is copied as before.
        if isinstance(value, torch.Tensor):
            return value.detach()
        return torch.tensor(value, requires_grad=False)

    logits = _to_tensor(logits)
    labels = _to_tensor(labels)
    # cross_entropy requires int64 class indices; e.g. an int32 label array
    # would otherwise raise. Floating targets are passed through unchanged so
    # cross_entropy treats them exactly as it did before.
    if not labels.is_floating_point():
        labels = labels.long()
    return torch.nn.functional.cross_entropy(logits, labels).item()

In [61]:
# Same ensemble score, computed with PyTorch's cross-entropy.
get_score_pt(logits=avg_logits, labels=labels)

0.6428060037208623