In [5]:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from scipy.special import softmax
import torch
def get_score(logits, labels):
    """Return the multi-class log loss of raw logits against class labels.

    Args:
        logits: 2-D array-like of unnormalised scores, one row per sample;
            softmax is taken along axis 1.
        labels: true class ids, drawn from {0, 1, 2}.

    Returns:
        The value returned by ``sklearn.metrics.log_loss``.
    """
    eps = 1e-15
    # Keep probabilities strictly inside (0, 1) before they reach the log.
    clipped = np.clip(softmax(logits, axis=1), eps, 1 - eps)
    return log_loss(labels, clipped, labels=[0, 1, 2])
# Class name -> integer class id, numbered in the order listed.
LABEL_MAPPING = {
    name: idx
    for idx, name in enumerate(("Ineffective", "Adequate", "Effective"))
}

# Alternative set of OOF files, currently disabled:
# oof_paths = [
#     '../ckpt/train2/exp2/oof.gz',
#     '../ckpt/train2/exp3/oof.gz',
#     '../ckpt/train2/exp4/oof.gz',
#     '../ckpt/train2/exp5/oof.gz',
#     '../ckpt/train2/exp6/oof.gz',
# ]

# OOF prediction files that are ensembled below, one per model.
oof_paths = [
    '../ckpt/train6/exp5/oof.gz',
    '../ckpt/train8/exp0/oof.gz',
]

In [6]:
# Load the training table and reduce it to the id and the integer class label.
train = pd.read_csv('../data/train_processed.csv')
train['label'] = train['discourse_effectiveness'].map(LABEL_MAPPING)
# Series.map yields NaN for any value missing from LABEL_MAPPING (and silently
# turns the column into floats); fail here instead of inside the metric later.
assert train['label'].notna().all(), (
    "discourse_effectiveness contains values missing from LABEL_MAPPING"
)
train = train[['discourse_id', 'label']]
train

Unnamed: 0,discourse_id,label
0,0013cc385424,1
1,9704a709b505,1
2,c22adee811b6,1
3,a10d361e54e4,1
4,db3e453ec4e2,1
...,...,...
36760,9f63b687e76a,1
36761,9d5bd7d86212,1
36762,f1b78becd573,1
36763,cc184624ca8e,0


In [7]:
# Attach each model's OOF logits to the training rows as columns logits0, logits1, ...
# Start from the two base columns so that re-running this cell does not merge
# onto an already-merged frame (which would create logits0_x / logits0_y).
merged = train[['discourse_id', 'label']]
n_rows = len(merged)
for i, oof_path in enumerate(oof_paths):
    # NOTE: read_pickle can execute arbitrary code; only load trusted files.
    oof = pd.read_pickle(oof_path)
    assert len(oof) == n_rows, f"{oof_path}: {len(oof)} rows, expected {n_rows}"
    oof = oof.rename(columns={'logits': f'logits{i}'})
    # one_to_one makes pandas raise if discourse_id repeats on either side.
    merged = merged.merge(oof, on='discourse_id', how='inner', validate='one_to_one')
    # An inner join drops ids missing from the OOF file; equal lengths alone
    # do not rule that out, so re-check the row count after the join.
    assert len(merged) == n_rows, f"{oof_path}: discourse_id values do not match train"
train = merged
train

Unnamed: 0,discourse_id,label,logits0,logits1
0,0013cc385424,1,"[-0.55423874, 1.3770996, -2.1689012]","[-0.27665946, 1.7984412, -1.2620152]"
1,9704a709b505,1,"[-0.9240722, 1.7347128, -2.1513329]","[-0.9741463, 2.3374555, -0.9911482]"
2,c22adee811b6,1,"[-0.35993454, 1.5169164, -2.7781775]","[-0.49320546, 2.2687898, -1.625973]"
3,a10d361e54e4,1,"[0.6902696, 1.2148558, -3.4480884]","[0.96353287, 1.8416973, -2.3474236]"
4,db3e453ec4e2,1,"[0.49720484, 1.2258257, -3.465635]","[0.7780552, 1.944899, -2.4123056]"
...,...,...,...,...
36760,9f63b687e76a,1,"[-1.8615559, 1.8152055, 0.0059974026]","[-1.7218369, 1.6211671, -0.26119515]"
36761,9d5bd7d86212,1,"[-0.33728024, 1.7679042, -1.242108]","[-0.7063968, 1.4261082, -1.3764397]"
36762,f1b78becd573,1,"[-1.1473719, 1.5399636, -0.10791637]","[-1.0087718, 1.3393435, -0.6893582]"
36763,cc184624ca8e,0,"[-0.2121445, 1.5096043, -0.78673744]","[-0.44260955, 1.2745717, -1.3457791]"


In [8]:
labels = train['label'].values
# Collect one 2-D logits array per OOF file, in oof_paths order.
all_logits = [
    np.vstack(train[f'logits{i}'].values) for i in range(len(oof_paths))
]
# Report each model's individual log loss.
for i, logits in enumerate(all_logits):
    score = get_score(logits, labels)
    print(i, score)

0 0.6111776288292119
1 0.6038716750108646


In [9]:
# Stack the per-model logits along a new leading axis, then take the weighted
# mean over that axis. All weights are 1, so every model counts equally.
all_logits = np.stack(all_logits)
model_weights = np.ones(len(oof_paths))
avg_logits = np.average(all_logits, axis=0, weights=model_weights)
avg_logits.shape

(36765, 3)

In [10]:
# Log loss of the weighted-average logits.
get_score(avg_logits, labels)

0.59928258544284

In [11]:
def get_score_pt(logits, labels):
    """Return the mean cross-entropy of logits against class labels, via PyTorch.

    Args:
        logits: array-like or tensor of unnormalised scores, one row per
            sample. Its floating dtype is kept as given.
        labels: array-like or tensor of integer class ids, one per row.

    Returns:
        The loss as a Python float.
    """
    logits = torch.as_tensor(logits)
    # cross_entropy needs int64 class indices; numpy labels may arrive as int32.
    labels = torch.as_tensor(labels, dtype=torch.long)
    return torch.nn.functional.cross_entropy(logits, labels).item()

In [12]:
# Torch cross-entropy on the same inputs, for comparison with get_score(avg_logits, labels).
get_score_pt(avg_logits, labels)

0.5992825854428401