In [10]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_squared_error

In [11]:
def correlation_score(y_true, y_pred):
    """Score predictions by the mean row-wise Pearson correlation.

    Row ``i`` of ``y_true`` is correlated with row ``i`` of ``y_pred`` using
    ``np.corrcoef``; the returned score is the average of those coefficients
    over all rows.

    It is assumed that no row is constant: a constant row has zero variance,
    so its coefficient is NaN and the averaged score becomes NaN as well.

    Parameters
    ----------
    y_true, y_pred : pd.DataFrame or array-like
        Two-dimensional inputs of identical shape (one sample per row).
        DataFrames, including subclasses, are reduced to their underlying
        values so that rows are always addressed by position and never by
        column label.

    Returns
    -------
    float
        Average of the per-row Pearson correlation coefficients.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape, or contain no rows.
    """
    # isinstance (rather than an exact type comparison) also catches DataFrame
    # subclasses; without the conversion, ``frame[i]`` would select a column.
    if isinstance(y_true, pd.DataFrame):
        y_true = y_true.values
    if isinstance(y_pred, pd.DataFrame):
        y_pred = y_pred.values
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Without this check, surplus rows in y_pred would be ignored silently.
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {y_true.shape} and {y_pred.shape}"
        )
    if len(y_true) == 0:
        raise ValueError("correlation_score needs at least one row")

    # Sequential summation in row order, then a single division by the row count.
    corrsum = sum(
        np.corrcoef(true_row, pred_row)[1, 0]
        for true_row, pred_row in zip(y_true, y_pred)
    )
    return corrsum / len(y_true)

In [12]:
# Relative path of the directory that holds the data files.
DATA_DIR = "../data/open-problems-multimodal/"

# Full path of the HDF5 targets file that is loaded with pd.read_hdf below.
FP_CITE_TEST_TARGETS = os.path.join(DATA_DIR, "cite_day4_target.h5")


In [13]:
import math
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score  # unused in this cell; kept so any later use still resolves

# Ground-truth targets, loaded from the HDF5 file as a NumPy array.
cite_test_y = pd.read_hdf(FP_CITE_TEST_TARGETS).values

# Saved predictions of the two models that are blended below.
test_pred1 = np.load("sub_preds.npy")
test_pred2 = np.load("cite_lgb.npy")

# Candidate weights for test_pred1: 0.00, 0.01, ..., 1.00. The end point 1.0
# is part of the grid so that "test_pred1 only" is evaluated as well.
weights = np.arange(101) / 100

# Correlation score of every candidate blend (kept in a list named `scores`
# so the builtin `all` is not shadowed).
scores = [
    correlation_score(cite_test_y, w * test_pred1 + (1 - w) * test_pred2)
    for w in weights
]

# Read the best weight from the grid itself rather than re-deriving it from
# the index, so the two cannot drift apart if the grid is changed.
# NOTE(review): the weight is selected on the same targets that the scores
# printed below are computed on, so those scores are optimistic.
best_weight = weights[np.argmax(scores)]
print(best_weight)

ensemble_pred = best_weight * test_pred1 + (1 - best_weight) * test_pred2

# The correlation is computed on the NumPy arrays, before they are wrapped in
# tensors, which avoids a device round trip.
corrscore = correlation_score(cite_test_y, ensemble_pred)

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# also runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
ensemble_pred = torch.as_tensor(ensemble_pred, dtype=None, device=device)
cite_test_y = torch.as_tensor(cite_test_y, dtype=None, device=device)

# RMSE = square root of the mean squared error between blend and targets.
mse = nn.MSELoss()
score = math.sqrt(mse(ensemble_pred, cite_test_y).item())
print(score)
print(corrscore)

0.97
2.6483905844935305
0.8910035839045417
