# **MODEL ENSEMBLING - DAY TIME**

Mounting Google Drive

In [2]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be read through the regular filesystem in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Initial setup

In [3]:
import pickle
import pandas as pd
from itertools import combinations
from sklearn.metrics import ndcg_score, accuracy_score, precision_score
from sklearn.preprocessing import StandardScaler
import numpy as np
import warnings

In [4]:
# Directory on the mounted Drive that this notebook works from.
route = '/content/drive/MyDrive/IMPACT PROJECT/ALL PREDS/day'

# Switch the working directory so later cells can open files by bare name.
%cd {route}

/content/drive/MyDrive/IMPACT PROJECT/ALL PREDS/day


Loading the predictions

In [5]:
def _read_pickle(filename):
    """
    Open `filename` in binary mode and return the unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)


# Ground-truth labels first, then one prediction file per model.
labels = _read_pickle('day_labels.pkl')
naml = _read_pickle('day_naml.pkl')
nrms = _read_pickle('day_nrms.pkl')
lstur = _read_pickle('day_lstur.pkl')

----

Function definitions

In [6]:
def dcg_score(y_true, y_score, k=10):
    """
    Discounted cumulative gain of the k highest-scored items.

    Items are ordered by descending `y_score`. The item at 0-based position
    i among the top k contributes (2 ** relevance - 1) / log2(i + 2), where
    the relevance is read from `y_true`. `k` is capped at the size of the
    last axis of `y_true`.
    """
    cutoff = min(np.shape(y_true)[-1], k)
    # argsort is ascending, so reverse it to get best-first and keep the head.
    ranking = np.argsort(y_score)[::-1][:cutoff]
    top_relevance = np.take(y_true, ranking)
    positions = np.arange(len(top_relevance))
    return np.sum((2 ** top_relevance - 1) / np.log2(positions + 2))

def ndcg_score(y_true, y_score, k=10):
    """
    Computing ndcg score metric at k.

    The DCG@k of the ranking induced by `y_score` is divided by the ideal
    DCG@k, i.e. the DCG obtained when the items are ranked by `y_true` itself.

    Returns 0.0 when the ideal DCG is 0 (no item in `y_true` carries any
    gain), because the ratio is undefined there and a nan would propagate
    into any average taken over several rankings.

    NOTE: this definition shadows the `ndcg_score` imported from
    sklearn.metrics at the top of the notebook; the two are not
    interchangeable (different signature and input layout).
    """
    best = dcg_score(y_true, y_true, k)
    if best == 0:
        # Nothing relevant to retrieve: avoid 0 / 0 -> nan.
        return 0.0
    actual = dcg_score(y_true, y_score, k)
    return actual / best

def calculate_ndcg(y_true, y_pred, k):
    """
    Mean NDCG@k over paired label / prediction sequences.

    `y_true` and `y_pred` are walked in lockstep; every pair is scored with
    `ndcg_score` at cutoff `k`, and the per-pair scores are averaged with
    np.mean.
    """
    per_pair_scores = []
    for pair_labels, pair_preds in zip(y_true, y_pred):
        per_pair_scores.append(ndcg_score(pair_labels, pair_preds, k))
    return np.mean(per_pair_scores)

def normalize_array(arr):
    """
    Standardize every value of `arr` with a StandardScaler fitted on `arr`.

    The array is reshaped to a single column so that all values are treated
    as one feature, and the scaled result is returned flattened to 1-D.
    """
    as_column = arr.reshape(-1, 1)
    standardized = StandardScaler().fit_transform(as_column)
    return standardized.flatten()

def transform_to_binary_with_threshold(arr, threshold):
    """
    Map each value of `arr` to 1 when it is >= `threshold`, otherwise to 0.

    Returns a plain Python list with one entry per input value.
    """
    binary = []
    for score in arr:
        if score >= threshold:
            binary.append(1)
        else:
            binary.append(0)
    return binary

def calculate_precision(y_true, y_pred, k=10, threshold=0.9):
    """
    Calculates the precision @k (k defaults to 10).

    Steps:
      1. keep the first `k` entries of every sequence in `y_pred` and
         `y_true`, then concatenate them across sequences;
      2. standardize the pooled scores with `normalize_array`;
      3. turn them into 0/1 predictions: 1 when the standardized score is
         >= `threshold`, else 0;
      4. return sklearn's `precision_score` of those predictions against the
         pooled labels.

    Parameters
    ----------
    y_true : sequence of label sequences, aligned with `y_pred`.
    y_pred : sequence of score sequences.
    k : number of leading entries kept per sequence (default 10).
    threshold : cut-off applied to the standardized scores (default 0.9).

    NOTE(review): the first `k` entries are taken in stored order; the items
    are not re-ranked by score before slicing. Confirm this is the intended
    meaning of "@k" for this metric.
    """
    head_scores = np.concatenate([scores[:k] for scores in y_pred])
    head_labels = np.concatenate([truth[:k] for truth in y_true])
    standardized = normalize_array(head_scores)
    binary_result = transform_to_binary_with_threshold(standardized, threshold)
    return precision_score(head_labels, binary_result)

def calculate_mean(predictions):
    """
    Calculate mean predictions between a dictionary of preds

    `predictions` maps a model name to that model's predictions: a sequence
    with one entry per impression, each entry being either a sequence of
    scores or a single score. Entry i of the result is the element-wise mean
    of entry i across all models.

    The mean is taken impression by impression rather than by stacking
    everything into one array, so impressions may have different lengths
    (stacking ragged data raises ValueError on recent NumPy versions).

    Returns
    -------
    list
        One entry per impression, converted with `.tolist()` (a list of
        floats, or a float when the entries are scalars).

    Raises
    ------
    ValueError
        If `predictions` is empty or the models do not all provide the same
        number of impressions.
    """
    per_model = list(predictions.values())
    if not per_model:
        raise ValueError("calculate_mean needs the predictions of at least one model")

    # zip() would silently truncate to the shortest model, so check up front.
    impression_counts = {len(model_preds) for model_preds in per_model}
    if len(impression_counts) != 1:
        raise ValueError(
            "all models must provide the same number of impressions, got "
            f"{sorted(impression_counts)}"
        )

    return [
        np.mean(same_impression, axis=0).tolist()
        for same_impression in zip(*per_model)
    ]

In [7]:
# Per-model predictions keyed by model name (insertion order: nrms, naml, lstur).
predictions = dict(nrms=nrms, naml=naml, lstur=lstur)

In [8]:
# Suppress the warning
# VisibleDeprecationWarning lives in np.exceptions on recent NumPy and was
# removed from the top-level namespace in NumPy 2.0; older releases only have
# np.VisibleDeprecationWarning. Resolve whichever exists instead of failing
# with AttributeError.
# (Presumably the warning comes from NumPy building arrays out of prediction
# lists of differing lengths - confirm against the loaded data.)
_ragged_warning = getattr(
    getattr(np, "exceptions", np), "VisibleDeprecationWarning", None
)
if _ragged_warning is not None:
    warnings.filterwarnings("ignore", category=_ragged_warning)

# Calculating the best combination of models
# Every non-empty subset of models is ensembled by averaging its predictions,
# then scored with NDCG@10, NDCG@5 and precision; the best subset per metric
# is remembered. Ties keep the first subset found (strict ">").

best_ndcg_10 = 0
best_ndcg_5 = 0
best_precision = 0

best_combination_10 = ()
best_combination_5 = ()
best_combination_p = ()

for r in range(1, len(predictions) + 1):
    for subset in combinations(predictions.keys(), r):
        subset_predictions = {model: predictions[model] for model in subset}
        y_pred = calculate_mean(subset_predictions)

        ndcg_10 = calculate_ndcg(labels, y_pred, k=10)
        ndcg_5 = calculate_ndcg(labels, y_pred, k=5)
        precision = calculate_precision(labels, y_pred)

        print(f"Model combination {subset} yields NDCG@5: {ndcg_5}, NDCG@10: {ndcg_10} and Precision: {precision}")

        if ndcg_10 > best_ndcg_10:
            best_ndcg_10 = ndcg_10
            best_combination_10 = subset

        if ndcg_5 > best_ndcg_5:
            best_ndcg_5 = ndcg_5
            best_combination_5 = subset

        if precision > best_precision:
            best_precision = precision
            best_combination_p = subset

print(f"\n-- Best  NDCG@5 = {best_ndcg_5} by model combination {best_combination_5} --")
print(f"-- Best  NDCG@10 = {best_ndcg_10} by model combination {best_combination_10} --")
print(f"-- Best  precision = {best_precision} by model combination {best_combination_p} --")

Model combination ('nrms',) yields NDCG@5: 0.3337851479169056, NDCG@10: 0.39777551912622316 and Precision: 0.13417275303550447
Model combination ('naml',) yields NDCG@5: 0.32870334602188717, NDCG@10: 0.39179276855170014 and Precision: 0.12808374449926657
Model combination ('lstur',) yields NDCG@5: 0.33272498915308435, NDCG@10: 0.39470115998832794 and Precision: 0.12967746522264617
Model combination ('nrms', 'naml') yields NDCG@5: 0.3373384001281302, NDCG@10: 0.40186728949016987 and Precision: 0.13343904720618768
Model combination ('nrms', 'lstur') yields NDCG@5: 0.33982629115731666, NDCG@10: 0.4032789146025963 and Precision: 0.13689243757077982
Model combination ('naml', 'lstur') yields NDCG@5: 0.33722696885837256, NDCG@10: 0.39935540778257733 and Precision: 0.13562544968650428
Model combination ('nrms', 'naml', 'lstur') yields NDCG@5: 0.3408714498767374, NDCG@10: 0.40404714090870075 and Precision: 0.1371941922923557

-- Best  NDCG@5 = 0.3408714498767374 by model combination ('nrms', '