In [3]:
import pandas as pd
import numpy as np
import argparse
import os
import math
import torch
import torch.nn as nn
import torchaudio
from torch.utils.data import Dataset
from dataclasses import dataclass
from typing import Dict, List, Optional, Union, Any
import plotly.express as px
from sklearn.metrics import confusion_matrix
from scipy.stats import pearsonr, spearmanr

In [None]:
# Loads the thresholds that ThresholdTuning_calibration.ipynb computed on the dev set.
# Applies the new thresholds to the logits of the eval data.
# Computes the overall score by averaging the predictions of all 4 parts (P1, P3, P4, P5).

In [2]:
def corn_forward_logits(raw_logits: torch.Tensor):
    """
    Accumulate the columns of a 2-D tensor from left to right.

    Output column 0 equals input column 0; every later output column k is
    input column k plus output column k-1. The input tensor is left
    untouched and the result has the same shape and dtype.
    """
    # Two-way unpacking doubles as the "must be 2-D" check.
    _, num_cols = raw_logits.shape

    running = raw_logits[:, 0]
    columns = [running]
    for k in range(1, num_cols):
        # Carry the previous accumulated column into the current one.
        running = raw_logits[:, k] + running
        columns.append(running)

    return torch.stack(columns, dim=1)

def corn_inference(raw_logits: torch.Tensor, dict_fixedThresholds):
    """
    Turn per-threshold scores into ordinal rank predictions.

    Every column of ``raw_logits`` is compared against its own cut-off; the
    prediction for a row is the number of columns whose value is strictly
    greater than the cut-off.

    raw_logits: 2-D tensor of shape (B, C), one column per ordinal threshold.
        The values are compared to the cut-offs exactly as given -- no
        shifting and no sigmoid is applied here.
        NOTE(review): the tuned cut-offs look like probabilities, so the
        input is presumably already on a probability scale; confirm against
        whatever produces these values.
    dict_fixedThresholds: mapping {column index -> cut-off}. Columns without
        an entry fall back to 0.5. The mapping is not modified.

    Returns a 1-D integer tensor of shape (B,) with values in [0..C].

    Raises ValueError when ``raw_logits`` is not 2-D and IndexError when a
    key does not address an existing column.
    """
    if raw_logits.dim() != 2:
        raise ValueError(
            f"raw_logits must be 2-D (batch, thresholds), got shape {tuple(raw_logits.shape)}"
        )

    # One cut-off per input column instead of a fixed-length list, so the
    # function works for any number of ordinal thresholds.
    num_thresholds = raw_logits.shape[1]
    threshold_list = [0.5] * num_thresholds
    for key, value in dict_fixedThresholds.items():
        threshold_list[key] = value

    # dtype is left to torch's default inference so comparisons behave as
    # before; only the device follows the input to avoid a CPU/GPU mismatch.
    threshold_tensor = torch.tensor(threshold_list, device=raw_logits.device)

    pass_mask = raw_logits > threshold_tensor

    preds = pass_mask.sum(dim=1)
    return preds

In [5]:
# Read the locked thresholds table (one row per score level, one column per part).
df = pd.read_csv("/m/triton/work/porwala1/slate_models/CornLoss_TrainDataModel_EvalPreds_withProbs/final_locked_thresholds_025.csv")
# Rescale the score column to an integer index: index = 2 * score - 5.
df["score"] = df["score"].mul(2).sub(5).astype(int)

# For each part, map the integer score index to that part's threshold.
dict_Thresholds_p1 = {idx: thr for idx, thr in zip(df["score"], df["p1_threshold"])}
dict_Thresholds_p3 = {idx: thr for idx, thr in zip(df["score"], df["p3_threshold"])}
dict_Thresholds_p4 = {idx: thr for idx, thr in zip(df["score"], df["p4_threshold"])}
dict_Thresholds_p5 = {idx: thr for idx, thr in zip(df["score"], df["p5_threshold"])}

# Show the mappings so the loaded values can be checked by eye.
print("p1_dict:", dict_Thresholds_p1)
print("p3_dict:", dict_Thresholds_p3)
print("p4_dict:", dict_Thresholds_p4)
print("p5_dict:", dict_Thresholds_p5)

p1_dict: {6: 0.02, 5: 0.52, 4: 0.55, 3: 0.75, 2: 0.9, 1: 0.62, 0: 0.7}
p3_dict: {6: 0.05, 5: 0.55, 4: 0.62, 3: 0.88, 2: 0.7, 1: 0.62, 0: 0.8}
p4_dict: {6: 0.38, 5: 0.2, 4: 0.3, 3: 0.32, 2: 0.4, 1: 0.85, 0: 0.8}
p5_dict: {6: 0.48, 5: 0.1, 4: 0.38, 3: 0.68, 2: 0.78, 1: 0.88, 0: 0.98}


In [6]:
# Directory holding the per-part eval prediction CSVs read below; the final sla.tsv is written here as well.
path = "/m/triton/work/porwala1/slate_models/CornLoss_TrainDataModel_EvalPreds_withProbs"

In [7]:
# Part 1: load the eval predictions for this part.
P1_csv = pd.read_csv(path + "/eval-preds-P1-withProb.csv")
# The submission IDs of this file label the rows of the final output.
subIDs = P1_csv["submissionID"].tolist()
# Every column except 'score' and 'submissionID' is passed on to corn_inference.
pred_logits_1_df = P1_csv.drop(["score", "submissionID"], axis=1)
pred_logits_1 = pred_logits_1_df.to_numpy()
logits_t_1 = torch.from_numpy(pred_logits_1)

# Apply the part-1 thresholds, then map the integer prediction c to c / 2 + 2.
preds_t_1 = corn_inference(logits_t_1, dict_Thresholds_p1)
final_preds_1 = preds_t_1.cpu().numpy()
final_preds_rescaled_1 = 2 + final_preds_1 / 2

In [8]:
# Part 3: load the eval predictions for this part.
P3_csv = pd.read_csv(path + "/eval-preds-P3-withProb.csv")
# Every column except 'score' and 'submissionID' is passed on to corn_inference.
pred_logits_3_df = P3_csv.drop(["score", "submissionID"], axis=1)
pred_logits_3 = pred_logits_3_df.to_numpy()
logits_t_3 = torch.from_numpy(pred_logits_3)

# Apply the part-3 thresholds, then map the integer prediction c to c / 2 + 2.
preds_t_3 = corn_inference(logits_t_3, dict_Thresholds_p3)
final_preds_3 = preds_t_3.cpu().numpy()
final_preds_rescaled_3 = 2 + final_preds_3 / 2

In [9]:
# Part 4: load the eval predictions for this part.
P4_csv = pd.read_csv(path + "/eval-preds-P4-withProb.csv")
# Every column except 'score' and 'submissionID' is passed on to corn_inference.
pred_logits_4_df = P4_csv.drop(["score", "submissionID"], axis=1)
pred_logits_4 = pred_logits_4_df.to_numpy()
logits_t_4 = torch.from_numpy(pred_logits_4)

# Apply the part-4 thresholds, then map the integer prediction c to c / 2 + 2.
preds_t_4 = corn_inference(logits_t_4, dict_Thresholds_p4)
final_preds_4 = preds_t_4.cpu().numpy()
final_preds_rescaled_4 = 2 + final_preds_4 / 2

In [10]:
# Part 5: load the eval predictions for this part.
P5_csv = pd.read_csv(path + "/eval-preds-P5-withProb.csv")
# Every column except 'score' and 'submissionID' is passed on to corn_inference.
pred_logits_5_df = P5_csv.drop(["score", "submissionID"], axis=1)
pred_logits_5 = pred_logits_5_df.to_numpy()
logits_t_5 = torch.from_numpy(pred_logits_5)

# Apply the part-5 thresholds, then map the integer prediction c to c / 2 + 2.
preds_t_5 = corn_inference(logits_t_5, dict_Thresholds_p5)
final_preds_5 = preds_t_5.cpu().numpy()
final_preds_rescaled_5 = 2 + final_preds_5 / 2

In [11]:
# The overall score averages the four parts row by row. That is only
# meaningful when every per-part CSV lists the submissions in the same order
# as subIDs (taken from the P1 file), so fail loudly instead of silently
# averaging scores that belong to different submissions.
mismatched_parts = [
    part_name
    for part_name, part_csv in (("P3", P3_csv), ("P4", P4_csv), ("P5", P5_csv))
    if part_csv["submissionID"].to_list() != subIDs
]
if mismatched_parts:
    raise ValueError(
        "submissionID order differs from the P1 predictions for part(s): "
        + ", ".join(mismatched_parts)
        + "; align the rows before averaging"
    )

# Element-wise mean over the four per-part score arrays (one value per submission).
predOverallScore = np.mean([final_preds_rescaled_1, final_preds_rescaled_3, final_preds_rescaled_4, final_preds_rescaled_5], axis=0)

In [12]:
# Pair every submission ID with its averaged score and write the table as a
# tab-separated file without header row or index column.
df = pd.DataFrame(data={"subID": subIDs, "score": predOverallScore})
df.to_csv(
    path + "/sla.tsv",
    sep="\t",
    header=False,
    index=False,
)