In [1]:
import logging
import os
import random
import sys
from dataclasses import dataclass, field
from typing import Optional
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import numpy as np
from datasets import load_dataset, load_metric
from transformers import EvalPrediction
import pandas as pd
from skmultilearn.model_selection import IterativeStratification
from transformers.modeling_roberta import RobertaPreTrainedModel, RobertaModel, RobertaClassificationHead, SequenceClassifierOutput
from torch import nn
import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoModelForPreTraining,
    AutoTokenizer,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
from transformers.trainer_utils import is_main_process

logger = logging.getLogger(__name__)
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss, MSELoss
import pandas as pd
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
# Load the labelled training stories; later cells read the 'story', 'label'
# and 'breakpoint' columns of this frame.
# Colab alternative (needs the Drive mount that is commented out in the imports cell):
# eat = pd.read_json('drive/MyDrive/eat_train.json')
eat = pd.read_json('eat_train.json')

10-fold stratified cross-validation: assign every training row to one validation fold

In [3]:
# Assign every training row to exactly one of 10 validation folds, stratified
# jointly on the 'label' and 'breakpoint' columns.
#
# NOTE(review): scikit-multilearn's IterativeStratification appears to break ties
# with NumPy's *global* RNG rather than with `random_state`, so the global seed is
# pinned here to make the assignment repeatable across kernel restarts — confirm.
np.random.seed(42)
kfold = IterativeStratification(n_splits=10, random_state=42)

# we create the folds once, and always use those.
# `split` yields positional indices, so they are written into a positional array
# instead of going through the label-based `eat.loc` (which only matches when the
# frame has a default 0..n-1 index).
fold_of_row = np.full(len(eat), -1, dtype=int)
for fold, (train_index, val_index) in enumerate(kfold.split(X=eat, y=eat[['label', 'breakpoint']])):
    fold_of_row[val_index] = fold  # fold to predict on

# Fail loudly if the splitter left any row without a validation fold.
if (fold_of_row < 0).any():
    raise ValueError("some rows were not assigned to any validation fold")

eat['fold'] = fold_of_row



# Define datasets

Define the dataset classes, the evaluation metrics, and the number of labels for each task

In [4]:
class EATDatasetTask1(Dataset):
    """Tokenised EAT stories paired with their Task 1 targets.

    Each entry of ``df['story']`` is a sequence of sentences. The first two
    sentences and the remaining ones are joined into two text segments and
    encoded together as one sequence pair, padded/truncated to 100 tokens.
    Targets are read from ``df['label']``.
    """

    def __init__(self, df, tokenizer):
        # Targets come straight from the `label` column.
        self.y = df['label'].values
        # One encoding per story, in the frame's row order.
        self.tokens = [
            tokenizer.encode_plus(
                " ".join(sentences[0:2]),
                " ".join(sentences[2:]),
                padding='max_length',
                max_length=100,
                truncation=True,
            )
            for sentences in df['story']
        ]

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # Convert every field of the stored encoding to a tensor and attach the target.
        item = {}
        for name, values in self.tokens[idx].items():
            item[name] = torch.tensor(values)
        item['labels'] = torch.tensor(self.y[idx])
        return item
    
def compute_metrics(pred: EvalPrediction):
    """Compute accuracy and macro-averaged precision/recall/F1 for one evaluation pass.

    Args:
        pred: evaluation output; ``label_ids`` holds the gold class ids and
            ``predictions`` holds the class scores, either as a single array
            or as a tuple whose first element is that array.

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    labels = pred.label_ids
    # A model that returns extra outputs (e.g. hidden states) hands the trainer a
    # tuple; the class scores are its first element.
    scores = pred.predictions[0] if isinstance(pred.predictions, tuple) else pred.predictions
    preds = scores.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

class EATDatasetTask2(Dataset):
    """Tokenised EAT stories paired with their Task 2 (breakpoint) targets.

    Stories are encoded as a sequence pair (first two sentences vs. the rest),
    padded/truncated to 100 tokens. Targets are read from ``df['breakpoint']``
    with the value -1 mapped to class 0.
    """

    def __init__(self, df, tokenizer):
        # -1 is remapped to 0 so that every target is a non-negative class id.
        self.y = df['breakpoint'].replace(-1, 0).values
        # One encoding per story, in the frame's row order.
        self.tokens = [
            tokenizer.encode_plus(
                " ".join(sentences[0:2]),
                " ".join(sentences[2:]),
                padding='max_length',
                max_length=100,
                truncation=True,
            )
            for sentences in df['story']
        ]

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # Convert every field of the stored encoding to a tensor and attach the target.
        item = {}
        for name, values in self.tokens[idx].items():
            item[name] = torch.tensor(values)
        item['labels'] = torch.tensor(self.y[idx])
        return item

# Checkpoint every fold model is initialised from.
model_name_or_path = 'roberta-large-mnli'

# Task 2 classes: the distinct breakpoint values, with -1 mapped to 0 exactly as
# EATDatasetTask2 does. Sorted for determinism.
label_list_task2 = np.sort(eat['breakpoint'].replace(-1, 0).unique())
num_labels_task2 = len(label_list_task2)

# Task 1 classes: the distinct values of the 'label' column. Sorted for determinism.
label_list_task1 = np.sort(eat['label'].unique())
num_labels_task1 = len(label_list_task1)

# Define models

In [5]:
class RobertaClassificationHead(nn.Module):
    """Head for sentence-level classification tasks.

    Applies dropout -> dense -> tanh -> dropout -> final projection to the
    vector of the first token of the given features.

    Layer sizes are taken from ``config.hidden_size`` and ``config.num_labels``
    so the head always matches the encoder width and the number of classes the
    enclosing model reshapes its logits to.
    """

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(0.1)
        self.final = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, features, **kwargs):
        """Return class scores computed from ``features[:, 0, :]``."""
        x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.final(x)
        return x
    
    
class RobertaForSequenceClassification(RobertaPreTrainedModel):
    """RoBERTa encoder with a classification head fed from an intermediate layer.

    The head does not read the encoder's final output: it reads the
    third-from-last entry of the encoder's hidden-state tuple
    (``hidden_states[-3]``).
    """
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.classifier = RobertaClassificationHead(config)
        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,

        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        ``output_hidden_states`` is accepted for signature compatibility only: the head needs the per-layer hidden
        states, so they are always requested from the encoder.

        Returns a tuple ``([loss,] logits, ...)`` when ``return_dict`` is false (hidden states are not included),
        otherwise a :obj:`SequenceClassifierOutput`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            # Always on: the classifier input below is taken from the hidden states,
            # regardless of what the caller or the config asked for.
            output_hidden_states=True,
            return_dict=return_dict,
        )

        if return_dict:
            # Read by name: integer indexing is unreliable here because the
            # encoder is built without a pooling layer.
            all_hidden_states = outputs.hidden_states
        else:
            # NOTE(review): relies on the tuple layout
            # (sequence_output, pooled_output, hidden_states, ...) — confirm for the
            # installed transformers version.
            all_hidden_states = outputs[2]

        sequence_output = all_hidden_states[-3]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                #  We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            # Everything after the hidden states is passed through; the hidden
            # states themselves are dropped so the logits stay the only prediction.
            output = (logits,) + outputs[3:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

In [6]:
class RobertaClassificationHeadTask2(nn.Module):
    """Head for sentence-level classification tasks (Task 2 model).

    Applies dropout -> dense -> tanh -> dropout -> final projection to the
    vector of the first token of the given features.

    Layer sizes are taken from ``config.hidden_size`` and ``config.num_labels``
    so the head always matches the encoder width and the number of classes the
    enclosing model reshapes its logits to.
    """

    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(0.1)
        self.final = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, features, **kwargs):
        """Return class scores computed from ``features[:, 0, :]``."""
        x = features[:, 0, :]  # take <s> token (equiv. to [CLS])
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.final(x)
        return x
    
    
class RobertaForSequenceClassificationTask2(RobertaPreTrainedModel):
    """RoBERTa encoder with the Task 2 classification head fed from an intermediate layer.

    The head does not read the encoder's final output: it reads the
    third-from-last entry of the encoder's hidden-state tuple
    (``hidden_states[-3]``).
    """
    _keys_to_ignore_on_load_missing = [r"position_ids"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.roberta = RobertaModel(config, add_pooling_layer=False)
        self.classifier = RobertaClassificationHeadTask2(config)
        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,

        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ...,
            config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        ``output_hidden_states`` is accepted for signature compatibility only: the head needs the per-layer hidden
        states, so they are always requested from the encoder.

        Returns a tuple ``([loss,] logits, ...)`` when ``return_dict`` is false (hidden states are not included),
        otherwise a :obj:`SequenceClassifierOutput`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            # Always on: the classifier input below is taken from the hidden states,
            # regardless of what the caller or the config asked for.
            output_hidden_states=True,
            return_dict=return_dict,
        )

        if return_dict:
            # Read by name: integer indexing is unreliable here because the
            # encoder is built without a pooling layer.
            all_hidden_states = outputs.hidden_states
        else:
            # NOTE(review): relies on the tuple layout
            # (sequence_output, pooled_output, hidden_states, ...) — confirm for the
            # installed transformers version.
            all_hidden_states = outputs[2]

        sequence_output = all_hidden_states[-3]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                #  We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            # Everything after the hidden states is passed through; the hidden
            # states themselves are dropped so the logits stay the only prediction.
            output = (logits,) + outputs[3:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

# Define training arguments

In [7]:
# Trainer configuration used by the Task 1 training loop below.
training_args = TrainingArguments(
    # --- output / run control ---
    output_dir='tmp',
    overwrite_output_dir=True,
    do_train=True,
    do_eval=True,
    seed=42,
    dataloader_num_workers=0,
    # --- schedule and batching ---
    num_train_epochs=40.0,
    max_steps=-1,
    warmup_steps=0,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=1,
    # --- optimiser ---
    learning_rate=2e-6,
    weight_decay=0.0,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    # --- evaluation / model selection ---
    # NOTE(review): with evaluation_strategy='epoch', eval_steps presumably has
    # no effect — confirm against the installed transformers version.
    evaluation_strategy='epoch',
    eval_steps=100,
    # 'f1' is one of the keys returned by compute_metrics.
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    # --- logging ---
    logging_dir='runs/whataver',
    logging_first_step=False,
    logging_steps=100,
)

Training itself - don't run if the models are already trained and saved!

# Training task 1

In [None]:
# Train one Task 1 model per fold; `res` collects each fold's validation metrics.
res = []

# The tokenizer is identical for every fold, so it is loaded once.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

for fold in range(0, 10):
    print("fold n#{}".format(fold))
    train = eat[eat['fold'] != fold]
    val = eat[eat['fold'] == fold]

    # Seed *before* the model is built so that any layer not restored from the
    # checkpoint (presumably the head's `final` projection) is initialised
    # reproducibly; otherwise the first seeding only happens later, in Trainer.
    set_seed(training_args.seed)

    config = AutoConfig.from_pretrained(
        model_name_or_path,
        num_labels=num_labels_task1,
        output_hidden_states = True
    )

    # Load pretrained model
    model = RobertaForSequenceClassification.from_pretrained(
        model_name_or_path,
        from_tf=False,
        config=config,
    )

    train_dataset = EATDatasetTask1(train, tokenizer)
    eval_dataset = EATDatasetTask1(val, tokenizer)

    ####################### TRAIN ######################
    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator,
    )

    # No checkpoint to resume from, so the default arguments are used.
    trainer.train()

    res.append(trainer.evaluate(eval_dataset=eval_dataset))

    trainer.save_model(f'roberta_large_mnli_3layer_task_1_fold_{fold}')  # Saves the tokenizer too for easy upload

In [None]:
# print(f"accuracy is {np.mean([i['eval_accuracy'] for i in res])}")
# print(f"STD of acc is {np.std([i['eval_accuracy'] for i in res])}")

# Training Task 2

In [None]:
# Trainer configuration used by the Task 2 training loop below.
# NOTE(review): unlike the Task 1 arguments, this does not set
# load_best_model_at_end / metric_for_best_model — confirm that is intentional.
training_args = TrainingArguments(
    # --- output / run control ---
    output_dir='tmp',
    overwrite_output_dir=True,
    do_train=True,
    do_eval=True,
    seed=42,
    dataloader_num_workers=0,
    # --- schedule and batching ---
    num_train_epochs=40.0,
    max_steps=-1,
    warmup_steps=0,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=1,
    # --- optimiser ---
    learning_rate=2e-6,
    weight_decay=0.0,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    max_grad_norm=1.0,
    # --- evaluation ---
    # NOTE(review): with evaluation_strategy='epoch', eval_steps presumably has
    # no effect — confirm against the installed transformers version.
    evaluation_strategy='epoch',
    eval_steps=100,
    # --- logging ---
    logging_dir='runs/whataver',
    logging_first_step=False,
    logging_steps=100,
)

Training for task 2 - don't run if already trained and models are saved!

In [None]:
# Train one Task 2 model per fold; `res` collects each fold's validation metrics.
res = []

# The tokenizer is identical for every fold, so it is loaded once.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)

for fold in range(0, 10):
    print("fold n#{}".format(fold))
    train = eat[eat['fold'] != fold]
    val = eat[eat['fold'] == fold]

    # Seed *before* the model is built so that any layer not restored from the
    # checkpoint (presumably the head's `final` projection) is initialised
    # reproducibly; otherwise the first seeding only happens later, in Trainer.
    set_seed(training_args.seed)

    config = AutoConfig.from_pretrained(
        model_name_or_path,
        num_labels=num_labels_task2,
        output_hidden_states = True
    )

    # Load pretrained model
    model = RobertaForSequenceClassificationTask2.from_pretrained(
        model_name_or_path,
        from_tf=False,
        config=config,
    )

    train_dataset = EATDatasetTask2(train, tokenizer)
    eval_dataset = EATDatasetTask2(val, tokenizer)

    ####################### TRAIN ######################
    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator,
    )

    # No checkpoint to resume from, so the default arguments are used.
    trainer.train()

    res.append(trainer.evaluate(eval_dataset=eval_dataset))

    trainer.save_model(f'roberta_large_mnli_task_2_fold_{fold}')  # Saves the tokenizer too for easy upload

In [None]:
# print(f"Precision is {np.mean([i['eval_precision'] for i in res])}")
# print(f"STD of precision is {np.std([i['eval_precision'] for i in res])}")

In [None]:
# print(f"Recall is {np.mean([i['eval_recall'] for i in res])}")
# print(f"STD of recall is {np.std([i['eval_recall'] for i in res])}")

In [None]:
# print(f"F1 is {np.mean([i['eval_f1'] for i in res])}")
# print(f"STD of f1 is {np.std([i['eval_f1'] for i in res])}")

# Full predict on both tasks

In [8]:
class EATTestDataset(Dataset):
    """Tokenised EAT stories without targets, for prediction.

    Stories from ``df['story']`` are encoded as a sequence pair (first two
    sentences vs. the rest), padded/truncated to 100 tokens.
    """

    def __init__(self, df, tokenizer):
        # One encoding per story, in the frame's row order.
        self.tokens = [
            tokenizer.encode_plus(
                " ".join(sentences[0:2]),
                " ".join(sentences[2:]),
                padding='max_length',
                max_length=100,
                truncation=True,
            )
            for sentences in df['story']
        ]

    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        # Convert every field of the stored encoding to a tensor.
        item = {}
        for name, values in self.tokens[idx].items():
            item[name] = torch.tensor(values)
        return item
    
# Unlabelled test stories; later cells read the 'story' and 'id' columns.
test = pd.read_json('eat_test_unlabeled.json')

In [9]:
def _predict_with_fold_model(model_cls, dataset_cls, model_dir, num_labels, train_df, test_df):
    """Load one saved fold model, print its metrics on `train_df`, and predict on `test_df`.

    Args:
        model_cls: model class to restore with `from_pretrained`.
        dataset_cls: labelled dataset class used to wrap `train_df`.
        model_dir: directory the fold's model and tokenizer were saved to.
        num_labels: number of classes for this task.
        train_df: labelled rows the printed metrics are computed on.
        test_df: unlabelled rows to predict on.

    Returns:
        The result of `Trainer.predict` on the test rows.
    """
    config = AutoConfig.from_pretrained(
        model_dir,
        num_labels=num_labels,
        output_hidden_states = True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_dir,
        use_fast=True,
    )

    train_dataset = dataset_cls(train_df, tokenizer)
    test_ds = EATTestDataset(test_df, tokenizer)

    model = model_cls.from_pretrained(
        model_dir,
        from_tf=False,
        config=config,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=train_dataset,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator,
    )

    print(trainer.evaluate(eval_dataset=train_dataset))

    # The model and trainer are local to this function, so they can be released
    # before the next model is loaded.
    return trainer.predict(test_ds)


all_predictions_task1 = list()
all_predictions_task2 = list()

for fold in range(0, 10):
    # NOTE(review): the original comment called this a sanity check "on val", but
    # the metrics have always been computed on the fold's *training* rows; that
    # behaviour is kept — confirm which split was intended.
    train = eat[eat['fold'] != fold]

    # saved in the same directory, one folder per fold per task
    ############ PREDICT TASK 1 ##################
    preds_task_1 = _predict_with_fold_model(
        RobertaForSequenceClassification,
        EATDatasetTask1,
        f'roberta_large_mnli_3layer_task_1_fold_{fold}',
        num_labels_task1,
        train,
        test,
    )
    all_predictions_task1.append(preds_task_1)

    ############ PREDICT TASK 2 ##################
    preds_task_2 = _predict_with_fold_model(
        RobertaForSequenceClassificationTask2,
        EATDatasetTask2,
        f'roberta_large_mnli_task_2_fold_{fold}',
        num_labels_task2,
        train,
        test,
    )
    all_predictions_task2.append(preds_task_2)

{'eval_loss': 0.17259874939918518, 'eval_accuracy': 0.9478168264110756, 'eval_f1': 0.9478083026213251, 'eval_precision': 0.9480726107363662, 'eval_recall': 0.9478042916118496}


{'eval_loss': 0.18603016436100006, 'eval_accuracy': 0.9659211927582535, 'eval_f1': 0.7927943402446456, 'eval_precision': 0.7972706581507997, 'eval_recall': 0.7885279105271077}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.2329571396112442, 'eval_accuracy': 0.9712765957446808, 'eval_f1': 0.9712757830404889, 'eval_precision': 0.9713299377475948, 'eval_recall': 0.9712765957446808}


{'eval_loss': 0.2879768908023834, 'eval_accuracy': 0.9521276595744681, 'eval_f1': 0.7831849744768701, 'eval_precision': 0.7889962145243836, 'eval_recall': 0.7778053952872651}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.18549077212810516, 'eval_accuracy': 0.9478723404255319, 'eval_f1': 0.9478292978249371, 'eval_precision': 0.9493552738553216, 'eval_recall': 0.9478723404255319}


{'eval_loss': 0.2245652824640274, 'eval_accuracy': 0.9585106382978723, 'eval_f1': 0.7910595911219359, 'eval_precision': 0.7928163152592793, 'eval_recall': 0.789407144446676}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.24287547171115875, 'eval_accuracy': 0.9595314164004259, 'eval_f1': 0.9595236582754053, 'eval_precision': 0.9598431952931349, 'eval_recall': 0.9595177607403711}


{'eval_loss': 0.30689549446105957, 'eval_accuracy': 0.9371671991480298, 'eval_f1': 0.749790965277393, 'eval_precision': 0.7641750581074671, 'eval_recall': 0.7396295399861669}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.18740876019001007, 'eval_accuracy': 0.973404255319149, 'eval_f1': 0.9734035028152674, 'eval_precision': 0.9734950014488555, 'eval_recall': 0.9734199792665426}


{'eval_loss': 0.28691983222961426, 'eval_accuracy': 0.9468085106382979, 'eval_f1': 0.7745163044810296, 'eval_precision': 0.7867194566213893, 'eval_recall': 0.7638334100232503}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.13564711809158325, 'eval_accuracy': 0.973404255319149, 'eval_f1': 0.9734039844226572, 'eval_precision': 0.973423543738767, 'eval_recall': 0.9734042553191489}


{'eval_loss': 0.26495271921157837, 'eval_accuracy': 0.9457446808510638, 'eval_f1': 0.7714096049004868, 'eval_precision': 0.7855369874834744, 'eval_recall': 0.7596626883253363}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.3708890676498413, 'eval_accuracy': 0.8519701810436635, 'eval_f1': 0.8511897771854197, 'eval_precision': 0.859320714861447, 'eval_recall': 0.8518940253141587}


{'eval_loss': 0.23418597877025604, 'eval_accuracy': 0.9542066027689031, 'eval_f1': 0.7826774987522253, 'eval_precision': 0.7871742960400367, 'eval_recall': 0.7785448372475311}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.16951222717761993, 'eval_accuracy': 0.9744680851063829, 'eval_f1': 0.9744662356712119, 'eval_precision': 0.9746055896683512, 'eval_recall': 0.9744680851063829}


{'eval_loss': 0.20769107341766357, 'eval_accuracy': 0.9648936170212766, 'eval_f1': 0.7903866459959187, 'eval_precision': 0.7954240970065919, 'eval_recall': 0.7857847883028235}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.20725375413894653, 'eval_accuracy': 0.9468085106382979, 'eval_f1': 0.9467738136222799, 'eval_precision': 0.9481330155487458, 'eval_recall': 0.9468671202676336}


{'eval_loss': 0.2717283070087433, 'eval_accuracy': 0.9478723404255319, 'eval_f1': 0.7817967259357811, 'eval_precision': 0.7834209050407176, 'eval_recall': 0.7817445023013757}


  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.2784635126590729, 'eval_accuracy': 0.9030883919062833, 'eval_f1': 0.9030602462945211, 'eval_precision': 0.9035106044779508, 'eval_recall': 0.9030712697908634}


{'eval_loss': 0.2176772952079773, 'eval_accuracy': 0.9637912673056444, 'eval_f1': 0.7955637404577739, 'eval_precision': 0.7957247122404189, 'eval_recall': 0.7955255136737286}


  _warn_prf(average, modifier, msg_start, len(result))


# Ensemble - take the average of raw logits

In [10]:
# Ensemble the fold models: stack each model's prediction array along a new
# last axis and average over it.
average_logits_task1 = np.stack([fold_out.predictions for fold_out in all_predictions_task1], axis=2).mean(axis=2)
average_logits_task2 = np.stack([fold_out.predictions for fold_out in all_predictions_task2], axis=2).mean(axis=2)

In [12]:
# Highest-scoring class per test story.
preds_task1 = pd.Series(np.argmax(average_logits_task1, axis=-1))
# Class 0 stood in for breakpoint -1 during training, so we replace it back.
preds_task2 = pd.Series(np.argmax(average_logits_task2, axis=-1)).replace(to_replace=0, value=-1)

In [13]:
# Combine ids and predictions row by row. Plain arrays are passed on purpose:
# a dict of Series would be aligned on index, which silently mis-pairs rows
# (and introduces NaN) if `test` does not carry a default 0..n-1 index.
final_pred_df = pd.DataFrame({
    'id': test['id'].values,
    'pred_label': preds_task1.values,
    'pred_breakpoint': preds_task2.values,
})

In [14]:
# Conflict 1: the Task 1 model says the story is plausible (label = 1) but the
# Task 2 model predicts a breakpoint. Task 1 is trusted more (higher accuracy),
# so the breakpoint is reset to -1.
final_pred_df.loc[
    final_pred_df['pred_label'].eq(1) & final_pred_df['pred_breakpoint'].ne(-1),
    'pred_breakpoint',
] = -1

In [15]:
# Conflict 2: the Task 1 model says the story is implausible (label = 0) but the
# Task 2 model predicts -1 (plausible). There is no breakpoint to report in that
# case, so the story is marked plausible instead.
# Open question: would the second-best Task 2 class (after -1) be a better
# choice, given that the Task 1 model is the stronger one?
final_pred_df.loc[
    final_pred_df['pred_label'].eq(0) & final_pred_df['pred_breakpoint'].eq(-1),
    'pred_label',
] = 1

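We trust the Task 1 model more, so when it predicts "plausible" we discard any breakpoint predicted by the Task 2 model. In the opposite conflict (Task 1 says "implausible" but Task 2 predicts no breakpoint) there is no breakpoint to report, so the story is marked plausible instead.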

Sanity check - should be 100% match

In [16]:
# Boolean mask of the rows whose final Task 1 prediction is 1
# (despite the variable name, these are the ones, not the zeros).
zeros_only1 = final_pred_df['pred_label'] == 1

In [17]:
# Boolean mask of the rows whose final Task 2 prediction is -1 (no breakpoint).
zeros_only2 = final_pred_df['pred_breakpoint'] == -1

In [18]:
# Share of rows on which the two masks agree.
(zeros_only1 == zeros_only2).mean()

1.0

In [20]:
import json

# Write the predictions as a JSON list with one record per row of final_pred_df.
with open('EAT_22_preds.json', 'w') as predictions_file:
    json.dump(obj=final_pred_df.to_dict(orient='records'), fp=predictions_file)