In [1]:
import glob
import os
import numpy as np
import pandas as pd
import re
import codecs
import io
import matplotlib
import matplotlib.pyplot as plt
import sys
from torch import nn
from collections import defaultdict
from sklearn.metrics import classification_report, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, train_test_split, StratifiedKFold

import torch
import sentence_splitter
import spacy
from tqdm import tqdm

sys.path.insert(0, "../utils")
from data_loader import DataLoader
from data_writer import DataWriter
from pre_processor import PreProcessor
from error_analysis import ErrorAnalysis
from labels import label2id, id2label
from feature_eng import GetFeatures


def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

# pd.set_option('display.max_rows', -1)   

In [103]:
dataloader = DataLoader()
preprocessor = PreProcessor()

print("Loading train data")
train_df = pd.read_csv("../datasets/processed_data/train_data/features/train_data_v2.csv")
train_articles = dataloader.read_articles("../datasets/train-articles")
train_df["bert_encoding_id"] = list(range(len(train_df)))
print("     Done loading train data")

print("Loading test data")
test_df = pd.read_csv("../datasets/processed_data/dev_data/features/dev_data_v2.csv")
test_articles = dataloader.read_articles("../datasets/dev-articles")
test_df["bert_encoding_id"] = list(range(len(test_df)))
print("     Done loading test data")

Loading train data
Read 371 files with succes and 0 failed
     Done loading train data
Loading test data
Read 75 files with succes and 0 failed
     Done loading test data


In [3]:
train_emb_path = "../datasets/processed_data/train_data/embeddings/"
bert_hidden_states = torch.load(train_emb_path + "bert_hidden_states" + '.pt')

dev_emb_path = "../datasets/processed_data/dev_data/embeddings/"
bert_dev_hidden_states = torch.load(dev_emb_path + "bert_dev_hidden_states" + '.pt')


In [4]:
y_train = preprocessor.preb_targets(train_df, "gold_label_id")
y_train = torch.tensor(y_train).type(torch.FloatTensor)
y_true = train_df["gold_label"]

# Straitified 10 fold cross validationnn

In [5]:
skf = StratifiedKFold(n_splits = 10, shuffle = True, random_state=1)

# Train log_reg

In [6]:
# Class-weights
labels = set(train_df["gold_label"])
label_counter = {label: train_df["gold_label"].values.tolist().count(label) for label in labels}
max_class = max(label_counter.values())
class_w = {label: max_class / value for label, value in label_counter.items()}

In [7]:
log_train_col = ['span_word_length',
 'article_one_word_counter',
 'article_span_sentence_counter',
 'word_resemble_factor',
 'word_count_span_sent']

In [8]:
kf = KFold(n_splits=5)

log_weighted_train_sm = np.zeros((len(train_df),14))
log_train_sm = np.zeros((len(train_df),14))

for fold_idx, (train_index, test_index) in enumerate(skf.split(train_df, y_true)):
    print("Fold:", fold_idx + 1)
    # data
    tmp_X_train = train_df.iloc[train_index]
    tmp_X_test = train_df.iloc[test_index]
    tmp_y_train = tmp_X_train["gold_label"]
    
    logreg = LogisticRegression(penalty='l2', solver="lbfgs")
    logreg_weighted = LogisticRegression(penalty='l2',class_weight = class_w, solver="lbfgs")
    
    # Weighted
    logreg_weighted.fit(tmp_X_train[log_train_col], tmp_y_train)
    log_weighted_train_sm[test_index] = logreg_weighted.predict_proba(tmp_X_test[log_train_col])
    # Not weighted
    logreg.fit(tmp_X_train[log_train_col], tmp_y_train)
    log_train_sm[test_index] = logreg.predict_proba(tmp_X_test[log_train_col])
    fold_idx += 1

Fold: 1
Fold: 2
Fold: 3
Fold: 4
Fold: 5


In [9]:
print("Weighted Logreg")

print(classification_report(y_true, [id2label[x] for x in np.argmax(log_weighted_train_sm, 1)]))

print("NOT Weighted Logreg")
print(classification_report(y_true, [id2label[x] for x in np.argmax(log_train_sm, 1)]))

Weighted Logreg
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.10      0.26      0.15       144
          Appeal_to_fear-prejudice       0.09      0.02      0.03       294
    Bandwagon,Reductio_ad_hitlerum       0.04      0.22      0.07        72
           Black-and-White_Fallacy       0.05      0.14      0.08       107
         Causal_Oversimplification       0.17      0.16      0.17       209
                             Doubt       0.24      0.02      0.04       493
         Exaggeration,Minimisation       0.18      0.25      0.21       466
                       Flag-Waving       0.00      0.00      0.00       229
                   Loaded_Language       0.77      0.16      0.27      2123
             Name_Calling,Labeling       0.24      0.33      0.28      1058
                        Repetition       0.52      0.58      0.55       621
                           Slogans       0.09      0.61      0.15      

# Abliation study functions

## fit model function

In [10]:
from sklearn.metrics import f1_score 

class ModelFit:
    def fit(self, X_train, y_train, X_dev=-1, y_dev=-1, hidden_1=100, hidden_2=100, learning_rate=1e-4, dev_size=0.1, seed=7, verbose=False):
        input_dim = X_train.shape[1]
        output_dim = 14
        if type(X_dev) == int:
            split_idx = int(len(X_train) * (1-dev_size))
            np.random.seed(seed)
            randperm = np.random.permutation(len(X_train))
            X_dev = X_train[randperm][split_idx:]
            X_train = X_train[randperm][:split_idx]
            dev_onehot_labels = y_train[randperm][split_idx:]
            train_onehot_labels = y_train[randperm][:split_idx]
        else:
            dev_onehot_labels = y_dev
            train_onehot_labels = y_train

        self.nnmodel = nn.Sequential(
            torch.nn.Linear(input_dim, hidden_1),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_1, hidden_2),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_2, output_dim),
        )

        loss_fn = nn.MSELoss(reduction='mean')

        optimizer = torch.optim.Adam(self.nnmodel.parameters(), lr=learning_rate)

        train_losses = []
        dev_losses = []
        old_dev_loss = np.infty
        for t in range(5000):

            y_pred = self.nnmodel(X_train)

            train_loss = loss_fn(y_pred, train_onehot_labels)
            train_losses.append(train_loss)

            new_dev_loss =  loss_fn(self.nnmodel(X_dev), dev_onehot_labels).item()
            dev_losses.append(new_dev_loss)
            if t%10 == 0:
                if verbose:
                    print(t, new_dev_loss)
                if new_dev_loss < old_dev_loss:
                    old_dev_loss = new_dev_loss
                    continue
                break
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        return self.nnmodel


    def predict(self, X_test):
        y_pred = self.nnmodel(X_test)
        return y_pred
    
    def get_label(self, y_pred):
        y_pred = y_pred.max(dim = 1)[1]
        y_pred = np.array([id2label[x] for x in y_pred.tolist()])
        return y_pred

## Validate model features

In [11]:
def validate_feature_combination(train_cols, train_df, y_true, y_train, w_features=True, w_bert=True, w_log=True, weighted_log=True): 
    # initialize skf
    skf = StratifiedKFold(n_splits = 10, shuffle = True, random_state=1)
    # Result metric
    logits = np.zeros((len(train_df),14))
    
    with tqdm(total=10) as pbar:
        for fold_idx, (train_index, test_index) in enumerate(skf.split(train_df, y_true)):     
            
            # With features
            if w_features:
                
                # With logistic regression and features
                if w_log:
                    
                    # With Weighted logreg softmax
                    if weighted_log:
                        tmp_X_train = log_weighted_train_sm[train_index]
                        tmp_X_test = log_weighted_train_sm[test_index]
                    # Not weighted logreg
                    else:
                        tmp_X_train = log_train_sm[train_index]
                        tmp_X_test = log_train_sm[test_index]

                    # Concat logreg sm with features:
                    tmp_X_train = np.concatenate((tmp_X_train, train_df.iloc[train_index][train_cols].to_numpy()), 1)
                    tmp_X_test = np.concatenate((tmp_X_test,train_df.iloc[test_index][train_cols].to_numpy()), 1)
                
                # without logistic regression and but with features
                else:
                    tmp_X_train = train_df.iloc[train_index][train_cols].to_numpy()
                    tmp_X_test = train_df.iloc[test_index][train_cols].to_numpy()

                # with features and BERT and logreg if w_logreg
                if w_bert:
                    hidden_state_train_index = train_df["bert_encoding_id"].iloc[train_index].values
                    hidden_state_test_index = train_df["bert_encoding_id"].iloc[test_index].values

                    tmp_X_train = torch.cat((bert_hidden_states[hidden_state_train_index], torch.FloatTensor(tmp_X_train)), 1)
                    tmp_X_test = torch.cat((bert_hidden_states[hidden_state_test_index], torch.FloatTensor(tmp_X_test)), 1)
                # without bert but with feature and logreg if w_logreg
                else:
                    tmp_X_train = torch.FloatTensor(tmp_X_train)
                    tmp_X_test = torch.FloatTensor(tmp_X_test)
                    
            # Without features
            else:
                # With logistic regression and without features
                if w_log:
                    # Weighted or non weighted logreg softmax
                    if weighted_log:
                        tmp_X_train = log_weighted_train_sm[train_index]
                        tmp_X_test = log_weighted_train_sm[test_index]
                    else:
                        tmp_X_train = log_train_sm[train_index]
                        tmp_X_test = log_train_sm[test_index]
                    # wo features, but with bert and logreg
                    if w_bert:
                        hidden_state_train_index = train_df["bert_encoding_id"].iloc[train_index].values
                        hidden_state_test_index = train_df["bert_encoding_id"].iloc[test_index].values

                        tmp_X_train = torch.cat((bert_hidden_states[hidden_state_train_index], torch.FloatTensor(tmp_X_train)), 1)
                        tmp_X_test = torch.cat((bert_hidden_states[hidden_state_test_index], torch.FloatTensor(tmp_X_test)), 1)

                # Else without logistic regression and features
                # Same as only bert
                else:
                    hidden_state_train_index = train_df["bert_encoding_id"].iloc[train_index].values
                    hidden_state_test_index = train_df["bert_encoding_id"].iloc[test_index].values

                    tmp_X_train = bert_hidden_states[hidden_state_train_index]
                    tmp_X_test = bert_hidden_states[hidden_state_test_index]

            # Train model
            model = ModelFit()
            model.fit(tmp_X_train, y_train[train_index])

            preds = model.predict(tmp_X_test)
            logits[test_index] = np.array(preds.tolist())
            
            pbar.update(1)
    # print(classification_report(y_true, [id2label[x] for x in np.argmax(logits, 1)]))
    return [id2label[x] for x in np.argmax(logits, 1)]

In [97]:
# Test functionn with bert only
preds = validate_feature_combination(log_train_col, train_df, y_true, y_train, w_features=False, w_bert=True, w_log=False, weighted_log=False)
print(classification_report(y_true, preds))

100%|██████████| 10/10 [02:00<00:00, 12.53s/it]

                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.45      0.16      0.24       144
          Appeal_to_fear-prejudice       0.43      0.46      0.44       294
    Bandwagon,Reductio_ad_hitlerum       0.57      0.11      0.19        72
           Black-and-White_Fallacy       0.32      0.07      0.11       107
         Causal_Oversimplification       0.43      0.33      0.37       209
                             Doubt       0.56      0.76      0.64       493
         Exaggeration,Minimisation       0.55      0.44      0.49       466
                       Flag-Waving       0.59      0.59      0.59       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.80      0.76      1058
                        Repetition       0.65      0.53      0.58       621
                           Slogans       0.68      0.28      0.40       129
       Thou




# Abliation

In [12]:
train_cols = ['span_word_length',
 'article_one_word_counter',
 'article_span_sentence_counter',
 'word_resemble_factor',
 'word_count_span_sent']

In [13]:
combinations = {"Bert Only":{"train_cols":train_cols, 
                             "train_df":train_df, 
                             "y_true":y_true, 
                             "y_train":y_train, 
                             "w_features":False, 
                             "w_bert":True, 
                             "w_log":False, 
                             "weighted_log":False
                            }, 
                "Bert and all HC":{"train_cols":train_cols, 
                                 "train_df":train_df, 
                                 "y_true":y_true, 
                                 "y_train":y_train, 
                                 "w_features":True, 
                                 "w_bert":True, 
                                 "w_log":False, 
                                 "weighted_log":False
                                },
                "Bert and wLogreg":{"train_cols":train_cols, 
                                 "train_df":train_df, 
                                 "y_true":y_true, 
                                 "y_train":y_train, 
                                 "w_features":False, 
                                 "w_bert":True, 
                                 "w_log":True, 
                                 "weighted_log":True
                                },
                "Bert and logreg":{"train_cols":train_cols, 
                                 "train_df":train_df, 
                                 "y_true":y_true, 
                                 "y_train":y_train, 
                                 "w_features":False, 
                                 "w_bert":True, 
                                 "w_log":True, 
                                 "weighted_log":False
                                },
                "logreg and all HC":{"train_cols":train_cols, 
                                 "train_df":train_df, 
                                 "y_true":y_true, 
                                 "y_train":y_train, 
                                 "w_features":True, 
                                 "w_bert":False, 
                                 "w_log":True, 
                                 "weighted_log":False
                                },
                "wlogreg and all HC":{"train_cols":train_cols, 
                                 "train_df":train_df, 
                                 "y_true":y_true, 
                                 "y_train":y_train, 
                                 "w_features":True, 
                                 "w_bert":False, 
                                 "w_log":True, 
                                 "weighted_log":True
                                },
                "Bert, wLogreg and all HC":{"train_cols":train_cols, 
                                         "train_df":train_df, 
                                         "y_true":y_true, 
                                         "y_train":y_train, 
                                         "w_features":True, 
                                         "w_bert":True, 
                                         "w_log":True, 
                                         "weighted_log":True
                                        }, 
                "Bert, Logreg and all HC":{"train_cols":train_cols, 
                                         "train_df":train_df, 
                                         "y_true":y_true, 
                                         "y_train":y_train, 
                                         "w_features":True, 
                                         "w_bert":True, 
                                         "w_log":True, 
                                         "weighted_log":False
                                          }
               }


In [14]:
results = dict()
model_num = 1
for name, params in combinations.items():
    print("\n{}\n".format("="*60), name, "\nModel #{} out of {} models\n{}\n".format(model_num, len(combinations),"_"*30), sep="")
    model_num += 1
    preds = validate_feature_combination(**params)
    f1 = f1_score(y_true, preds, average="micro")
    results[name] = (f1, preds)
    print("Micro f1 = {}".format(f1))
    print(classification_report(y_true, preds))
    print()
    print()


  0%|          | 0/10 [00:00<?, ?it/s]


Bert Only
Model #1 out of 8 models
______________________________



100%|██████████| 10/10 [02:09<00:00, 12.92s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6462718224832762
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.36      0.15      0.21       144
          Appeal_to_fear-prejudice       0.43      0.47      0.45       294
    Bandwagon,Reductio_ad_hitlerum       0.60      0.08      0.15        72
           Black-and-White_Fallacy       0.37      0.07      0.11       107
         Causal_Oversimplification       0.46      0.34      0.39       209
                             Doubt       0.56      0.76      0.64       493
         Exaggeration,Minimisation       0.55      0.43      0.49       466
                       Flag-Waving       0.60      0.58      0.59       229
                   Loaded_Language       0.70      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.65      0.52      0.58       621
                           Slogans       0.65      0.26  

100%|██████████| 10/10 [02:13<00:00, 13.85s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6537771251427639
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.43      0.18      0.25       144
          Appeal_to_fear-prejudice       0.45      0.47      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.56      0.12      0.20        72
           Black-and-White_Fallacy       0.31      0.05      0.08       107
         Causal_Oversimplification       0.44      0.32      0.37       209
                             Doubt       0.56      0.76      0.65       493
         Exaggeration,Minimisation       0.55      0.45      0.50       466
                       Flag-Waving       0.58      0.57      0.57       229
                   Loaded_Language       0.71      0.85      0.78      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.68      0.57      0.62       621
                           Slogans       0.66      0.29  

100%|██████████| 10/10 [02:14<00:00, 13.20s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6495349975526187
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.49      0.18      0.26       144
          Appeal_to_fear-prejudice       0.45      0.46      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.64      0.12      0.21        72
           Black-and-White_Fallacy       0.58      0.10      0.17       107
         Causal_Oversimplification       0.43      0.33      0.38       209
                             Doubt       0.56      0.75      0.64       493
         Exaggeration,Minimisation       0.54      0.44      0.49       466
                       Flag-Waving       0.58      0.59      0.59       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.76      1058
                        Repetition       0.65      0.54      0.59       621
                           Slogans       0.56      0.19  

100%|██████████| 10/10 [02:17<00:00, 14.66s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6514929025942242
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.43      0.16      0.23       144
          Appeal_to_fear-prejudice       0.45      0.50      0.47       294
    Bandwagon,Reductio_ad_hitlerum       0.60      0.12      0.21        72
           Black-and-White_Fallacy       0.44      0.11      0.18       107
         Causal_Oversimplification       0.43      0.35      0.39       209
                             Doubt       0.55      0.75      0.64       493
         Exaggeration,Minimisation       0.55      0.45      0.49       466
                       Flag-Waving       0.62      0.59      0.60       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.73      0.80      0.76      1058
                        Repetition       0.66      0.52      0.58       621
                           Slogans       0.61      0.24  

100%|██████████| 10/10 [01:39<00:00,  9.09s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.4454233969652472
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.00      0.00      0.00       144
          Appeal_to_fear-prejudice       0.00      0.00      0.00       294
    Bandwagon,Reductio_ad_hitlerum       0.00      0.00      0.00        72
           Black-and-White_Fallacy       0.00      0.00      0.00       107
         Causal_Oversimplification       0.00      0.00      0.00       209
                             Doubt       0.27      0.70      0.39       493
         Exaggeration,Minimisation       0.00      0.00      0.00       466
                       Flag-Waving       0.00      0.00      0.00       229
                   Loaded_Language       0.51      0.82      0.63      2123
             Name_Calling,Labeling       0.35      0.33      0.34      1058
                        Repetition       0.65      0.48      0.55       621
                           Slogans       0.00      0.00  

100%|██████████| 10/10 [01:35<00:00,  8.36s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.4434654919236417
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.00      0.00      0.00       144
          Appeal_to_fear-prejudice       0.00      0.00      0.00       294
    Bandwagon,Reductio_ad_hitlerum       0.00      0.00      0.00        72
           Black-and-White_Fallacy       0.00      0.00      0.00       107
         Causal_Oversimplification       0.00      0.00      0.00       209
                             Doubt       0.27      0.70      0.39       493
         Exaggeration,Minimisation       0.00      0.00      0.00       466
                       Flag-Waving       0.00      0.00      0.00       229
                   Loaded_Language       0.52      0.81      0.63      2123
             Name_Calling,Labeling       0.34      0.34      0.34      1058
                        Repetition       0.65      0.47      0.54       621
                           Slogans       0.00      0.00  

100%|██████████| 10/10 [02:14<00:00, 13.66s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6482297275248817
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.53      0.20      0.29       144
          Appeal_to_fear-prejudice       0.44      0.50      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.44      0.10      0.16        72
           Black-and-White_Fallacy       0.38      0.06      0.10       107
         Causal_Oversimplification       0.41      0.27      0.33       209
                             Doubt       0.56      0.76      0.64       493
         Exaggeration,Minimisation       0.53      0.42      0.47       466
                       Flag-Waving       0.60      0.57      0.58       229
                   Loaded_Language       0.70      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.69      0.55      0.61       621
                           Slogans       0.65      0.24  

100%|██████████| 10/10 [02:18<00:00, 14.33s/it]


Micro f1 = 0.6534508076358296
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.51      0.25      0.33       144
          Appeal_to_fear-prejudice       0.45      0.48      0.47       294
    Bandwagon,Reductio_ad_hitlerum       0.77      0.14      0.24        72
           Black-and-White_Fallacy       0.38      0.07      0.12       107
         Causal_Oversimplification       0.45      0.33      0.38       209
                             Doubt       0.56      0.75      0.64       493
         Exaggeration,Minimisation       0.56      0.45      0.50       466
                       Flag-Waving       0.59      0.59      0.59       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.68      0.56      0.61       621
                           Slogans       0.66      0.22  

In [33]:
HC_only = {"HC only": {"train_cols":train_cols, 
          "train_df":train_df, 
          "y_true":y_true, 
          "y_train":y_train, 
          "w_features":True, 
          "w_bert":False, 
          "w_log":False, 
          "weighted_log":False}}

In [35]:
model_num = 1
for name, params in HC_only.items():
    print("\n{}\n".format("="*60), name, "\nModel #{} out of {} models\n{}\n".format(model_num, len(combinations),"_"*30), sep="")
    model_num += 1
    preds = validate_feature_combination(**params)
    f1 = f1_score(y_true, preds, average="micro")
    results[name] = (f1, preds)
    print("Micro f1 = {}".format(f1))
    print(classification_report(y_true, preds))
    print()
    print()



  0%|          | 0/10 [00:00<?, ?it/s]


HC only
Model #1 out of 8 models
______________________________



100%|██████████| 10/10 [01:30<00:00,  8.20s/it]

Micro f1 = 0.4444444444444444
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.00      0.00      0.00       144
          Appeal_to_fear-prejudice       0.00      0.00      0.00       294
    Bandwagon,Reductio_ad_hitlerum       0.00      0.00      0.00        72
           Black-and-White_Fallacy       0.00      0.00      0.00       107
         Causal_Oversimplification       0.00      0.00      0.00       209
                             Doubt       0.27      0.70      0.39       493
         Exaggeration,Minimisation       0.00      0.00      0.00       466
                       Flag-Waving       0.00      0.00      0.00       229
                   Loaded_Language       0.52      0.82      0.63      2123
             Name_Calling,Labeling       0.34      0.33      0.34      1058
                        Repetition       0.64      0.48      0.55       621
                           Slogans       0.00      0.00  




In [41]:
results_overview = [(name, x[0]) for name, x  in  sorted(results.items(), key=lambda x: x[1][0], reverse=True)]

In [42]:
results_overview

[('Bert and all HC', 0.6537771251427639),
 ('Bert, Logreg and all HC', 0.6534508076358296),
 ('Bert and logreg', 0.6514929025942242),
 ('Bert and wLogreg', 0.6495349975526187),
 ('Bert, wLogreg and all HC', 0.6482297275248817),
 ('Bert Only', 0.6462718224832762),
 ('logreg and all HC', 0.4454233969652472),
 ('HC only', 0.4444444444444444),
 ('wlogreg and all HC', 0.4434654919236417)]

# Examining Bert and all HC

In [24]:
train_cols = ['span_word_length',
 'article_one_word_counter',
 'article_span_sentence_counter',
 'word_resemble_factor',
 'word_count_span_sent']

In [28]:
feature_combinations = {"wo_"+ablation_col: [col for col in train_cols if col != ablation_col] for ablation_col in train_cols}

In [143]:
params = {"train_cols":train_cols, 
          "train_df":train_df, 
          "y_true":y_true, 
          "y_train":y_train, 
          "w_features":True, 
          "w_bert":True, 
          "w_log":True, 
          "weighted_log":False}

In [144]:
results_v2 = dict()
combination_num = 1
for name, tmp_features in feature_combinations.items():
    print("\n{}\n".format("="*60), name, "\nFeature comp. #{} out of {} combinations\n{}\n".format(combination_num, len(feature_combinations),"_"*30), sep="")
    combination_num += 1
    params["train_cols"] = tmp_features
    preds = validate_feature_combination(**params)
    f1 = f1_score(y_true, preds, average="micro")
    results_v2[name] = (f1, preds)
    print("Micro f1 = {}".format(f1))
    print(classification_report(y_true, preds))
    print()
    print()

  0%|          | 0/10 [00:00<?, ?it/s]


wo_span_word_length
Feature comp. #1 out of 5 combinations
______________________________



100%|██████████| 10/10 [02:09<00:00, 13.27s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6537771251427639
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.45      0.17      0.25       144
          Appeal_to_fear-prejudice       0.44      0.47      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.65      0.18      0.28        72
           Black-and-White_Fallacy       0.44      0.07      0.13       107
         Causal_Oversimplification       0.46      0.35      0.40       209
                             Doubt       0.55      0.77      0.64       493
         Exaggeration,Minimisation       0.55      0.44      0.49       466
                       Flag-Waving       0.59      0.59      0.59       229
                   Loaded_Language       0.71      0.85      0.78      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.70      0.55      0.62       621
                           Slogans       0.62      0.19  

100%|██████████| 10/10 [02:13<00:00, 14.44s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6510034263338228
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.48      0.21      0.29       144
          Appeal_to_fear-prejudice       0.45      0.48      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.62      0.11      0.19        72
           Black-and-White_Fallacy       0.47      0.07      0.13       107
         Causal_Oversimplification       0.41      0.32      0.36       209
                             Doubt       0.56      0.77      0.65       493
         Exaggeration,Minimisation       0.57      0.45      0.51       466
                       Flag-Waving       0.57      0.57      0.57       229
                   Loaded_Language       0.71      0.84      0.77      2123
             Name_Calling,Labeling       0.72      0.80      0.76      1058
                        Repetition       0.64      0.54      0.58       621
                           Slogans       0.62      0.29  

100%|██████████| 10/10 [02:15<00:00, 13.48s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6527981726219612
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.57      0.19      0.28       144
          Appeal_to_fear-prejudice       0.45      0.48      0.46       294
    Bandwagon,Reductio_ad_hitlerum       0.43      0.08      0.14        72
           Black-and-White_Fallacy       0.37      0.07      0.11       107
         Causal_Oversimplification       0.43      0.32      0.37       209
                             Doubt       0.55      0.77      0.64       493
         Exaggeration,Minimisation       0.54      0.43      0.48       466
                       Flag-Waving       0.59      0.58      0.59       229
                   Loaded_Language       0.71      0.86      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.75      1058
                        Repetition       0.70      0.54      0.61       621
                           Slogans       0.60      0.30  

100%|██████████| 10/10 [02:10<00:00, 12.67s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6519823788546255
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.48      0.19      0.28       144
          Appeal_to_fear-prejudice       0.44      0.47      0.45       294
    Bandwagon,Reductio_ad_hitlerum       0.62      0.11      0.19        72
           Black-and-White_Fallacy       0.43      0.08      0.14       107
         Causal_Oversimplification       0.43      0.33      0.37       209
                             Doubt       0.56      0.76      0.64       493
         Exaggeration,Minimisation       0.56      0.44      0.49       466
                       Flag-Waving       0.60      0.57      0.58       229
                   Loaded_Language       0.70      0.86      0.77      2123
             Name_Calling,Labeling       0.72      0.80      0.76      1058
                        Repetition       0.69      0.54      0.61       621
                           Slogans       0.56      0.23  

100%|██████████| 10/10 [02:12<00:00, 13.10s/it]


Micro f1 = 0.6524718551150269
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.57      0.24      0.33       144
          Appeal_to_fear-prejudice       0.44      0.47      0.45       294
    Bandwagon,Reductio_ad_hitlerum       0.73      0.11      0.19        72
           Black-and-White_Fallacy       0.39      0.07      0.11       107
         Causal_Oversimplification       0.43      0.34      0.38       209
                             Doubt       0.56      0.77      0.65       493
         Exaggeration,Minimisation       0.54      0.43      0.48       466
                       Flag-Waving       0.58      0.56      0.57       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.79      0.76      1058
                        Repetition       0.70      0.55      0.62       621
                           Slogans       0.58      0.25  

In [145]:
results_v2_overview = [(name, res[0]) for name, res in sorted(results_v2.items(), key=lambda x : x[1][0], reverse = True)]

In [146]:
results_v2_overview

[('wo_span_word_length', 0.6537771251427639),
 ('wo_article_span_sentence_counter', 0.6527981726219612),
 ('wo_word_count_span_sent', 0.6524718551150269),
 ('wo_word_resemble_factor', 0.6519823788546255),
 ('wo_article_one_word_counter', 0.6510034263338228)]

In [147]:
worst_features_combinations = [["span_word_length", "word_resemble_factor"], ["span_word_length", "word_resemble_factor", "word_count_span_sent"], ["span_word_length", "word_resemble_factor", "word_count_span_sent", "article_one_word_counter"]]
wo_worst_features = {"wo_comp_" + str(i) : [col for col in train_cols if col not in abliat_cols] for i, abliat_cols in enumerate(worst_features_combinations)}
 


In [148]:
worst_features_combinations

[['span_word_length', 'word_resemble_factor'],
 ['span_word_length', 'word_resemble_factor', 'word_count_span_sent'],
 ['span_word_length',
  'word_resemble_factor',
  'word_count_span_sent',
  'article_one_word_counter']]

In [149]:
wo_worst_features

{'wo_comp_0': ['article_one_word_counter',
  'article_span_sentence_counter',
  'word_count_span_sent'],
 'wo_comp_1': ['article_one_word_counter', 'article_span_sentence_counter'],
 'wo_comp_2': ['article_span_sentence_counter']}

In [150]:
results_v3 = dict()
combination_num = 1
for name, tmp_features in wo_worst_features.items():
    print("\n{}\n".format("="*60), name, "\nFeature comp. #{} out of {} combinations\n{}\n".format(combination_num, len(wo_worst_features),"_"*30), sep="")
    combination_num += 1
    params["train_cols"] = tmp_features
    preds = validate_feature_combination(**params)
    f1 = f1_score(y_true, preds, average="micro")
    results_v3[name] = (f1, preds)
    print("Micro f1 = {}".format(f1))
    print(classification_report(y_true, preds))
    print()
    print()

  0%|          | 0/10 [00:00<?, ?it/s]


wo_comp_0
Feature comp. #1 out of 3 combinations
______________________________



100%|██████████| 10/10 [02:11<00:00, 13.34s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6536139663892968
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.50      0.20      0.29       144
          Appeal_to_fear-prejudice       0.45      0.48      0.47       294
    Bandwagon,Reductio_ad_hitlerum       0.50      0.12      0.20        72
           Black-and-White_Fallacy       0.41      0.08      0.14       107
         Causal_Oversimplification       0.46      0.33      0.38       209
                             Doubt       0.55      0.76      0.64       493
         Exaggeration,Minimisation       0.55      0.46      0.50       466
                       Flag-Waving       0.60      0.58      0.59       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.73      0.79      0.76      1058
                        Repetition       0.69      0.56      0.61       621
                           Slogans       0.57      0.23  

100%|██████████| 10/10 [02:28<00:00, 15.51s/it]
  0%|          | 0/10 [00:00<?, ?it/s]

Micro f1 = 0.6521455376080927
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.40      0.13      0.20       144
          Appeal_to_fear-prejudice       0.43      0.45      0.44       294
    Bandwagon,Reductio_ad_hitlerum       0.73      0.15      0.25        72
           Black-and-White_Fallacy       0.55      0.11      0.19       107
         Causal_Oversimplification       0.45      0.33      0.39       209
                             Doubt       0.56      0.76      0.64       493
         Exaggeration,Minimisation       0.55      0.45      0.49       466
                       Flag-Waving       0.59      0.59      0.59       229
                   Loaded_Language       0.70      0.85      0.77      2123
             Name_Calling,Labeling       0.73      0.79      0.76      1058
                        Repetition       0.69      0.56      0.62       621
                           Slogans       0.64      0.29  

100%|██████████| 10/10 [02:21<00:00, 14.09s/it]


Micro f1 = 0.648556045031816
                                    precision    recall  f1-score   support

               Appeal_to_Authority       0.49      0.17      0.26       144
          Appeal_to_fear-prejudice       0.43      0.47      0.45       294
    Bandwagon,Reductio_ad_hitlerum       0.56      0.12      0.20        72
           Black-and-White_Fallacy       0.56      0.09      0.16       107
         Causal_Oversimplification       0.43      0.35      0.39       209
                             Doubt       0.55      0.75      0.64       493
         Exaggeration,Minimisation       0.56      0.43      0.48       466
                       Flag-Waving       0.56      0.55      0.56       229
                   Loaded_Language       0.71      0.85      0.77      2123
             Name_Calling,Labeling       0.72      0.80      0.76      1058
                        Repetition       0.65      0.54      0.59       621
                           Slogans       0.69      0.27   

In [151]:
results_v3_overview = [(name, res[0]) for name, res in sorted(results_v3.items(), key=lambda x : x[1][0], reverse = True)]

In [152]:
results_v3_overview

[('wo_comp_0', 0.6536139663892968),
 ('wo_comp_1', 0.6521455376080927),
 ('wo_comp_2', 0.648556045031816)]

In [132]:
ens_X_train = np.concatenate((log_train_sm, train_df[train_cols].to_numpy()), 1)
ens_X_train = torch.cat((bert_hidden_states[train_df["bert_encoding_id"].values], torch.FloatTensor(ens_X_train)), 1)

In [133]:
logreg_weighted = LogisticRegression(penalty='l2', solver="lbfgs")
logreg_weighted.fit(train_df[train_cols], y_true)
log_baseline_test = logreg_weighted.predict_proba(test_df[train_cols])

In [134]:
ens_X_test = np.concatenate((log_baseline_test, test_df[train_cols].to_numpy()), 1)
ens_X_test = torch.cat((bert_dev_hidden_states[test_df["bert_encoding_id"].values], torch.FloatTensor(ens_X_test)), 1)

In [139]:
model = ModelFit()
model.fit(ens_X_train, y_train, verbose = True)

0 0.07623909413814545
10 0.05530227720737457
20 0.050052884966135025
30 0.04673120379447937
40 0.04454762861132622
50 0.04316195845603943
60 0.04214479401707649
70 0.0413069948554039
80 0.040652792900800705
90 0.04015569016337395
100 0.039709609001874924
110 0.0393243208527565
120 0.03899726644158363
130 0.038728248327970505
140 0.038529690355062485
150 0.038393884897232056
160 0.038286466151475906
170 0.038236457854509354
180 0.038228489458560944
190 0.038278672844171524


Sequential(
  (0): Linear(in_features=3091, out_features=250, bias=True)
  (1): ReLU()
  (2): Linear(in_features=250, out_features=250, bias=True)
  (3): ReLU()
  (4): Linear(in_features=250, out_features=14, bias=True)
)

In [140]:
y_pred = model.predict(ens_X_test)

In [141]:
model.get_label(y_pred)

array(['Loaded_Language', 'Loaded_Language', 'Name_Calling,Labeling', ...,
       'Loaded_Language', 'Name_Calling,Labeling', 'Flag-Waving'],
      dtype='<U30')

In [142]:
test_df["gold_label"] = model.get_label(y_pred)
datawriter = DataWriter()
datawriter.pred_writer(test_df, "../predictions/ablation_wo_finetuning.txt") 

Predictions written to file ../predictions/ablation_wo_finetuning.txt
