In [None]:
def run_utils():
    """Verify a GPU is visible to TensorFlow, then pick the PyTorch device.

    Returns:
        torch.device: ``cuda`` when ``torch.cuda.is_available()`` is true,
        otherwise ``cpu``.

    Raises:
        SystemError: If TensorFlow does not report ``/device:GPU:0``.
    """
    # TensorFlow must report the first GPU under exactly this name.
    gpu_name = tf.test.gpu_device_name()
    if gpu_name != '/device:GPU:0':
        raise SystemError('GPU device not found')
    print('Found GPU at: {}'.format(gpu_name))

    # The PyTorch check is made separately from the TensorFlow one above.
    if not torch.cuda.is_available():
        print('No GPU available, using the CPU instead.')
        return torch.device("cpu")

    # Tell PyTorch to use the GPU and report what was found.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
    return device

In [None]:
def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` string.

    The value is rounded to the nearest whole second and then formatted by
    ``str(datetime.timedelta(...))``, so the hour field is not zero-padded
    and durations of a day or more gain a ``"N day(s), "`` prefix.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
def load_tokenizer(model):
    """Load the pretrained ``roberta-base`` tokenizer.

    Args:
        model: Currently ignored -- the checkpoint name is hard-coded below.
            The parameter is kept so existing callers keep working.

    Returns:
        The tokenizer returned by
        ``RobertaTokenizer.from_pretrained('roberta-base')``.
    """
    # Import only the class that is actually used. The previous import list
    # also pulled in unused names (among them the deprecated
    # AutoModelWithLMHead), so the function could fail on import alone.
    from transformers import RobertaTokenizer

    # Alternatives tried earlier: BertTokenizer ('bert-base-uncased') and
    # DistilBertTokenizer ('distilbert-base-uncased'), both with
    # do_lower_case=True.
    return RobertaTokenizer.from_pretrained('roberta-base')

In [None]:
def concanListStrings(list1, list2):
    """Join the items of two lists pairwise with a single space.

    Item ``i`` of the result is ``list1[i] + " " + list2[i]``. The result has
    ``len(list1)`` entries. A length mismatch is only reported on stdout;
    pairing still proceeds by index over ``list1``.
    """
    count = len(list1)
    if len(list2) != count:
        print("Length - error")
    # Indexing (rather than zip) keeps the by-position behaviour on mismatch.
    return [list1[pos] + " " + list2[pos] for pos in range(count)]

In [None]:
def concanListStrings_sep(list1, list2):
    """Join the items of two lists pairwise with a ``" [SEP] "`` separator.

    Item ``i`` of the result is ``list1[i] + " [SEP] " + list2[i]``. The
    result has ``len(list1)`` entries. A length mismatch is only reported on
    stdout; pairing still proceeds by index over ``list1``.
    """
    count = len(list1)
    if len(list2) != count:
        print("Length - error")
    # Indexing (rather than zip) keeps the by-position behaviour on mismatch.
    return [list1[pos] + " [SEP] " + list2[pos] for pos in range(count)]

In [None]:
def optimizer_to(optim, device):
    """Move the tensors held in an optimizer's state to ``device``, in place.

    Walks ``optim.state.values()``. An entry that is itself a tensor is
    moved; an entry that is a dict has each of its tensor values moved.
    Anything else is left untouched. Returns ``None``.
    """
    def _relocate(tensor):
        # Rebind the storage (and the gradient's storage, if one exists).
        tensor.data = tensor.data.to(device)
        grad = tensor._grad
        if grad is not None:
            grad.data = grad.data.to(device)

    for entry in optim.state.values():
        if isinstance(entry, torch.Tensor):
            # Unclear whether top-level tensors ever occur here; handled anyway.
            _relocate(entry)
        elif isinstance(entry, dict):
            for value in entry.values():
                if isinstance(value, torch.Tensor):
                    _relocate(value)

In [None]:
def return_batches_datasets(datasetTrain, datasetVal, batch_size = 16):
    """Wrap the training and validation datasets in ``DataLoader`` objects.

    Args:
        datasetTrain: Dataset whose samples are drawn in random order.
        datasetVal: Dataset whose samples are read sequentially.
        batch_size: Batch size used by both loaders.

    Returns:
        tuple: ``(train_dataloader, validation_dataloader)``.
    """
    from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

    # Settings common to both loaders. drop_last=True discards a trailing
    # partial batch, for validation as well as for training.
    shared_kwargs = dict(batch_size=batch_size, num_workers=8, drop_last=True)

    # Training: shuffle by sampling randomly.
    train_dataloader = DataLoader(
        datasetTrain,
        sampler=RandomSampler(datasetTrain),
        **shared_kwargs,
    )

    # Validation: order does not matter, so read front to back.
    validation_dataloader = DataLoader(
        datasetVal,
        sampler=SequentialSampler(datasetVal),
        **shared_kwargs,
    )

    return train_dataloader, validation_dataloader

In [None]:
def print_summary(training_stats):
    """Print the per-epoch training statistics as a table and return it.

    Args:
        training_stats: Data accepted by ``pd.DataFrame(data=...)`` that
            yields an ``'epoch'`` column, e.g. a list of per-epoch dicts.

    Returns:
        pandas.DataFrame: The statistics indexed by ``'epoch'``.

    Raises:
        KeyError: If the data has no ``'epoch'`` column.

    Note:
        Changes the global pandas display options (precision, max rows,
        max columns) as a side effect.
    """
    # Display floats with four decimal places. The fully-qualified key is
    # required: the bare 'precision' pattern is ambiguous on newer pandas
    # (it also matches 'styler.format.precision') and raises OptionError.
    pd.set_option('display.precision', 4)

    # Show wide/long tables in full rather than truncating them.
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)

    # Build the table and use the 'epoch' column as the row index.
    df_stats = pd.DataFrame(data=training_stats).set_index('epoch')

    print(df_stats)
    return df_stats

In [None]:
def plot_results(df_stats, last_epoch):
    """Draw the loss curves and the validation stance accuracy curve.

    Figure 1 plots the ``'Training Loss'`` and ``'Valid. Loss'`` columns of
    ``df_stats``; figure 2 plots ``'Valid. Stance Accur.'``. Nothing is
    returned and the figures are not shown or saved here.

    Args:
        df_stats: Table providing the three columns named above.
        last_epoch: Last epoch number; only used to build a tick list that
            is not currently applied to the axis.
    """
    # Seaborn styling, then larger fonts and a larger canvas.
    sns.set(style='darkgrid')
    sns.set(font_scale=1.5)
    plt.rcParams["figure.figsize"] = (12,6)

    # ---- Figure 1: training vs. validation loss ----
    plt.figure(1)
    loss_curves = (
        ('Training Loss', 'b-o', "Training_Loss"),
        ('Valid. Loss', 'g-o', "Val_Loss"),
    )
    for column, line_format, legend_label in loss_curves:
        plt.plot(df_stats[column], line_format, label=legend_label)

    plt.title("Training & Val Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()

    # ---- Figure 2: validation stance accuracy ----
    plt.figure(2)

    # Epoch numbers 1..last_epoch; built but not passed to plt.xticks.
    x_ticks = list(range(1, last_epoch + 1))
    plt.xticks(rotation=90)

    plt.plot(df_stats['Valid. Stance Accur.'], 'b-o', label="Valid. Stance Accur.")

    plt.title("Val Stance & Ideology Acc")
    plt.xlabel("Epoch")
    plt.ylabel("Acc")
    plt.legend()

In [None]:
import numpy as np
import torch

class EarlyStopping:
    """Stop training once the validation loss has stopped improving.

    Call the instance once per validation pass. Whenever the loss improves,
    ``model_save_state`` is written to ``model_save_path`` with
    ``torch.save``; after ``patience`` consecutive non-improving calls
    ``early_stop`` is set to ``True``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        # Stance accuracy seen at the most recent checkpoint (not a running
        # maximum, despite the name); -1 until the first checkpoint.
        self.val_acc_max_stance = -1
        self.delta = delta

    def __call__(self, val_loss, val_acc_stance, model_save_state, model_save_path, model, tokenizer):
        """Record one validation result and update the stopping state.

        Args:
            val_loss: Validation loss of this pass (lower is better).
            val_acc_stance: Stance accuracy of this pass; only logged/stored.
            model_save_state: Object written with ``torch.save`` on improvement.
            model_save_path: Destination path for the checkpoint.
            model: Forwarded to ``save_checkpoint`` (unused there).
            tokenizer: Forwarded to ``save_checkpoint`` (unused there).
        """
        # Negate so that "higher score" means "better".
        score = -val_loss

        if self.best_score is None:
            # First call: always checkpoint; no counter message is printed.
            self._record_improvement(score, val_loss, val_acc_stance, model_save_state,
                                     model_save_path, model, tokenizer)
            return

        if score < self.best_score + self.delta:
            self.counter += 1
        else:
            self._record_improvement(score, val_loss, val_acc_stance, model_save_state,
                                     model_save_path, model, tokenizer)
            self.counter = 0

        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def _record_improvement(self, score, val_loss, val_acc_stance, model_save_state,
                            model_save_path, model, tokenizer):
        """Store the new best score, write the checkpoint, remember the accuracy."""
        self.best_score = score
        # Checkpoint first so the verbose log can still show the previous accuracy.
        self.save_checkpoint(val_loss, val_acc_stance, model_save_state, model_save_path, model, tokenizer)
        self.val_acc_max_stance = val_acc_stance

    def save_checkpoint(self, val_loss, val_acc_stance, model_save_state, model_save_path, model, tokenizer):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
            print(f'Validation acc stance : ({self.val_acc_max_stance:.6f} --> {val_acc_stance:.6f}).  Saving model ...')

        # Only the prepared state object is persisted; `model` and `tokenizer`
        # are accepted for interface compatibility but not used here.
        torch.save(model_save_state, model_save_path)
        self.val_loss_min = val_loss

In [None]:
import numpy as np
import torch

class EarlyStoppingIdeology:
    """Stop training once the validation loss has stopped improving.

    Variant that additionally tracks an ideology accuracy next to the stance
    accuracy. Call the instance once per validation pass. Whenever the loss
    improves, ``model_save_state`` is written to ``model_save_path`` with
    ``torch.save``; after ``patience`` consecutive non-improving calls
    ``early_stop`` is set to ``True``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        # Accuracies seen at the most recent checkpoint (not running maxima,
        # despite the names); -1 until the first checkpoint.
        self.val_acc_max_stance = -1
        self.val_acc_max_ideology = -1
        self.delta = delta

    def __call__(self, val_loss, val_acc_stance, val_acc_ideology, model_save_state, model_save_path, model, tokenizer):
        """Record one validation result and update the stopping state.

        Args:
            val_loss: Validation loss of this pass (lower is better).
            val_acc_stance: Stance accuracy of this pass; only logged/stored.
            val_acc_ideology: Ideology accuracy of this pass; only logged/stored.
            model_save_state: Object written with ``torch.save`` on improvement.
            model_save_path: Destination path for the checkpoint.
            model: Forwarded to ``save_checkpoint`` (unused there).
            tokenizer: Forwarded to ``save_checkpoint`` (unused there).
        """
        # Negate so that "higher score" means "better".
        score = -val_loss

        if self.best_score is None:
            # First call: always checkpoint; no counter message is printed.
            self._record_improvement(score, val_loss, val_acc_stance, val_acc_ideology,
                                     model_save_state, model_save_path, model, tokenizer)
            return

        if score < self.best_score + self.delta:
            self.counter += 1
        else:
            self._record_improvement(score, val_loss, val_acc_stance, val_acc_ideology,
                                     model_save_state, model_save_path, model, tokenizer)
            self.counter = 0

        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def _record_improvement(self, score, val_loss, val_acc_stance, val_acc_ideology,
                            model_save_state, model_save_path, model, tokenizer):
        """Store the new best score, write the checkpoint, remember the accuracies."""
        self.best_score = score
        # Checkpoint first so the verbose log can still show the previous accuracies.
        self.save_checkpoint(val_loss, val_acc_stance, val_acc_ideology, model_save_state, model_save_path, model, tokenizer)
        self.val_acc_max_stance = val_acc_stance
        self.val_acc_max_ideology = val_acc_ideology

    def save_checkpoint(self, val_loss, val_acc_stance, val_acc_ideology, model_save_state, model_save_path, model, tokenizer):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
            print(f'Validation acc stance : ({self.val_acc_max_stance:.6f} --> {val_acc_stance:.6f}).  Saving model ...')
            print(f'Validation acc ideology : ({self.val_acc_max_ideology:.6f} --> {val_acc_ideology:.6f}).  Saving model ...')

        # Only the prepared state object is persisted; `model` and `tokenizer`
        # are accepted for interface compatibility but not used here.
        torch.save(model_save_state, model_save_path)
        self.val_loss_min = val_loss

In [None]:
import numpy as np
import torch

class EarlyStoppingUnigram:
    """Stop training once the validation loss has stopped improving.

    Call the instance once per validation pass. Whenever the loss improves,
    ``model_save_state`` is written to ``model_save_path`` with
    ``torch.save``; after ``patience`` consecutive non-improving calls
    ``early_stop`` is set to ``True``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): Stored but not used by this class.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.delta = delta

    def __call__(self, model_save_state, model_save_path, model, tokenizer, val_loss=None):
        """Record one validation result and update the stopping state.

        Args:
            model_save_state: Object written with ``torch.save`` on improvement.
            model_save_path: Destination path for the checkpoint.
            model: Forwarded to ``save_checkpoint`` (unused there).
            tokenizer: Forwarded to ``save_checkpoint`` (unused there).
            val_loss: Validation loss of this pass (lower is better). Added
                as a trailing optional parameter because the method used to
                read an undefined ``score`` and raised ``NameError`` on
                every call.

        Raises:
            ValueError: If ``val_loss`` is not supplied.
        """
        if val_loss is None:
            raise ValueError(
                'EarlyStoppingUnigram needs val_loss to decide whether the model improved'
            )

        # Negate so that "higher score" means "better".
        score = -val_loss

        if self.best_score is None:
            # First call: always checkpoint; no counter message is printed.
            self.best_score = score
            self.save_checkpoint(model_save_state, model_save_path, model, tokenizer)
            return

        if score < self.best_score + self.delta:
            self.counter += 1
        else:
            self.best_score = score
            self.save_checkpoint(model_save_state, model_save_path, model, tokenizer)
            self.counter = 0

        print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
        if self.counter >= self.patience:
            self.early_stop = True

    def save_checkpoint(self, model_save_state, model_save_path, model, tokenizer):
        '''Saves model when validation loss decrease.'''
        # Only the prepared state object is persisted; `model` and `tokenizer`
        # are accepted for interface compatibility but not used here.
        torch.save(model_save_state, model_save_path)

In [None]:
import tensorflow as tf
import torch
import datetime
import os
import string
import pandas as pd
import numpy as np