In [1]:
# GPU selection for this kernel. These variables are read when CUDA is
# initialised, so this cell has to run before anything touches the GPU.
# Enumerate devices in PCI bus order.
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only device 0 to this process.
%env CUDA_VISIBLE_DEVICES=0
# Make kernel launches synchronous: slower, but CUDA errors surface at the
# call that caused them. Debugging aid.
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0
env: CUDA_LAUNCH_BLOCKING=1


In [2]:
import argparse
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
np.random.seed(0)

import os
# from datetime import datetime
from os import path
from tqdm import tqdm

In [3]:
# Models
import torch
# Fixed seed for torch's random number generator so runs are repeatable.
SEED = 42
torch.manual_seed(SEED)
# Ask cuDNN for deterministic algorithms and switch off its auto-tuner,
# trading some speed for reproducibility.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

import torch.nn as nn
from transformers import WEIGHTS_NAME, CONFIG_NAME

In [4]:
# Training
import torch.optim as optim

# Evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

In [5]:
# import importlib

from dataset import *
from model import SequenceClassification, MODEL_CLASSES
from utils import (evaluate, evaluate_metrics, 
                   save_metrics, load_metrics, predict_ensemble)

In [6]:
# dateTimeObj = datetime.now()

In [7]:
# Run configuration. The options are declared through argparse so the same
# definitions work from a script, but inside the notebook they are parsed from
# an empty list, i.e. every option takes its default.
import json

parser = argparse.ArgumentParser()
parser.add_argument('--num_epochs', type=int, default=2)
parser.add_argument('--learning_rate', type=float, default=2e-5)
parser.add_argument('--batch_size', type=int, default=64)
parser.add_argument('--ens_iter', type=int, default=2)
parser.add_argument('--thresh', type=float, default=0.9)
parser.add_argument('--n_folds', type=int, default=5)
parser.add_argument('--label', type=str, default='Label')
parser.add_argument('--early_stopping_thresh', type=float, default=0.1)
parser.add_argument('--model_path', type=str, default='models_testing')
parser.add_argument('--model_name', type=str, default='bert')
parser.add_argument('--eval_bs', type=int, default=600)

# args = parser.parse_args()
# Parse an explicit empty list so the kernel's own command line is ignored.
args = parser.parse_args(args=[])
print (args)

# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail if the directory already exists.
os.makedirs(args.model_path, exist_ok=True)

# Persist the configuration next to the models it produced.
with open(path.join(args.model_path, 'args.txt'), 'w') as f:
    json.dump(vars(args), f, indent=2)

Namespace(batch_size=64, early_stopping_thresh=0.1, ens_iter=2, eval_bs=600, label='Label', learning_rate=2e-05, model_name='bert', model_path='models_testing', n_folds=5, num_epochs=2, thresh=0.9)


In [8]:
def _forward_batch(model, batch):
    """Run ``model`` on one batch and return ``(loss, logits)``.

    The batch is read positionally: ``batch[0]``/``batch[1]`` are passed as
    ``input_ids``/``attention_mask``, ``batch[2]``/``batch[3]`` as
    ``input_ids_adv``/``attention_mask_adv`` and ``batch[4]`` as ``labels``.
    Every tensor is cast to int64 and moved to the global ``device``.
    """
    comments, masks, b_comments, b_masks, labels = (
        batch[i].to(device=device, dtype=torch.long) for i in range(5)
    )
    outputs = model(input_ids=comments, input_ids_adv=b_comments,
                    attention_mask=masks, attention_mask_adv=b_masks,
                    labels=labels)
    loss, logits = outputs[:2]
    return loss, logits


def _mean_validation_loss(model, valid_loader):
    """Return the mean per-batch loss of ``model`` over ``valid_loader``.

    Switches the model to eval mode and leaves it there; the caller is
    responsible for switching back to training mode.
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in valid_loader:
            loss, _ = _forward_batch(model, batch)
            total_loss += loss.item()
    return total_loss / len(valid_loader)


def train(model,
          optimizer,
          train_loader,
          valid_loader,
          iteration,
          model_path,
          num_epochs = 1,
          early_stopping_thresh=None,
          evals_per_epoch=6):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        model: module called with ``input_ids``, ``input_ids_adv``,
            ``attention_mask``, ``attention_mask_adv`` and ``labels``; its
            output must start with ``(loss, logits)`` and it must expose
            ``config.save_pretrained``.
        optimizer: optimizer already bound to the model's parameters.
        train_loader: sized iterable of training batches.
        valid_loader: sized iterable of validation batches.
        iteration: accepted for interface compatibility; not used here.
        model_path: existing directory receiving the weights, the config,
            ``metrics.pt`` and ``plot_losses.png``.
        num_epochs: number of passes over ``train_loader``.
        early_stopping_thresh: if not ``None``, training stops as soon as
            ``valid_loss - train_loss`` exceeds this value at an evaluation
            step.
        evals_per_epoch: how many times per epoch the validation set is
            evaluated; must be positive.

    Returns:
        None. Results are written to ``model_path`` and the loss curves are
        plotted.
    """
    best_valid_loss = float("Inf")
    # At least 1: with fewer batches than evals_per_epoch the integer division
    # would give 0 and the modulo below would raise ZeroDivisionError.
    eval_every = max(1, len(train_loader) // evals_per_epoch)

    weights_path = path.join(model_path, WEIGHTS_NAME)
    metrics_path = path.join(model_path, 'metrics.pt')
    plot_path = path.join(model_path, 'plot_losses.png')

    average_train_loss = 0.0
    average_valid_loss = 0.0

    # initialize running values
    running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []
    stop_early = False

    # training loop
    model.train()
    for epoch in range(num_epochs):
        for batch in train_loader:
            loss, _ = _forward_batch(model, batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update running values
            running_loss += loss.item()
            global_step += 1

            # evaluation step
            if global_step % eval_every != 0:
                continue

            average_valid_loss = _mean_validation_loss(model, valid_loader)
            average_train_loss = running_loss / eval_every
            train_loss_list.append(average_train_loss)
            valid_loss_list.append(average_valid_loss)
            global_steps_list.append(global_step)

            # resetting running values
            running_loss = 0.0
            model.train()

            # print progress
            print('\nEpoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                  .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                          average_train_loss, average_valid_loss))

            # checkpoint
            if best_valid_loss > average_valid_loss:
                best_valid_loss = average_valid_loss
                # model.save_pretrained(model_path)
                model.config.save_pretrained(model_path)
                torch.save(model.state_dict(), weights_path)
                save_metrics(metrics_path, train_loss_list, valid_loss_list, global_steps_list)

            # Use the threshold passed to this call, not the notebook-level
            # args, so the parameter actually controls the behaviour.
            if (early_stopping_thresh is not None
                    and average_valid_loss - average_train_loss > early_stopping_thresh):
                stop_early = True
                break

        if stop_early:
            print ("Early stopping")
            break

    # Final metrics overwrite the ones stored with the best checkpoint so the
    # plot covers the whole run.
    save_metrics(metrics_path, train_loss_list, valid_loss_list, global_steps_list)

    train_loss_list, valid_loss_list, global_steps_list = load_metrics(metrics_path)
    # Draw on a dedicated figure so curves from earlier calls cannot leak in.
    fig, ax = plt.subplots()
    ax.plot(global_steps_list, train_loss_list, label='Train')
    ax.plot(global_steps_list, valid_loss_list, label='Valid')
    ax.set_xlabel('Global Steps')
    ax.set_ylabel('Loss')
    ax.legend()
    fig.savefig(plot_path)
    plt.show()
    plt.close(fig)

In [9]:
def get_tokenizer(model_name, init):
    """Load the tokenizer registered for ``model_name`` in ``MODEL_CLASSES``.

    Args:
        model_name: key into ``MODEL_CLASSES``; the entry's third and fourth
            items are the tokenizer class and the pretrained options name.
        init: when truthy, load from the pretrained options name; otherwise
            load from ``<args.model_path>/<model_name>``.

    Returns:
        Whatever ``tokenizer_class.from_pretrained`` returns for that source.
    """
    _unused_a, _unused_b, tokenizer_class, options_name = MODEL_CLASSES[model_name]

    source = options_name if init else path.join(args.model_path, model_name)

    print ("Loading tokenizer from {}".format(source))
    return tokenizer_class.from_pretrained(source)


def save_tokenizer(tokenizer, model_name):
    """Save ``tokenizer`` under ``<args.model_path>/<model_name>`` and report it.

    Args:
        tokenizer: object exposing ``save_pretrained(directory)``.
        model_name: sub-directory name below ``args.model_path``.
    """
    target_dir = path.join(args.model_path, model_name)
    tokenizer.save_pretrained(target_dir)
    message = "Saving Tokenizer in {}".format(target_dir)
    print (message)

In [10]:
# Ensemble loop: for every iteration, train (or reload) one model per entry of
# MODEL_CLASSES, evaluate it, collect its predictions on the unlabeled set and
# hand the combined predictions to add_ensemble_data.
# The iteration count comes from the config (default 2) instead of a literal.
for iteration in range(args.ens_iter):
    outputs = []
    
    for model_name in MODEL_CLASSES:
        print ("Model {}".format(model_name))
        
        # paths
        model_path = path.join(args.model_path, model_name)
        os.makedirs(model_path, exist_ok=True)
        
        iteration_path = path.join(model_path, 'run' + str(iteration))
        
        # tokenizer and dataset
        # Prefer a tokenizer saved by a previous run; fall back to the
        # pretrained one. `except Exception` keeps the fallback but lets
        # KeyboardInterrupt / SystemExit through.
        try:
            tokenizer = get_tokenizer(model_name, False)
        except Exception:
            tokenizer = get_tokenizer(model_name, True)
        
        trainval_iter, test_data, unlabeled_data = get_dataset(ensemble_home=args.model_path, 
                                                               cache_path=model_path,
                                                               iteration=iteration, label=args.label, 
                                                               tokenizer=tokenizer) 
        
        # NOTE(review): only the first fold is used although args.n_folds
        # exists; trainval_iter is advanced only in the training branch, so
        # enabling more folds needs care - confirm intent.
        for fold in range(1):
            curr_path = iteration_path + "_" + str(fold)
        
            if path.exists(path.join(curr_path, WEIGHTS_NAME)): 
                print ("Model run {} fold {} already trained".format(iteration, fold)) 
                model = SequenceClassification(model_name=model_name, model_path=curr_path).to(device) 
                model.load_state_dict(torch.load(path.join(curr_path, WEIGHTS_NAME), map_location=device))
                # A freshly built module starts in training mode; switch to
                # eval mode before any evaluation.
                model.eval()
            else:
                print ("Training model {} run {} fold {}".format(model_name, iteration, fold)) 

                os.makedirs(curr_path, exist_ok=True)
                
                save_tokenizer(tokenizer, model_name) 

                # data and iterator
                train_data, weights, val_data = next(trainval_iter)
                train_iter = get_iterator(dataset=train_data, batch_size=args.batch_size, shuffle=True, weights = weights)
                valid_iter = get_iterator(dataset=val_data, batch_size=args.batch_size, shuffle=False)

                # model and train
                model = SequenceClassification(model_name=model_name).to(device)
                optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
                train(model=model, optimizer=optimizer, 
                        model_path=curr_path, 
                        early_stopping_thresh=None,
                        iteration=iteration, num_epochs=args.num_epochs,
                        train_loader=train_iter, valid_loader=valid_iter)
                # train() leaves the model in training mode; switch to eval
                # mode before evaluating.
                model.eval()

                # evaluation - validation
                print ("Validation set evaluation")
                valid_iter = get_iterator(dataset=val_data, batch_size=args.eval_bs, shuffle=False)
                evaluate_metrics(model, valid_iter)

            # evaluation - test
            print ("Test set evaluation")
            test_iter = get_iterator(dataset=test_data, batch_size=args.eval_bs, shuffle=False) 
            evaluate_metrics(model, test_iter) 

            # evaluate - unlabeled
            print ("Unlabeled set pseudo-labelling")
            output_path = path.join(curr_path, "output.pt")
            # NOTE(review): cached predictions are reused without checking
            # that they match the current unlabeled_data. The recorded run of
            # this cell ended in "ValueError: Length of values does not match
            # length of index" - possibly a stale cache; confirm.
            if path.exists(output_path):
                print ("Model:{} loading labels for unlabeled data".format(model_name))
                output = torch.load(output_path)
            else:
                unlabeled_iter = get_iterator(dataset=unlabeled_data, batch_size=args.eval_bs, shuffle=False) 
                output = evaluate(model, unlabeled_iter)
                print ("Model:{} saving labels for unlabeled data".format(model_name))
                torch.save(output, output_path)

            outputs.append(output)

            print ("\n\n")
    
    # Combine the per-model predictions and add the result to the ensemble data.
    y_pred, y_conf = predict_ensemble(outputs)
    unlabeled_df = add_ensemble_data(y_pred, y_conf, LABEL=args.label, 
                                     ensemble_home=args.model_path, iteration=iteration)

Model albert
Loading tokenizer from models_testing/albert


Tokenize: 8411it [00:04, 1965.75it/s]
b_Tokenize: 8411it [00:04, 1899.43it/s]


Loading processed unlabeled samples from models_testing/albert/unlabeled_processed_0.pt
Model run 0 fold 0 already trained
Test set evaluation
Get iterator of dataset with length 8411
Classification Report:
              precision    recall  f1-score   support

           0     0.7140    0.6737    0.6933      4999
           1     0.5833    0.2083    0.3070      1613
           2     0.2396    0.6853    0.3551       464
           3     0.5695    0.8795    0.6913       531
           4     0.0732    0.1250    0.0923        24
           5     0.5796    0.6910    0.6304       780

    accuracy                         0.5981      8411
   macro avg     0.4599    0.5438    0.4616      8411
weighted avg     0.6394    0.5981    0.5929      8411

Unlabeled set pseudo-labelling
Model:albert loading labels for unlabeled data



Model bert
Loading tokenizer from models_testing/bert


Tokenize: 8411it [00:05, 1435.14it/s]
b_Tokenize: 8411it [00:05, 1407.62it/s]


Loading processed unlabeled samples from models_testing/bert/unlabeled_processed_0.pt
Model run 0 fold 0 already trained
Test set evaluation
Get iterator of dataset with length 8411
Classification Report:
              precision    recall  f1-score   support

           0     0.7095    0.6233    0.6636      4999
           1     0.4644    0.2306    0.3082      1613
           2     0.2559    0.6724    0.3708       464
           3     0.5918    0.9284    0.7229       531
           4     0.5000    0.2500    0.3333        24
           5     0.4922    0.7282    0.5874       780

    accuracy                         0.5786      8411
   macro avg     0.5023    0.5722    0.4977      8411
weighted avg     0.6093    0.5786    0.5750      8411

Unlabeled set pseudo-labelling
Model:bert loading labels for unlabeled data



Model distilBert
Loading tokenizer from models_testing/distilBert


Tokenize: 8411it [00:06, 1387.67it/s]
b_Tokenize: 8411it [00:06, 1387.99it/s]


Loading processed unlabeled samples from models_testing/distilBert/unlabeled_processed_0.pt
Model run 0 fold 0 already trained
Test set evaluation
Get iterator of dataset with length 8411
Classification Report:
              precision    recall  f1-score   support

           0     0.7325    0.6339    0.6797      4999
           1     0.5289    0.3577    0.4268      1613
           2     0.2409    0.6961    0.3579       464
           3     0.6239    0.8343    0.7139       531
           4     0.3125    0.2083    0.2500        24
           5     0.5933    0.7051    0.6444       780

    accuracy                         0.6024      8411
   macro avg     0.5053    0.5726    0.5121      8411
weighted avg     0.6454    0.6024    0.6111      8411

Unlabeled set pseudo-labelling
Model:distilBert loading labels for unlabeled data



Agreement in ensemble is 0.00
Ensemble iteration 0
Total samples 3000
Original test file: /dgxhome/cra5302/MMHS/Train/unlabeled.csv


ValueError: Length of values does not match length of index