In [1]:
from __future__ import absolute_import, division, print_function

import argparse
import sys
import csv
import logging
import os
import random
import sys

import numpy as np
import torch
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset)
from torch.utils.data.distributed import DistributedSampler
from tqdm import tqdm, trange


from sklearn.metrics import classification_report

import random

csv.field_size_limit(sys.maxsize)
logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.INFO)
logger = logging.getLogger(__name__)



In [2]:
class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text_a, text_b=None, label=None):
        """Constructs a InputExample.
        Args:
            guid: Unique id for the example.
            text_a: string. The untokenized text of the first sequence. For single
            sequence tasks, only this sequence must be specified.
            text_b: (Optional) string. The untokenized text of the second sequence.
            Only must be specified for sequence pair tasks.
            label: (Optional) string. The label of the example. This should be
            specified for train and dev examples, but not for test examples.
        """
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label

In [3]:
class InputFeatures(object):
    """A single set of features of data."""

    def __init__(self, input_ids, input_mask, segment_ids, label_id):
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.label_id = label_id

In [4]:
class DataProcessor(object):
    """Base class for data converters for sequence classification data sets."""

    def get_train_examples(self, data_dir):
        """Gets a collection of `InputExample`s for the train set."""
        raise NotImplementedError()

    def get_dev_examples(self, data_dir):
        """Gets a collection of `InputExample`s for the dev set."""
        raise NotImplementedError()

    def get_labels(self):
        """Gets the list of labels for this data set."""
        raise NotImplementedError()

    @classmethod
    def _read_tsv(cls, input_file, quotechar=None):
        """Reads a tab separated value file."""
        with open(input_file, "r") as f:
            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
            lines = []
            for line in reader:
                if sys.version_info[0] == 2:
                    line = list(unicode(cell, 'utf-8') for cell in line)
                lines.append(line)
            return lines

In [5]:
def get_tp_fp_fn(logits, labels):
    assert labels.shape[1] == 1
    labels = labels.squeeze()
    predictions = np.argmax(logits, axis=1)
    labels, predictions = labels.astype(int), predictions.astype(int)
    tp = np.sum(np.logical_and(predictions == 1, labels == 1))
    fp = np.sum(np.logical_and(predictions == 1, labels == 0))
    fn = np.sum(np.logical_and(predictions == 0, labels == 1))
    return tp, fp, fn

def compute_metrics(tp, fp, fn):
  precision = tp / (tp + fp + np.finfo(float).eps)
  recall = tp / (tp + fn + np.finfo(float).eps)
  f1 = 2 * precision * recall / (precision + recall + np.finfo(float).eps)
  return precision, recall, f1

class CLPsychProcessor(DataProcessor):
    """Processor for the CLPsych data set."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            # self._read_tsv(os.path.join(data_dir, "User_Posts_Processed_Train_Final.tsv")), "train")
            self._read_tsv(os.path.join(data_dir, "User_Posts_Processed_Train_Final.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            # self._read_tsv(os.path.join(data_dir, "User_Posts_Processed_Dev_Final.tsv")), "dev")
            self._read_tsv(os.path.join(data_dir, "User_Posts_Processed_Dev_Final.tsv")), "dev")


    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples_test(
            # self._read_tsv(os.path.join(data_dir, "User_Posts_Processed_Test_Final.tsv")), "test")
            self._read_tsv(os.path.join(data_dir, "Full_Test_Data.tsv")), "test")


    def get_labels(self):
        """See base class."""
        return ["a", "b", "c", "d"]

    def _create_examples_test(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        random.seed(9001)
        req = list()
        for i in range(0, len(lines)):
            req.append(i)
        req_final = random.sample(req, len(lines))
        for i in req_final:
            if i == 0:
                continue
            guid = lines[i][0]
            text_a = lines[i][1]
            text_b = None
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b))
        return examples

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        random.seed(9001)
        req = list()
        for i in range(0, len(lines)):
            req.append(i)
        req_final = random.sample(req, len(lines))	
        for i in req_final:
            if i == 0:
                continue
            guid = lines[i][0]
            text_a = lines[i][1]
            text_b = None
            label = lines[i][2]
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples
         

In [6]:
def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer):
    """Loads a data file into a list of `InputBatch`s."""

    label_map = {label : i for i, label in enumerate(label_list)}

    features = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)

        tokens_b = None
        if example.text_b:
            tokens_b = tokenizer.tokenize(example.text_b)
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > max_seq_length - 2:
                tokens_a = tokens_a[:(max_seq_length - 2)]

        # The convention in BERT is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids: 0   0  0    0    0     0       0 0    1  1  1  1   1 1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids: 0   0   0   0  0     0 0
        #
        # Where "type_ids" are used to indicate whether this is the first
        # sequence or the second sequence. The embedding vectors for `type=0` and
        # `type=1` were learned during pre-training and are added to the wordpiece
        # embedding vector (and position vector). This is not *strictly* necessary
        # since the [SEP] token unambigiously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.
        #
        # For classification tasks, the first vector (corresponding to [CLS]) is
        # used as as the "sentence vector". Note that this only makes sense because
        # the entire model is fine-tuned.
        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
        segment_ids = [0] * len(tokens)

        if tokens_b:
            tokens += tokens_b + ["[SEP]"]
            segment_ids += [1] * (len(tokens_b) + 1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding

        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length

        label_id = None
        if(example.label is not None):
            label_id = label_map[example.label]
        #if ex_index < 5:
        #    logger.info("*** Example ***")
        #    logger.info("guid: %s" % (example.guid))
        #    logger.info("tokens: %s" % " ".join(
        #            [str(x) for x in tokens]))
        #    logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
        #    logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
        #    logger.info(
        #            "segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
        #    logger.info("label: %s (id = %d)" % (example.label, label_id))

        features.append(
                InputFeatures(input_ids=input_ids,
                              input_mask=input_mask,
                              segment_ids=segment_ids,
                              label_id=label_id))
    return features

In [7]:
def _truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Truncates a sequence pair in place to the maximum length."""

    # This is a simple heuristic which will always truncate the longer sequence
    # one token at a time. This makes more sense than truncating an equal percent
    # of tokens from each, since if one sequence is very short then each token
    # that's truncated likely contains more information than a longer sequence.
    while True:
        total_length = len(tokens_a) + len(tokens_b)
        if total_length <= max_length:
            break
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()

In [8]:
def accuracy(out, labels):
    outputs = np.argmax(out, axis=-1)
    pred.extend(outputs)
    true.extend(labels)
    return np.sum(outputs == labels)


In [9]:
pred = list()
true = list()


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
    device, n_gpu, False, False))

02/06/2022 16:45:21 - INFO - __main__ -   device: cuda n_gpu: 1, distributed training: False, 16-bits training: False


In [11]:
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [12]:
from transformers import LongformerTokenizer as tokenizer
tokenizer = tokenizer.from_pretrained('allenai/longformer-base-4096', do_lower_case=True)

In [13]:
from transformers import LongformerForSequenceClassification, AdamW, LongformerConfig,get_linear_schedule_with_warmup

# Load BertForSequenceClassification, the pretrained BERT model with a single 
# linear classification layer on top. 
model = LongformerForSequenceClassification.from_pretrained(
    "allenai/longformer-base-4096", gradient_checkpointing=True,
    num_labels = 4)

model.to(device)

Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias'

LongformerForSequenceClassification(
  (longformer): LongformerModel(
    (embeddings): LongformerEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(4098, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): LongformerEncoder(
      (layer): ModuleList(
        (0): LongformerLayer(
          (attention): LongformerAttention(
            (self): LongformerSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (query_global): Linear(in_features=768, out_features=768, bias=True)
              (key_global): Linear(in_features=768, out_features=768, bias=True)
              (val

In [13]:
train_examples = None
num_train_optimization_steps = None
data_dir = '/home/yy452/rds/rds-gvdd-Yuap0gjVpKM/yy452/CLPsych2019_12/Dataset/task_a'
batch_size = 16
epochs = 75
max_seq_length = 2048

processor = CLPsychProcessor()

train_examples = processor.get_train_examples(data_dir)
validation_examples = processor.get_dev_examples(data_dir)

num_train_optimization_steps = int(
    len(train_examples) / batch_size ) * epochs
        

In [18]:
optimizer = AdamW(model.parameters(),
                  lr = 2e-5, 
                  eps = 1e-8 
                )

NameError: name 'AdamW' is not defined

In [14]:
global_step = 0
nb_tr_steps = 0
tr_loss = 0
label_list = processor.get_labels()

train_features = convert_examples_to_features(
    train_examples, label_list, max_seq_length, tokenizer)
logger.info("***** Running training *****")
logger.info("  Num examples = %d", len(train_examples))
logger.info("  Batch size = %d", batch_size)
logger.info("  Num steps = %d", num_train_optimization_steps)
all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)

02/06/2022 16:45:27 - INFO - __main__ -   ***** Running training *****
02/06/2022 16:45:27 - INFO - __main__ -     Num examples = 516
02/06/2022 16:45:27 - INFO - __main__ -     Batch size = 16
02/06/2022 16:45:27 - INFO - __main__ -     Num steps = 2400


In [15]:
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

In [16]:
# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)
scheduler

NameError: name 'get_linear_schedule_with_warmup' is not defined

In [None]:
for ep in trange(int(epochs), desc="Epoch"):
    model.train()
    tr_loss = 0
    nb_tr_examples, nb_tr_steps = 0, 0
    for step, batch in enumerate(train_dataloader):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids = batch
        outputs = model(input_ids, 
            token_type_ids=None, 
            attention_mask=input_mask, 
            labels=label_ids)

        # The call to `model` always returns a tuple, so we need to pull the 
        # loss value out of the tuple.
        loss = outputs[0]

        # Accumulate the training loss over all of the batches so that we can
        # calculate the average loss at the end. `loss` is a Tensor containing a
        # single value; the `.item()` function just returns the Python value 
        # from the tensor.

        tr_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        nb_tr_examples += input_ids.size(0)
        nb_tr_steps += 1
        
        optimizer.step()
        scheduler.step()
        global_step += 1

    eval_examples = processor.get_dev_examples(data_dir)
    eval_features = convert_examples_to_features(
        eval_examples, label_list, max_seq_length, tokenizer)
    print("\n")
    print("Running evaluation for epoch: {}".format(ep))
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
    
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batch_size)

    model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        label_ids = label_ids.to(device)

        with torch.no_grad():
            outputs = model(input_ids, 
                            token_type_ids=None, 
                            attention_mask=input_mask)

        # Get the "logits" output by the model. The "logits" are the output
        # values prior to applying an activation function like the softmax.
        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        tmp_eval_accuracy = accuracy(logits, label_ids)

        eval_accuracy += tmp_eval_accuracy

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    eval_accuracy = eval_accuracy / nb_eval_examples
    loss = tr_loss/nb_tr_steps
    result = {'eval_accuracy': eval_accuracy,
              'global_step': global_step,
              'loss': loss}

    for key in sorted(result.keys()):
        print(key, str(result[key]))
    print()

Epoch:   0%|          | 0/75 [00:00<?, ?it/s]



Running evaluation for epoch: 0


Epoch:   1%|▏         | 1/75 [02:49<3:29:31, 169.88s/it]

eval_accuracy 0.38596491228070173
global_step 33
loss 1.4735772212346394



Running evaluation for epoch: 1


Epoch:   3%|▎         | 2/75 [05:39<3:26:39, 169.85s/it]

eval_accuracy 0.38596491228070173
global_step 66
loss 1.4349814581148552



Running evaluation for epoch: 2


Epoch:   4%|▍         | 3/75 [08:29<3:23:49, 169.86s/it]

eval_accuracy 0.12280701754385964
global_step 99
loss 1.4169429576758183



Running evaluation for epoch: 3


Epoch:   5%|▌         | 4/75 [11:19<3:20:57, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 132
loss 1.4561178142374211



Running evaluation for epoch: 4


Epoch:   7%|▋         | 5/75 [14:09<3:18:06, 169.81s/it]

eval_accuracy 0.38596491228070173
global_step 165
loss 1.4051257660894683



Running evaluation for epoch: 5


Epoch:   8%|▊         | 6/75 [16:58<3:15:15, 169.80s/it]

eval_accuracy 0.38596491228070173
global_step 198
loss 1.3880436528812756



Running evaluation for epoch: 6


Epoch:   9%|▉         | 7/75 [19:48<3:12:25, 169.79s/it]

eval_accuracy 0.12280701754385964
global_step 231
loss 1.3813278675079346



Running evaluation for epoch: 7


Epoch:  11%|█         | 8/75 [22:38<3:09:35, 169.79s/it]

eval_accuracy 0.38596491228070173
global_step 264
loss 1.3855329318480059



Running evaluation for epoch: 8


Epoch:  12%|█▏        | 9/75 [25:28<3:06:45, 169.79s/it]

eval_accuracy 0.38596491228070173
global_step 297
loss 1.4083494525967222



Running evaluation for epoch: 9


Epoch:  13%|█▎        | 10/75 [28:17<3:03:55, 169.77s/it]

eval_accuracy 0.38596491228070173
global_step 330
loss 1.376027024153507



Running evaluation for epoch: 10


Epoch:  15%|█▍        | 11/75 [31:07<3:01:04, 169.76s/it]

eval_accuracy 0.38596491228070173
global_step 363
loss 1.3905141642599395



Running evaluation for epoch: 11


Epoch:  16%|█▌        | 12/75 [33:57<2:58:14, 169.76s/it]

eval_accuracy 0.38596491228070173
global_step 396
loss 1.3872032021031235



Running evaluation for epoch: 12


Epoch:  17%|█▋        | 13/75 [36:47<2:55:23, 169.74s/it]

eval_accuracy 0.38596491228070173
global_step 429
loss 1.3761679367585615



Running evaluation for epoch: 13


Epoch:  19%|█▊        | 14/75 [39:36<2:52:33, 169.73s/it]

eval_accuracy 0.38596491228070173
global_step 462
loss 1.3660083214441936



Running evaluation for epoch: 14


Epoch:  20%|██        | 15/75 [42:26<2:49:43, 169.73s/it]

eval_accuracy 0.12280701754385964
global_step 495
loss 1.3952254779411084



Running evaluation for epoch: 15


Epoch:  21%|██▏       | 16/75 [45:16<2:46:54, 169.73s/it]

eval_accuracy 0.38596491228070173
global_step 528
loss 1.3827088818405613



Running evaluation for epoch: 16


Epoch:  23%|██▎       | 17/75 [48:06<2:44:05, 169.75s/it]

eval_accuracy 0.38596491228070173
global_step 561
loss 1.376495574459885



Running evaluation for epoch: 17


Epoch:  24%|██▍       | 18/75 [50:55<2:41:16, 169.76s/it]

eval_accuracy 0.38596491228070173
global_step 594
loss 1.3702337886347915



Running evaluation for epoch: 18


Epoch:  25%|██▌       | 19/75 [53:45<2:38:25, 169.75s/it]

eval_accuracy 0.38596491228070173
global_step 627
loss 1.3842973420114228



Running evaluation for epoch: 19


Epoch:  27%|██▋       | 20/75 [56:35<2:35:36, 169.75s/it]

eval_accuracy 0.38596491228070173
global_step 660
loss 1.3710881724502102



Running evaluation for epoch: 20


Epoch:  28%|██▊       | 21/75 [59:25<2:32:45, 169.74s/it]

eval_accuracy 0.38596491228070173
global_step 693
loss 1.3670334815979004



Running evaluation for epoch: 21


Epoch:  29%|██▉       | 22/75 [1:02:14<2:29:57, 169.77s/it]

eval_accuracy 0.38596491228070173
global_step 726
loss 1.365999333786242



Running evaluation for epoch: 22


Epoch:  31%|███       | 23/75 [1:05:04<2:27:08, 169.79s/it]

eval_accuracy 0.38596491228070173
global_step 759
loss 1.3940035971728237



Running evaluation for epoch: 23


Epoch:  32%|███▏      | 24/75 [1:07:54<2:24:20, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 792
loss 1.3780875061497544



Running evaluation for epoch: 24


Epoch:  33%|███▎      | 25/75 [1:10:44<2:21:32, 169.84s/it]

eval_accuracy 0.38596491228070173
global_step 825
loss 1.3806929949558142



Running evaluation for epoch: 25


Epoch:  35%|███▍      | 26/75 [1:13:34<2:18:41, 169.84s/it]

eval_accuracy 0.38596491228070173
global_step 858
loss 1.3691715435548262



Running evaluation for epoch: 26


Epoch:  36%|███▌      | 27/75 [1:16:24<2:15:51, 169.83s/it]

eval_accuracy 0.38596491228070173
global_step 891
loss 1.383346676826477



Running evaluation for epoch: 27


Epoch:  37%|███▋      | 28/75 [1:19:14<2:13:04, 169.89s/it]

eval_accuracy 0.38596491228070173
global_step 924
loss 1.36636918963808



Running evaluation for epoch: 28


Epoch:  39%|███▊      | 29/75 [1:22:04<2:10:14, 169.88s/it]

eval_accuracy 0.38596491228070173
global_step 957
loss 1.3617171301986233



Running evaluation for epoch: 29


Epoch:  40%|████      | 30/75 [1:24:53<2:07:23, 169.86s/it]

eval_accuracy 0.38596491228070173
global_step 990
loss 1.3780423005421956



Running evaluation for epoch: 30


Epoch:  41%|████▏     | 31/75 [1:27:43<2:04:33, 169.85s/it]

eval_accuracy 0.38596491228070173
global_step 1023
loss 1.382594560131882



Running evaluation for epoch: 31


Epoch:  43%|████▎     | 32/75 [1:30:33<2:01:43, 169.84s/it]

eval_accuracy 0.38596491228070173
global_step 1056
loss 1.3761144479115803



Running evaluation for epoch: 32


Epoch:  44%|████▍     | 33/75 [1:33:23<1:58:52, 169.83s/it]

eval_accuracy 0.38596491228070173
global_step 1089
loss 1.3757670070185806



Running evaluation for epoch: 33


Epoch:  45%|████▌     | 34/75 [1:36:13<1:56:02, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 1122
loss 1.3655267520384355



Running evaluation for epoch: 34


Epoch:  47%|████▋     | 35/75 [1:39:02<1:53:12, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 1155
loss 1.3671010624278674



Running evaluation for epoch: 35


Epoch:  48%|████▊     | 36/75 [1:41:52<1:50:22, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 1188
loss 1.3708031285892834



Running evaluation for epoch: 36


Epoch:  49%|████▉     | 37/75 [1:44:42<1:47:33, 169.82s/it]

eval_accuracy 0.38596491228070173
global_step 1221
loss 1.3647957462252993



Running evaluation for epoch: 37


Epoch:  51%|█████     | 38/75 [1:47:32<1:44:43, 169.83s/it]

eval_accuracy 0.38596491228070173
global_step 1254
loss 1.3683692397493306



Running evaluation for epoch: 38


Epoch:  52%|█████▏    | 39/75 [1:50:22<1:41:54, 169.85s/it]

eval_accuracy 0.38596491228070173
global_step 1287
loss 1.367358066818931



Running evaluation for epoch: 39


Epoch:  53%|█████▎    | 40/75 [1:53:12<1:39:05, 169.86s/it]

eval_accuracy 0.38596491228070173
global_step 1320
loss 1.3887385924657185



Running evaluation for epoch: 40


Epoch:  55%|█████▍    | 41/75 [1:56:02<1:36:15, 169.87s/it]

eval_accuracy 0.38596491228070173
global_step 1353
loss 1.390536525032737



In [None]:
pred = list()
true = list()
eval_examples = processor.get_test_examples(data_dir)
eval_features = convert_examples_to_features(
    eval_examples, label_list, max_seq_length, tokenizer)
complete_user_ids = list()
for example in eval_examples:
    complete_user_ids.append(example.guid)	
logger.info("***** Running evaluation *****")
logger.info("  Num examples = %d", len(eval_examples))
logger.info("  Batch size = %d", batch_size)
all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
all_label_ids = torch.tensor([0 for f in eval_features], dtype=torch.long)

eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
# Run prediction for full data
eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batch_size)


model.eval()

eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
complete_label_ids = list()
complete_outputs = list()
complete_logits = []
for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        outputs = model(input_ids, 
                        token_type_ids=None, 
                        attention_mask=input_mask)

    # Get the "logits" output by the model. The "logits" are the output
    # values prior to applying an activation function like the softmax.
    logits = outputs[0]

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    
    outputs = np.argmax(logits, axis=1)
    complete_logits.append(logits)
    complete_outputs.extend(outputs)
    label_ids = label_ids.to('cpu').numpy()
    complete_label_ids.extend(label_ids)
    tmp_eval_accuracy = accuracy(logits, label_ids)

    eval_accuracy += tmp_eval_accuracy

    nb_eval_examples += input_ids.size(0)
    nb_eval_steps += 1
    
eval_accuracy = eval_accuracy / nb_eval_examples
loss = tr_loss/nb_tr_steps
result = {'eval_accuracy': eval_accuracy,
          'global_step': global_step,
          'loss': loss}
#     with torch.no_grad():
#         tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
#         logits = model(input_ids, segment_ids, input_mask)

#     logits = logits.detach().cpu().numpy()
#     outputs = np.argmax(logits, axis=1)
#     complete_outputs.extend(outputs)
#     label_ids = label_ids.to('cpu').numpy()
#     complete_label_ids.extend(label_ids)
#     tmp_eval_accuracy = accuracy(logits, label_ids)

#     eval_loss += tmp_eval_loss.mean().item()
#     eval_accuracy += tmp_eval_accuracy

#     nb_eval_examples += input_ids.size(0)
#     nb_eval_steps += 1



for i in range(len(complete_logits)):
    print(complete_outputs[i], complete_logits[i])

In [None]:
output_dir = "/home/yy452/rds/rds-gvdd-Yuap0gjVpKM/yy452/CLPsych2019_12/output/longformer_1024"


In [None]:
model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
output_model_file = os.path.join(output_dir, "longformer.bin")
torch.save(model_to_save.state_dict(), output_model_file)
output_config_file = os.path.join(output_dir, "config.json")
with open(output_config_file, 'w') as f:
    f.write(model_to_save.config.to_json_string())	

In [None]:
output_file = os.path.join(output_dir, "longformer_a.csv")
outcsv = open(output_file,'w', encoding = 'utf8', newline='')
writer = csv.writer(outcsv,quotechar = '"')
writer.writerow(["User","results"])
for user,pred in zip(complete_user_ids, complete_outputs):
    writer.writerow([user,pred])
outcsv.close()

In [24]:
from pytorch_pretrained_bert.modeling import BertForSequenceClassification, BertConfig
# output_dir = "/home/yy452/rds/rds-gvdd-Yuap0gjVpKM/yy452/CLPsych2019_12/output/01022022_expert_1000"
output_model_file = os.path.join(output_dir, "pytorch_model.bin")
output_config_file = os.path.join(output_dir, "config.json")
config = BertConfig.from_json_file(output_config_file)
model_state_dict = torch.load(output_model_file)
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", state_dict=model_state_dict, num_labels=4)
model.to(device)

02/02/2022 12:40:46 - INFO - pytorch_pretrained_bert.modeling -   Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
02/02/2022 12:40:50 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /home/yy452/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/02/2022 12:40:50 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/yy452/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmpzlszxjsa
02/02/2022 12:40:54 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_r

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
   

## load longformer model

In [17]:
from transformers import LongformerForSequenceClassification, LongformerConfig
output_dir = "/home/yy452/rds/rds-gvdd-Yuap0gjVpKM/yy452/CLPsych2019_12/output/longformer_2048_2"
output_model_file = os.path.join(output_dir, "longformer.bin")
output_config_file = os.path.join(output_dir, "config.json")
config = LongformerConfig.from_json_file(output_config_file)
model_state_dict = torch.load(output_model_file)
model_lf = LongformerForSequenceClassification.from_pretrained("allenai/longformer-base-4096", state_dict=model_state_dict, num_labels=4)
model_lf.to(device)

LongformerForSequenceClassification(
  (longformer): LongformerModel(
    (embeddings): LongformerEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(4098, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): LongformerEncoder(
      (layer): ModuleList(
        (0): LongformerLayer(
          (attention): LongformerAttention(
            (self): LongformerSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (query_global): Linear(in_features=768, out_features=768, bias=True)
              (key_global): Linear(in_features=768, out_features=768, bias=True)
              (val

## load bert model

In [25]:
output_dir = "/home/yy452/rds/rds-gvdd-Yuap0gjVpKM/yy452/CLPsych2019_12/output/task_a_res"
output_model_file = os.path.join(output_dir, "pytorch_model.bin")
output_config_file = os.path.join(output_dir, "config.json")
config = BertConfig.from_json_file(output_config_file)
model_state_dict = torch.load(output_model_file)
model2 = BertForSequenceClassification.from_pretrained("bert-base-uncased", state_dict=model_state_dict, num_labels=4)
model2.to(device)

02/02/2022 12:41:00 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /home/yy452/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/02/2022 12:41:00 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /home/yy452/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmpoobtmjja
02/02/2022 12:41:04 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
 

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
   

In [26]:
pred = list()
true = list()
eval_examples = processor.get_test_examples(data_dir)
eval_features = convert_examples_to_features(
    eval_examples, label_list, max_seq_length, tokenizer)
complete_user_ids = list()
for example in eval_examples:
    complete_user_ids.append(example.guid)	
logger.info("***** Running evaluation *****")
logger.info("  Num examples = %d", len(eval_examples))
logger.info("  Batch size = %d", batch_size)
all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
all_label_ids = torch.tensor([0 for f in eval_features], dtype=torch.long)

eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
# Run prediction for full data
eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batch_size)

model.eval()
model2.eval()
eval_loss, eval_accuracy = 0, 0
nb_eval_steps, nb_eval_examples = 0, 0
complete_label_ids = list()
complete_outputs = list()
complete_logits = []
for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    label_ids = label_ids.to(device)

    with torch.no_grad():
        logits1 = model(input_ids, segment_ids, input_mask)
        logits2 = model2(input_ids, segment_ids, input_mask)

    # Get the "logits" output by the model. The "logits" are the output
    # values prior to applying an activation function like the softmax.
#     logits1 = outputs[0]
#     logits2 = outputs2[0]
    
    

    # Move logits and labels to CPU
    logits1 = logits1.detach().cpu().numpy()
    logits2 = logits2.detach().cpu().numpy()
    
    logits = (logits1 + logits2)/2.0
    
    outputs = np.argmax(logits, axis=1)
    complete_logits.append(logits)
    complete_outputs.extend(outputs)
    label_ids = label_ids.to('cpu').numpy()
    complete_label_ids.extend(label_ids)
    tmp_eval_accuracy = accuracy(logits, label_ids)

    eval_accuracy += tmp_eval_accuracy

    nb_eval_examples += input_ids.size(0)
    nb_eval_steps += 1

# eval_accuracy = eval_accuracy / nb_eval_examples
# loss = tr_loss/nb_tr_steps
# result = {'eval_accuracy': eval_accuracy,
#           'global_step': global_step,
#           'loss': loss}

# for i in range(len(complete_logits)):
#     print(complete_outputs[i], complete_logits[i])

02/02/2022 12:41:06 - INFO - __main__ -   ***** Running evaluation *****
02/02/2022 12:41:06 - INFO - __main__ -     Num examples = 125
02/02/2022 12:41:06 - INFO - __main__ -     Batch size = 16


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.