In [29]:
import pandas as pd
import torch

from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModel, AutoTokenizer

prefix = "/home/jansenwong/workspace/68611_final/me-aesdata/"
MODEL = 'distilbert-base-uncased'

class AESDataset(Dataset): ## Fix dataset and maybe rename to ASAPDataset or something ##
    """Short answer scoring dataset"""
    def __init__(self, input_fn):
        self.data = pd.read_csv(input_fn, sep='\t')
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL)
        self.max_len = 512

    def __getitem__(self, index):
        text = self.data.EssayText.iat[index]
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            
            'targets': torch.tensor(self.data.Score1.iat[index], dtype=torch.float),
            'essay_set': torch.tensor(self.data.EssaySet.iat[index], dtype=torch.int),
        }

    def __len__(self):
        return len(self.data)


In [30]:
def get_domain1_ranges():
    ''' Assuming this is correct:
    https://github.com/nusnlp/nea/blob/3673d2af408d5a5cb22d0ed6ff1cd0b25a0a53aa/nea/asap_reader.py '''
    return {
        1: (2, 12),
        2: (1, 6),
        3: (0, 3),
        4: (0, 3),
        5: (0, 4),
        6: (0, 4),
        7: (0, 30),
        8: (0, 60)
    }

domain_ranges = get_domain1_ranges()

domain_ranges = [[0, 0]] + [domain_ranges[i] for i in range(1, 9)]
# move whole thing to cuda
domain_ranges = torch.tensor(domain_ranges, dtype=torch.float).cuda()

In [32]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.trainer.trainer import Trainer
import logging

logger = logging.getLogger(__name__)


def top_one_probability(scores):
    # Define the function Φ(·) as the exponential function
    phi = torch.exp(scores)
    
    # Compute the top one probabilities
    top_one_prob = phi / torch.sum(phi)
    
    return top_one_prob

def ranking_objective_function(true_scores, predicted_scores):
    # Compute the top one probabilities for the true and predicted scores
    true_probs = top_one_probability(true_scores)
    predicted_probs = top_one_probability(predicted_scores)

    # use pytorch cross entropy instead
    loss = F.cross_entropy(true_probs, predicted_probs)
    
    return loss

class BertClassifier(pl.LightningModule):
    def __init__(self, batch_size, learning_rate, early_stop):
        super().__init__()
        self.save_hyperparameters()
        logger.info(f'hyperparameters: \n{self.hparams}')
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL)
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.early_stop = early_stop
        self.vocab_size = self.tokenizer.vocab_size

        self.dense_input_dim = 768
        self.dropout_rate = 0.2

        self.bert = AutoModel.from_pretrained(MODEL)
        self.dense = torch.nn.Linear(self.dense_input_dim, self.dense_input_dim)
        self.dropout = torch.nn.Dropout(self.dropout_rate)
        self.linear = torch.nn.Linear(self.dense_input_dim, 1) ## i think change N_CLASS to 1? ##
        self.sigmoid = torch.nn.Sigmoid()


        self.epochs = torch.tensor(15, dtype=torch.float)
        self.gamma = torch.tensor(3.45388, dtype=torch.float)

    def train_dataloader(self):
        ds = AESDataset(input_fn=prefix + 'train.tsv')
        return DataLoader(ds, batch_size=self.batch_size, num_workers=4, shuffle=True)

    def val_dataloader(self):
        ds = AESDataset(input_fn=prefix + 'val.tsv')
        return DataLoader(ds, batch_size=self.batch_size, num_workers=4, shuffle=False)

    def test_dataloader(self):
        ds = AESDataset(input_fn=prefix + 'test.tsv')
        return DataLoader(ds, batch_size=self.batch_size, num_workers=4, shuffle=False)

    def forward(self, input_ids,  attention_mask):

        # pass through bert
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # get the hidden state
        hidden_state = bert_out[0]


        # get the first token
        pooler = hidden_state[:, 0]

        pooler = self.dense(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        pooler = self.linear(pooler)
        score = self.sigmoid(pooler)

        return score 

    def training_step(self, batch, batch_idx):
        ids, mask, y, essay_sets = batch['ids'], batch['mask'], batch['targets'], batch['essay_set']
        score = self.forward(ids, mask)
        score = score.squeeze(1)
        mse_loss = F.mse_loss(y, score)
        ranking_loss = ranking_objective_function(y, score) / 4
        tau_e = 1 / (1 + torch.exp(self.gamma * (self.epochs / 2 - self.current_epoch)))

        loss = tau_e * mse_loss + (1 - tau_e) * ranking_loss
        return loss

    def validation_step(self, batch, batch_idx):
        ids, mask, y, essay_sets = batch['ids'], batch['mask'], batch['targets'], batch['essay_set']
        score = self.forward(ids, mask)
        score = score.squeeze(1)
        mse_loss = F.mse_loss(y, score)
        ranking_loss = ranking_objective_function(y, score) / 4
        tau_e = 1 / (1 + torch.exp(self.gamma * (self.epochs / 2 - self.current_epoch)))

        loss = tau_e * mse_loss + (1 - tau_e) * ranking_loss
        y_hat = score * (domain_ranges[essay_sets][:,1] - domain_ranges[essay_sets][:,0]) + domain_ranges[essay_sets][:,0]
        y = y * (domain_ranges[essay_sets][:,1] - domain_ranges[essay_sets][:,0]) + domain_ranges[essay_sets][:,0]
        # round both of these
        y_hat = torch.round(y_hat)
        y = torch.round(y)
        # get val_acc when they are equal 
        val_acc = torch.sum(y == y_hat).item() / (len(y) * 1.0)

        self.log_dict({
            'val_loss': loss,
            'val_acc': val_acc,
            },
            prog_bar=True)

    def predict_step(self, batch, batch_idx, dataloader_idx=None):
        ids, mask, y, essay_sets = batch['ids'], batch['mask'], batch['targets'], batch['essay_set']
        score = self.forward(ids, mask)
        score = score.squeeze(1)
        return {
            'score': score,
            'y': y,
            'essay_set': essay_sets
        }

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(),
            lr=self.learning_rate)
        return optimizer

In [33]:
BATCH_SIZE = 8 # 8
LEARNING_RATE = 0.00001 # 3e-4
VAL_CHECK_INTERVAL = 100
EARLY_STOP = None

torch.set_float32_matmul_precision('high')

def train():
    model = BertClassifier(
        batch_size=BATCH_SIZE,
        learning_rate=LEARNING_RATE,
        early_stop=EARLY_STOP
        )
    print(model)
    # early_stopping = EarlyStopping('val_loss')
    trainer = Trainer(
        val_check_interval=VAL_CHECK_INTERVAL,
        # callbacks=[early_stopping],
        precision='16-mixed',
        # gpus=1,
        # accelerator="gpu",
        max_epochs=15)
    trainer.fit(model)

In [34]:
train()

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type            | Params
--------------------------------------------
0 | bert    | DistilBertModel | 66.4 M
1 | dense   | Linear          | 590 K 
2 | dropout | Dropout         | 0     
3 | linear  | Linear          | 769   
4 | sigmoid | Sigmoid         | 0     
--------------------------------------------
67.0 M    Trainable params
0         Non-trainable params
67.0 M    Total params
267.817   Total estimated model params size (MB)


BertClassifier(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(i

`Trainer.fit` stopped: `max_epochs=15` reached.


Epoch 14: 100%|██████████| 1158/1158 [02:00<00:00,  9.64it/s, v_num=72, val_loss=0.0197, val_acc=0.549]


In [35]:
# good version is version_31
scores = []
def test():
    global scores
    saved_model_path = '/home/jansenwong/workspace/68611_final/lightning_logs/version_72/checkpoints/epoch=14-step=17312.ckpt'  # lightning_logs/version_N/checkpoints/* 最优模型软链接
    model = BertClassifier.load_from_checkpoint(saved_model_path)
    model.eval()
    trainer = Trainer(accelerator="gpu")
    scores = trainer.predict(model, model.test_dataloader())

test()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting DataLoader 0: 100%|██████████| 293/293 [00:07<00:00, 37.89it/s]


In [36]:
# combine all the 'score', 'y', and 'essay_set' into one list each
scores_list = []
y_list = []
essay_set_list = []
for i in range(len(scores)):
    scores_list.append(scores[i]['score'])
    y_list.append(scores[i]['y'])
    essay_set_list.append(scores[i]['essay_set'])

In [37]:
import numpy as np

# take them out of tensor form, and combine into a numpy array, and then merge them
scores_list = [score.cpu().detach().numpy() for score in scores_list]
y_list = [y.cpu().detach().numpy() for y in y_list]
essay_set_list = [essay_set.cpu().detach().numpy() for essay_set in essay_set_list]
scores_list = np.concatenate(scores_list)
y_list = np.concatenate(y_list)
essay_set_list = np.concatenate(essay_set_list)

In [39]:
# count number of essays in each essay set
essay_set_count = {}
for essay_set in essay_set_list:
    if essay_set in essay_set_count:
        essay_set_count[essay_set] += 1
    else:
        essay_set_count[essay_set] = 1

# separate scores and y's into different lists based on essay set
scores_list_separated = []
y_list_separated = []
for essay_set in essay_set_count:
    scores_list_separated.append(scores_list[essay_set_list == essay_set])
    y_list_separated.append(y_list[essay_set_list == essay_set])

# use sklearn instead
from sklearn.metrics import cohen_kappa_score

kappas = []
final_domain_ranges = [
    [0, 3], 
    [0, 3], 
    [1, 6], 
    [0, 60], 
    [2, 12], 
    [0, 4], 
    [0, 4], 
    [0, 30]
]

domain_ranges = torch.tensor(final_domain_ranges, dtype=torch.float)

for idx in range(len(scores_list_separated)):
    # essay_set = idx + 1
    essay_set = idx
    # scale back up based on domain_ranges from essay_set and domain1_ranges
    # convert to tensor
    scores_list_separated[idx] = torch.tensor(scores_list_separated[idx])
    y_list_separated[idx] = torch.tensor(y_list_separated[idx])

    y_hat = scores_list_separated[idx] * (domain_ranges[essay_set][1] - domain_ranges[essay_set][0]) + domain_ranges[essay_set][0]
    y = y_list_separated[idx] * (domain_ranges[essay_set][1] - domain_ranges[essay_set][0]) + domain_ranges[essay_set][0]
    
    # round both of these and convert to int in np
    y_hat = torch.round(y_hat)
    y_hat = y_hat.cpu().detach().numpy().astype(int)
    y = torch.round(y)
    y = y.cpu().detach().numpy().astype(int)

    kappa = cohen_kappa_score(y_hat, y, weights='quadratic')
    kappas.append(kappa)

# print out the kappas
for idx in range(len(kappas)):
    print(f'Essay set {idx + 1} kappa: {kappas[idx]}')

# get the average kappa
kappas = torch.tensor(kappas)
print(f'Average kappa: {torch.mean(kappas)}')

Essay set 1 kappa: 0.6551736963475414
Essay set 2 kappa: 0.833433296204267
Essay set 3 kappa: 0.604530903718296
Essay set 4 kappa: 0.6456321098948272
Essay set 5 kappa: 0.7832836912388687
Essay set 6 kappa: 0.7685365825831993
Essay set 7 kappa: 0.7709455336853281
Essay set 8 kappa: 0.7560483032334594
Average kappa: 0.7271980146132233
