## Packages and Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q pytorch-transformers

[K     |████████████████████████████████| 184kB 11.2MB/s 
[K     |████████████████████████████████| 1.2MB 17.5MB/s 
[K     |████████████████████████████████| 133kB 46.5MB/s 
[K     |████████████████████████████████| 890kB 37.1MB/s 
[K     |████████████████████████████████| 81kB 8.4MB/s 
[K     |████████████████████████████████| 7.4MB 41.2MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
[31mERROR: botocore 1.20.44 has requirement urllib3<1.27,>=1.25.4, but you'll have urllib3 1.24.3 which is incompatible.[0m


In [None]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)
[K     |████████████████████████████████| 2.0MB 11.2MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 46.7MB/s 
Installing collected packages: tokenizers, transformers
Successfully installed tokenizers-0.10.1 transformers-4.4.2


In [None]:
import re
import pandas as pd
import numpy as np
import io
from google.colab import files
import os
import datetime
import time
import sys
import math
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from time import sleep
import torch
import torch.nn as nn
import pickle

from torch.utils.data import DataLoader
from transformers import BertForSequenceClassification, AdamW
from pytorch_transformers import BertTokenizer, BertForMaskedLM, BertForNextSentencePrediction, BertForMultipleChoice, BertForSequenceClassification

## Read in FinBERT model

In [None]:
FinBERT_classifier = BertForSequenceClassification.from_pretrained('/content/drive/My Drive/W266 Project/FinBERT-Combo_128MSL-100K/', num_labels = 3)

In [None]:
type(FinBERT_classifier)

pytorch_transformers.modeling_bert.BertForSequenceClassification

In [None]:
repr(FinBERT_classifier)

'BertForSequenceClassification(\n  (bert): BertModel(\n    (embeddings): BertEmbeddings(\n      (word_embeddings): Embedding(30522, 768, padding_idx=0)\n      (position_embeddings): Embedding(512, 768)\n      (token_type_embeddings): Embedding(2, 768)\n      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n      (dropout): Dropout(p=0.1, inplace=False)\n    )\n    (encoder): BertEncoder(\n      (layer): ModuleList(\n        (0): BertLayer(\n          (attention): BertAttention(\n            (self): BertSelfAttention(\n              (query): Linear(in_features=768, out_features=768, bias=True)\n              (key): Linear(in_features=768, out_features=768, bias=True)\n              (value): Linear(in_features=768, out_features=768, bias=True)\n              (dropout): Dropout(p=0.1, inplace=False)\n            )\n            (output): BertSelfOutput(\n              (dense): Linear(in_features=768, out_features=768, bias=True)\n              (LayerNorm): LayerNorm((768

In [None]:
print(FinBERT_classifier)

## Read in dataset and create Train, Validation, and Test sets

In [None]:
full_data = pd.read_csv('/content/drive/MyDrive/W266 Project/data_augmented_mda_no_numbers_labels_one_day_change.csv')

In [None]:
print("Full Data set:", full_data.shape, sep = '\n')
print(full_data.trinary_98_102.value_counts())

Full Data set:

(43400, 17)
1.0    32538
0.0     5471
2.0     5391
Name: trinary_98_102, dtype: int64


In [None]:
# create training and testing sets
# Fix the seed so the split is identical on every run; without random_state each
# execution re-randomizes which rows land in the train and test sets.
train_texts, test_texts, train_labels, test_labels = train_test_split(full_data.mda_raw_text_no_numbers, full_data.trinary_98_102, test_size=.2, random_state=42)
# train_texts, val_texts, train_labels, val_labels = train_test_split(train_texts, train_labels, test_size=.2)

In [None]:
print("Training set:", len(train_labels), sep='\n\n')
print(train_labels.value_counts())

Training set:

34720
1.0    26042
0.0     4357
2.0     4321
Name: trinary_98_102, dtype: int64


In [None]:
# print("Validation set:", len(val_labels), sep='\n\n')
# print(val_labels.value_counts())

In [None]:
print("Testing set:", len(test_labels), sep='\n\n')
print(test_labels.value_counts())

Testing set:

8680
1.0    6496
0.0    1114
2.0    1070
Name: trinary_98_102, dtype: int64


In [None]:
## save these so that we can load these in directly without re-randomizing our train and test set

# train_texts.to_csv('drive/MyDrive/W266 Project/final_dataset/train_texts.csv', index = False)
# train_labels.to_csv('drive/MyDrive/W266 Project/final_dataset/train_labels.csv', index = False)
# test_texts.to_csv('drive/MyDrive/W266 Project/final_dataset/test_texts.csv', index = False)
# test_labels.to_csv('drive/MyDrive/W266 Project/final_dataset/test_labels.csv', index = False)

## Tokenize and prepare dataset for training

In [None]:
# train_texts = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/train_texts.csv')
# train_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/train_labels.csv')
# test_texts = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/test_texts.csv')
# test_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/test_labels.csv')

In [None]:
# train_texts = train_texts.iloc[:,0]
# train_labels = train_labels.iloc[:,0]

# train_texts = train_texts.reset_index(drop=True)
# train_labels = train_labels.reset_index(drop=True)

In [None]:
# from transformers import DistilBertTokenizerFast
# tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

In [None]:
# from transformers import BertTokenizer
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# print('Train text minimum tokens:',min(map(len,train_encodings['input_ids'])))
# print('Train text maximum tokens:',max(map(len,train_encodings['input_ids'])))

In [None]:
# Load data (deserialize): the pre-tokenized training encodings saved earlier.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles that were produced by this project.
with open('/content/drive/MyDrive/W266 Project/final_dataset/train_encodings.pickle', 'rb') as handle:
    train_pickle = pickle.load(handle)

# This rebinds train_labels (previously the trinary_98_102 split result) to the
# "seven_day_binary_100" column of the saved labels file.
train_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/train_labels.csv')
train_labels = train_labels.loc[:,"seven_day_binary_100"]
# Presumably keeps label positions aligned with the encodings' positions — TODO confirm.
train_labels = train_labels.reset_index(drop=True)

In [None]:
torch.cuda.is_available()

True

## Format data and train

In [None]:
def truncate_and_pad(encodings, max_length=512, pad_token_id=0):
    """Force every encoded sequence to exactly ``max_length`` tokens, in place.

    Sequences with ``len <= max_length`` are padded *before* their final token,
    so the original last token stays last. Longer sequences keep their first
    token, a window of ``max_length - 2`` tokens taken around the middle of the
    sequence, and their last token.

    Args:
        encodings: mapping with 'input_ids', 'token_type_ids' and
            'attention_mask' entries, each a list of per-example Python lists.
            The per-example lists are replaced in place.
        max_length: target length of every sequence (default 512).
        pad_token_id: id inserted into 'input_ids' as padding (default 0).

    Returns:
        The same ``encodings`` object that was passed in (not a copy).

    Raises:
        ValueError: if ``max_length`` is smaller than 2.
        IndexError: if a sequence is empty (there is no last token to keep).
    """
    if max_length < 2:
        raise ValueError('max_length must be at least 2, got {}'.format(max_length))

    # Number of tokens kept between the first and last token when truncating.
    window = max_length - 2

    for idx, ids in enumerate(encodings['input_ids']):
        type_ids = encodings['token_type_ids'][idx]
        mask = encodings['attention_mask'][idx]
        length_of_tokens = len(ids)

        if length_of_tokens <= max_length:
            # Insert the padding just before the final token; padded positions
            # get token type 0 and attention mask 0.
            pad = max_length - length_of_tokens
            encodings['input_ids'][idx] = ids[:-1] + [pad_token_id] * pad + [ids[-1]]
            encodings['token_type_ids'][idx] = type_ids[:-1] + [0] * pad + [type_ids[-1]]
            encodings['attention_mask'][idx] = mask[:-1] + [0] * pad + [mask[-1]]
        else:
            # Keep a window centred on the midpoint, re-wrapped in the original
            # first and last tokens.
            start = length_of_tokens // 2 - window // 2
            end = start + window
            encodings['input_ids'][idx] = [ids[0]] + ids[start:end] + [ids[-1]]
            # The other fields are simply cut to the first max_length entries.
            encodings['token_type_ids'][idx] = type_ids[0:max_length]
            encodings['attention_mask'][idx] = mask[0:max_length]
    return encodings

In [None]:
class TenQDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing pre-tokenized encodings with integer labels.

    Args:
        encodings: mapping from field name (e.g. 'input_ids') to a sequence of
            per-example values; each field is indexed with the example index.
        labels: sequence of class labels, converted once to a ``torch.long``
            tensor.
    """
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = torch.tensor(labels, dtype = torch.long)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, plus a 'labels' entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # self.labels is already a tensor: wrapping it in torch.tensor() again
        # emits a copy-construct UserWarning on every access, so copy explicitly.
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        """Number of examples (one per label)."""
        return len(self.labels)


def train_model(encodings, labels, num_classes, model_type, epochs, learning_rate, checkpoint_path, use_checkpoint = False):
    """Fine-tune a BERT sequence classifier and checkpoint it to ``checkpoint_path``.

    Args:
        encodings: mapping with at least 'input_ids' and 'attention_mask'
            entries, indexable per example (wrapped in a TenQDataset).
        labels: per-example class labels.
        num_classes: number of output classes for the classification head.
        model_type: 'finbert' (FinBERT weights stored on Drive) or 'bert'
            ('bert-base-uncased').
        epochs: number of passes over the data to run in this call. When
            resuming, the epoch stored in the checkpoint is not used to skip
            work; this many further epochs are run.
        learning_rate: AdamW learning rate. Also applied after a checkpoint is
            loaded, so the value passed here is the one actually used.
        checkpoint_path: file the training state is written to every 500
            batches and at the end of every epoch.
        use_checkpoint: when True, load model and optimizer state from
            ``checkpoint_path`` before training.

    Raises:
        ValueError: if ``model_type`` is not 'finbert' or 'bert'.
    """
    model_paths = {
        'finbert': '/content/drive/My Drive/W266 Project/FinBERT-Combo_128MSL-100K/',
        'bert': 'bert-base-uncased',
    }
    # Fail early with a clear message instead of an unbound-variable error later.
    if model_type not in model_paths:
        raise ValueError("model_type must be 'finbert' or 'bert', got {!r}".format(model_type))

    train_dataset = TenQDataset(encodings, labels)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    model = BertForSequenceClassification.from_pretrained(model_paths[model_type], num_labels = num_classes)
    model.to(device)
    train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True)
    optim = AdamW(model.parameters(), lr=learning_rate)
    PATH = checkpoint_path

    if use_checkpoint:
        # map_location lets a checkpoint written on GPU be restored on CPU too.
        checkpoint = torch.load(PATH, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        # Loading the optimizer state also restores the saved param_groups,
        # including their learning rate; re-apply the requested one.
        for param_group in optim.param_groups:
            param_group['lr'] = learning_rate

    def save_checkpoint(epoch_index, batch_index, loss_value):
        # Single place that defines the checkpoint layout.
        torch.save({
            'epoch': epoch_index,
            'batch': batch_index,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optim.state_dict(),
            'loss': loss_value
        },  PATH)

    model.train()

    batches_to_checkpoint = 500
    # Defined up front so the end-of-epoch report and save never hit an unbound
    # name, even when an epoch has fewer than batches_to_checkpoint batches.
    average_loss = None
    train_loss = None
    batch_number = -1

    for epoch in range(epochs):
        start_epoch = datetime.datetime.now()
        # Running totals for the current reporting window.
        window_loss = 0.0
        window_batches = 0
        starttime = datetime.datetime.now()

        for batch_number, batch in enumerate(train_loader):
            optim.zero_grad()

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            batch_labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
            # With labels supplied, the first output element is the loss.
            loss = outputs[0]
            train_loss = loss.item()
            window_loss += train_loss
            window_batches += 1
            loss.backward()
            optim.step()

            # Report the loss of the first batch of the epoch (index 0).
            if batch_number == 0:
                print('Starting Batch Loss - {}'.format(train_loss))

            if window_batches == batches_to_checkpoint:
                average_loss = window_loss / window_batches
                endtime = datetime.datetime.now()
                print('Epoch {}, Batch {}: Average Loss - {}, Most Recent Batch Loss - {}'.format(epoch + 1, batch_number + 1, average_loss, train_loss))
                print('\t\tPrevious {} batches took'.format(batches_to_checkpoint), endtime - starttime)

                print('\t\tSaving model to', PATH, end = '\n\n')
                save_checkpoint(epoch, batch_number, average_loss)

                # Start a new reporting window.
                window_loss = 0.0
                window_batches = 0
                starttime = datetime.datetime.now()

        # Average over the trailing partial window, if the epoch did not end
        # exactly on a window boundary.
        if window_batches:
            average_loss = window_loss / window_batches

        end_epoch = datetime.datetime.now()
        print('Finished Epoch {}: Final Batch Loss - {}'.format(epoch + 1, train_loss))
        print('\t\tFull Epoch took', end_epoch - start_epoch)
        save_checkpoint(epoch, batch_number, average_loss)

In [None]:
# ## train from scratch
# encodings = chunked_tokenize(train_texts)
# processed_data = truncate_and_pad(encodings)
# train_model(processed_data, train_labels, num_classes=2)

In [None]:
## Continue training from the saved checkpoint, using the pickle-loaded encodings.
## (use_checkpoint = True below makes train_model load model and optimizer state
## from checkpoint_path before training; set it to False to start from the
## pretrained weights only.)
# encodings = chunked_tokenize(train_texts)

# truncate_and_pad rewrites train_pickle in place and returns that same object.
processed_data = truncate_and_pad(train_pickle)
train_model(processed_data, 
            train_labels, 
            num_classes=2,
            model_type = 'finbert',
            epochs = 5,
            learning_rate = 5e-8, 
            checkpoint_path = '/content/drive/MyDrive/W266 Project/finbert_model_checkpoint_seven_day_binary_100/model.pt',
            use_checkpoint = True
            )

## Load model and evaluate on test set

In [None]:
# full_data = pd.read_csv('/content/drive/MyDrive/W266 Project/data_augmented_mda_no_numbers_labels_one_day_change.csv')

# train_texts = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/train_texts.csv')
# train_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/train_labels.csv')
# test_texts = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/test_texts.csv')
# test_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/test_labels.csv')

In [None]:
# test_texts = test_texts.iloc[:,0]
# test_labels = test_labels.iloc[:,0]

In [None]:
# Load data (deserialize): the pre-tokenized test encodings saved earlier.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load
# pickles that were produced by this project.
with open('/content/drive/MyDrive/W266 Project/final_dataset/test_encodings.pickle', 'rb') as handle:
    test_pickle = pickle.load(handle)

# This rebinds test_labels (previously the trinary_98_102 split result) to the
# "seven_day_binary_100" column of the saved labels file.
test_labels = pd.read_csv('/content/drive/MyDrive/W266 Project/final_dataset/test_labels.csv')
test_labels = test_labels.loc[:,"seven_day_binary_100"]
# Presumably keeps label positions aligned with the encodings' positions — TODO confirm.
test_labels = test_labels.reset_index(drop=True)

In [None]:
# from transformers import BertTokenizer
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# from torch.utils.data import DataLoader
# from transformers import BertForSequenceClassification, AdamW

# test_loader = DataLoader(test_dataset, batch_size=int(len(test_dataset.encodings['input_ids']) / 10))

In [None]:
# # device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# device = torch.device('cpu')
# print(device)

# model_load = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 3)
# model_load.to(device)
# optim_load = AdamW(model_load.parameters(), lr=5e-5)


# PATH = '/content/drive/MyDrive/W266 Project/bert_training_checkpoint/model.pt'
# checkpoint = torch.load(PATH, map_location=device)
# model_load.load_state_dict(checkpoint['model_state_dict'])
# optim_load.load_state_dict(checkpoint['optimizer_state_dict'])
# epoch = checkpoint['epoch']
# loss = checkpoint['loss']
# batch_number = checkpoint['batch']

# model_load.eval()

In [None]:
# true_labels = []
# predicted_labels = []
# for batch_number, batch in enumerate(test_loader,0):
#         input_ids = batch['input_ids'].to(device)
#         attention_mask = batch['attention_mask'].to(device)
#         labels = batch['labels'].to(device)
#         true_labels.extend(labels.tolist())

#         outputs = model_load(input_ids, attention_mask=attention_mask)
#         for row in outputs[0]:
#             top = max(row)
#             index_of_highest = [i for i,j in enumerate(row) if j == top]
#             predicted_labels.extend(index_of_highest)

  # Remove the CWD from sys.path while we load stuff.


In [None]:
# preds_col = pd.DataFrame(predicted_labels, columns = ['preds'])
# true_col = pd.DataFrame(true_labels, columns = ['true'])
# predictions = pd.concat([true_col, preds_col], axis = 1)

In [None]:
def truncate_and_pad(encodings, max_length=512, pad_token_id=0):
    """Force every encoded sequence to exactly ``max_length`` tokens, in place.

    Sequences with ``len <= max_length`` are padded *before* their final token,
    so the original last token stays last. Longer sequences keep their first
    token, a window of ``max_length - 2`` tokens taken around the middle of the
    sequence, and their last token.

    Args:
        encodings: mapping with 'input_ids', 'token_type_ids' and
            'attention_mask' entries, each a list of per-example Python lists.
            The per-example lists are replaced in place.
        max_length: target length of every sequence (default 512).
        pad_token_id: id inserted into 'input_ids' as padding (default 0).

    Returns:
        The same ``encodings`` object that was passed in (not a copy).

    Raises:
        ValueError: if ``max_length`` is smaller than 2.
        IndexError: if a sequence is empty (there is no last token to keep).
    """
    if max_length < 2:
        raise ValueError('max_length must be at least 2, got {}'.format(max_length))

    # Number of tokens kept between the first and last token when truncating.
    window = max_length - 2

    for idx, ids in enumerate(encodings['input_ids']):
        type_ids = encodings['token_type_ids'][idx]
        mask = encodings['attention_mask'][idx]
        length_of_tokens = len(ids)

        if length_of_tokens <= max_length:
            # Insert the padding just before the final token; padded positions
            # get token type 0 and attention mask 0.
            pad = max_length - length_of_tokens
            encodings['input_ids'][idx] = ids[:-1] + [pad_token_id] * pad + [ids[-1]]
            encodings['token_type_ids'][idx] = type_ids[:-1] + [0] * pad + [type_ids[-1]]
            encodings['attention_mask'][idx] = mask[:-1] + [0] * pad + [mask[-1]]
        else:
            # Keep a window centred on the midpoint, re-wrapped in the original
            # first and last tokens.
            start = length_of_tokens // 2 - window // 2
            end = start + window
            encodings['input_ids'][idx] = [ids[0]] + ids[start:end] + [ids[-1]]
            # The other fields are simply cut to the first max_length entries.
            encodings['token_type_ids'][idx] = type_ids[0:max_length]
            encodings['attention_mask'][idx] = mask[0:max_length]
    return encodings

In [None]:
class TenQDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing pre-tokenized encodings with integer labels.

    Args:
        encodings: mapping from field name (e.g. 'input_ids') to a sequence of
            per-example values; each field is indexed with the example index.
        labels: sequence of class labels, converted once to a ``torch.long``
            tensor.
    """
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = torch.tensor(labels, dtype = torch.long)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, plus a 'labels' entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        # self.labels is already a tensor: wrapping it in torch.tensor() again
        # emits a copy-construct UserWarning on every access, so copy explicitly.
        item['labels'] = self.labels[idx].clone().detach()
        return item

    def __len__(self):
        """Number of examples (one per label)."""
        return len(self.labels)


def evaluate_model(encodings, labels, num_classes, model_type, checkpoint_path, batch_size = 1):
    """Load a fine-tuned classifier from a checkpoint and predict on a dataset.

    Args:
        encodings: mapping with at least 'input_ids' and 'attention_mask'
            entries, indexable per example (wrapped in a TenQDataset).
        labels: per-example true class labels.
        num_classes: number of output classes of the classification head.
        model_type: 'finbert' (FinBERT weights stored on Drive) or 'bert'
            ('bert-base-uncased'); selects the base model the checkpoint's
            weights are loaded into.
        checkpoint_path: file whose 'model_state_dict' entry is restored.
        batch_size: number of examples per forward pass (default 1).

    Returns:
        DataFrame with one row per example and columns 'true' and 'preds'.

    Raises:
        ValueError: if ``model_type`` is not 'finbert' or 'bert'.
    """
    model_paths = {
        'finbert': '/content/drive/My Drive/W266 Project/FinBERT-Combo_128MSL-100K/',
        'bert': 'bert-base-uncased',
    }
    # Fail early with a clear message instead of an unbound-variable error later.
    if model_type not in model_paths:
        raise ValueError("model_type must be 'finbert' or 'bert', got {!r}".format(model_type))

    test_dataset = TenQDataset(encodings, labels)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    model_load = BertForSequenceClassification.from_pretrained(model_paths[model_type], num_labels = num_classes)
    model_load.to(device)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Only the model weights are needed for inference; no optimizer is built.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model_load.load_state_dict(checkpoint['model_state_dict'])

    model_load.eval()

    true_labels = []
    predicted_labels = []
    # no_grad avoids building an autograd graph for every forward pass.
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            true_labels.extend(batch['labels'].tolist())

            outputs = model_load(input_ids, attention_mask=attention_mask)
            # Without labels, the first output element holds the logits.
            logits = outputs[0]
            # argmax yields exactly one prediction per row, so 'preds' always
            # stays aligned with 'true', even when two logits tie.
            predicted_labels.extend(torch.argmax(logits, dim=1).tolist())

    predictions = pd.DataFrame({'true': true_labels, 'preds': predicted_labels})

    return predictions

In [None]:
# truncate_and_pad rewrites test_pickle in place and returns that same object.
processed_data = truncate_and_pad(test_pickle)
# Score the FinBERT checkpoint trained on the seven-day binary label.
final_preds = evaluate_model(processed_data, 
                             test_labels, 
                             num_classes = 2, 
                             model_type = 'finbert',
                             checkpoint_path = '/content/drive/MyDrive/W266 Project/finbert_model_checkpoint_seven_day_binary_100/model.pt'
                             )

cuda


  


In [None]:
final_preds.preds.value_counts()

0    4531
1    4149
Name: preds, dtype: int64

In [None]:
final_preds.true.value_counts()

0    4356
1    4324
Name: true, dtype: int64

In [None]:
final_preds.groupby(['true','preds']).size()

true  preds
0     0        2420
      1        1936
1     0        2111
      1        2213
dtype: int64

In [None]:
# final_preds.to_csv('/content/drive/MyDrive/W266 Project/bert_model_checkpoint_seven_day_trinary_98_102/final_preds.csv', index = False)

In [None]:
def evaluate_model_from_pretrain(encodings, labels, num_classes, checkpoint_path, batch_size = 1):
    """Predict with 'bert-base-uncased' as loaded by from_pretrained (no checkpoint).

    This is the baseline counterpart of evaluate_model: the model is used
    exactly as ``from_pretrained`` returns it, without restoring any fine-tuned
    weights.

    Args:
        encodings: mapping with at least 'input_ids' and 'attention_mask'
            entries, indexable per example (wrapped in a TenQDataset).
        labels: per-example true class labels.
        num_classes: number of output classes of the classification head.
        checkpoint_path: accepted for signature compatibility only; it is not
            read, because no checkpoint is loaded.
        batch_size: number of examples per forward pass (default 1).

    Returns:
        DataFrame with one row per example and columns 'true' and 'preds'.
    """
    test_dataset = TenQDataset(encodings, labels)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    print(device)

    model_load = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = num_classes)
    model_load.to(device)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    model_load.eval()

    true_labels = []
    predicted_labels = []
    # no_grad avoids building an autograd graph for every forward pass.
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            true_labels.extend(batch['labels'].tolist())

            outputs = model_load(input_ids, attention_mask=attention_mask)
            # Without labels, the first output element holds the logits.
            logits = outputs[0]
            # argmax yields exactly one prediction per row, so 'preds' always
            # stays aligned with 'true', even when two logits tie.
            predicted_labels.extend(torch.argmax(logits, dim=1).tolist())

    predictions = pd.DataFrame({'true': true_labels, 'preds': predicted_labels})

    return predictions

In [None]:
# truncate_and_pad rewrites test_pickle in place and returns that same object.
processed_data = truncate_and_pad(test_pickle)
# NOTE(review): evaluate_model_from_pretrain does not load checkpoint_path (its
# checkpoint-loading code is disabled), so the path below has no effect and the
# predictions come from bert-base-uncased as returned by from_pretrained.
final_preds_pretrain = evaluate_model_from_pretrain(processed_data, 
                             test_labels, 
                             num_classes = 2, 
                             checkpoint_path = '/content/drive/MyDrive/W266 Project/bert_model_checkpoint_one_day_binary_100/model.pt'
                             )

cuda


  


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
tensor([-0.2094,  0.2863], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1587,  0.0785], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1756,  0.1460], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1464,  0.1666], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1354,  0.1845], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.2314,  0.1991], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.2753,  0.2401], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1754,  0.2586], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1752,  0.1960], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.0016,  0.2005], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1672,  0.0835], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1687,  0.1221], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.0949,  0.2070], device='cuda:0', grad_fn=<UnbindBackward>)
tensor([-0.1441,  0.1226]

In [None]:
final_preds_pretrain.preds.value_counts()

1    8679
0       1
Name: preds, dtype: int64

In [None]:
final_preds_pretrain.true.value_counts()

1    4874
0    3806
Name: true, dtype: int64

In [None]:
# Build a two-class bert-base-uncased classifier and restore the weights saved
# in the one-day-binary checkpoint.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model_load = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 2)
model_load.to(device)
# The optimizer exists only so that its saved state can be restored below.
optim_load = AdamW(model_load.parameters(), lr=5e-6)


PATH = '/content/drive/MyDrive/W266 Project/bert_model_checkpoint_one_day_binary_100/model.pt'
# map_location places the saved tensors on the device selected above.
checkpoint = torch.load(PATH, map_location=device)
model_load.load_state_dict(checkpoint['model_state_dict'])
optim_load.load_state_dict(checkpoint['optimizer_state_dict'])
# Training-progress metadata stored alongside the weights.
epoch = checkpoint['epoch']
loss = checkpoint['loss']
batch_number = checkpoint['batch']

# Switch to evaluation mode; as the cell's last expression this also displays the model.
model_load.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
# Same construction as model_load, but no checkpoint is restored: this model
# keeps whatever weights from_pretrained gives it. Presumably built so its
# classifier weights can be compared with the checkpointed model's — TODO confirm.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels = 2)
model.to(device)
optim = AdamW(model.parameters(), lr=5e-6)

# PATH = '/content/drive/MyDrive/W266 Project/bert_model_checkpoint_one_day_binary_100/model.pt'
# checkpoint = torch.load(PATH, map_location=device)
# model_load.load_state_dict(checkpoint['model_state_dict'])
# optim_load.load_state_dict(checkpoint['optimizer_state_dict'])
# epoch = checkpoint['epoch']
# loss = checkpoint['loss']
# batch_number = checkpoint['batch']

# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
model_load.state_dict()['classifier.weight'][0]

tensor([ 0.0040, -0.0007, -0.0018, -0.0233, -0.0179,  0.0006,  0.0023,  0.0151,
        -0.0107, -0.0182, -0.0090,  0.0017,  0.0172,  0.0077, -0.0047,  0.0115,
         0.0234, -0.0387,  0.0039, -0.0092,  0.0335,  0.0046, -0.0217, -0.0158,
        -0.0208, -0.0030,  0.0023, -0.0281, -0.0180, -0.0064,  0.0176,  0.0117,
        -0.0079, -0.0015,  0.0123,  0.0203,  0.0014, -0.0227, -0.0105,  0.0229,
        -0.0114, -0.0200, -0.0075, -0.0540,  0.0136,  0.0091, -0.0027, -0.0096,
         0.0053, -0.0206,  0.0248,  0.0141, -0.0011, -0.0179,  0.0010,  0.0078,
         0.0005,  0.0081, -0.0332, -0.0122, -0.0358, -0.0067, -0.0179,  0.0225,
        -0.0204,  0.0074, -0.0080,  0.0133,  0.0237,  0.0125, -0.0172, -0.0108,
        -0.0076, -0.0126, -0.0079, -0.0107, -0.0295,  0.0125,  0.0131, -0.0179,
         0.0048, -0.0024, -0.0018,  0.0026,  0.0303,  0.0196, -0.0208, -0.0056,
        -0.0059, -0.0292, -0.0004,  0.0072,  0.0153,  0.0049,  0.0286,  0.0027,
        -0.0136, -0.0179, -0.0116, -0.05

In [None]:
model.state_dict()['classifier.weight'][0]

tensor([ 1.4482e-02, -6.9611e-05,  3.3875e-02,  1.7152e-02, -1.9951e-02,
         4.0673e-02,  7.3961e-03,  1.8944e-02, -1.9960e-02,  4.1581e-02,
         3.1574e-02, -5.9357e-03,  8.6846e-03, -2.9385e-02, -8.0777e-03,
         8.3111e-03,  9.0202e-03, -8.2728e-03, -1.8894e-02, -1.0812e-02,
         1.7028e-02,  3.4429e-03, -1.9861e-02, -4.1194e-03,  2.8645e-02,
        -7.3001e-03, -1.7193e-02, -1.2476e-02,  1.1415e-02,  5.8531e-04,
        -3.0735e-02,  3.5565e-02, -1.4902e-02,  2.9362e-02, -1.2896e-02,
        -3.5882e-03, -1.4492e-02, -2.0595e-02,  1.6306e-02,  1.5280e-02,
        -4.8266e-03, -1.7178e-02, -1.8658e-02,  1.3059e-02,  4.0989e-03,
         1.0589e-02, -1.2625e-02,  1.8632e-02,  2.8249e-02, -4.1579e-02,
         3.9141e-03, -2.4041e-02,  4.0459e-02,  8.0854e-03, -1.9295e-02,
        -3.4466e-02,  6.0362e-03,  1.4995e-02, -3.8931e-02, -1.4457e-02,
         9.1001e-03,  5.3663e-05,  7.6696e-03,  1.2852e-02, -3.7079e-02,
         9.3357e-03,  2.4936e-02,  9.6161e-03, -1.1