In [17]:
from transformers import BertTokenizer
import torch
DEVICE = torch.device('cuda')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
CHECKPOINT_PATH = '/home/student/workspace/Truthseeker/final_checkpoint'

In [18]:
def encode(sentence):
    return tokenizer.encode_plus(
                        sentence,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 64,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                   )

In [19]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Load BertForSequenceClassification, the pretrained BERT model with a single 
# linear classification layer on top. 
model = BertForSequenceClassification.from_pretrained(
    CHECKPOINT_PATH, # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = 2, # The number of output labels--2 for binary classification.
                    # You can increase this for multi-class tasks.   
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

#Loading from statedict
model.load_state_dict(torch.load('final.ckpt'))

# Tell pytorch to run this model on the GPU.
model.cuda()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [20]:
import pandas as pd
DATASET_PATH = "/home/student/datasets/TruthSeeker2023/Truth_Seeker_Model_Dataset.csv"
df = pd.read_csv(DATASET_PATH)

print('Number of training sentences: {:,}\n'.format(df.shape[0]))

# Display 10 random rows from the data.
df = df.sample(frac=1)

Number of training sentences: 134,198



In [21]:
sentences = 'Statement: ' + df['statement'] + '| Tweet: ' +df['tweet']
labels = df["BinaryNumTarget"].values

## Inference

In [22]:
for i in range(1, 100):
    encoded_sentence_dict = encode(sentences[i])
    #print (sentences[i])
    #print (encoded_sentence_dict['input_ids'])
    #print (tokenizer.decode(encoded_sentence_dict['input_ids'][0]))
    #print (encoded_sentence_dict)
    output = model(
            encoded_sentence_dict['input_ids'].to(DEVICE),
            token_type_ids=None, 
            attention_mask=encoded_sentence_dict['attention_mask'].to(DEVICE), return_dict=True)
    print (output.logits, labels[i])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


tensor([[-7.0100,  7.0272]], device='cuda:0', grad_fn=<AddmmBackward0>) 0.0
tensor([[-7.0134,  7.0322]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-6.9947,  7.0076]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-6.9979,  7.0151]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-6.9918,  7.0070]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-7.0144,  7.0324]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-7.0112,  7.0294]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-7.0027,  7.0198]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-6.9759,  6.9822]], device='cuda:0', grad_fn=<AddmmBackward0>) 0.0
tensor([[-6.9938,  7.0016]], device='cuda:0', grad_fn=<AddmmBackward0>) 0.0
tensor([[-7.0153,  7.0297]], device='cuda:0', grad_fn=<AddmmBackward0>) 0.0
tensor([[-6.9939,  7.0099]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-6.9972,  7.0131]], device='cuda:0', grad_fn=<AddmmBackward0>) 1.0
tensor([[-7.