In [20]:
from main import BertSentClassification
import pytorch_lightning as pl
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel
from keras.preprocessing.sequence import pad_sequences
import torch as th
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split

# Tokenizer loaded from the "bert-base-uncased" checkpoint; shared module-wide.
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# Fixed sequence length, in token ids.
MAX_LEN = 64
# Raw sentiment labels run from -1 to 2; shifting by one yields class indices 0..3.
label2id = {label: label + 1 for label in range(-1, 3)}
# Inverse mapping: class index back to the raw sentiment label.
id2label = {class_index: label for label, class_index in label2id.items()}

def convert_examples_to_features(tweets, labels):
    """Tokenize tweets and pack them, with their labels, into a ``TensorDataset``.

    Each tweet is encoded with ``bert_tokenizer`` (special tokens included) and
    then padded or truncated at the end to exactly ``MAX_LEN`` token ids.

    Args:
        tweets: Iterable of tweet strings.
        labels: Iterable of raw sentiment labels, one per tweet. Every value
            must be a key of ``label2id``.

    Returns:
        A ``TensorDataset`` whose items are ``(input_ids, attention_mask, label)``.
        ``input_ids`` and ``attention_mask`` each hold ``MAX_LEN`` entries; the
        mask is 1 for real tokens and 0 for padding; ``label`` is the class
        index looked up in ``label2id``.

    Raises:
        KeyError: If a label is not present in ``label2id``.
    """
    pad_id = bert_tokenizer.pad_token_id

    encoded = [
        bert_tokenizer.encode(tweet, add_special_tokens=True) for tweet in tweets
    ]
    # NOTE(review): truncating="post" cuts from the end, so a tweet longer than
    # MAX_LEN presumably loses its trailing special token -- confirm this matches
    # how the checkpoint was trained before changing it.
    input_ids = pad_sequences(
        encoded,
        maxlen=MAX_LEN,
        dtype="long",
        value=pad_id,
        padding="post",
        truncating="post",
    )
    input_ids = th.tensor(input_ids)

    # Derive the mask from the same pad id used for padding (instead of assuming
    # it is 0), in one vectorized comparison rather than a per-token Python loop.
    attention_masks = (input_ids != pad_id).long()

    labels = th.tensor([label2id[label] for label in labels])

    return TensorDataset(input_ids, attention_masks, labels)


In [21]:
# Load the labelled tweets and convert every row into a feature example.
df = pd.read_csv("twitter_sentiment_data.csv")
dataset = convert_examples_to_features(df["message"], list(df["sentiment"]))

# Hold out 20% of the examples for validation; the fixed seed makes the split
# reproducible. The label lists are split alongside the examples.
train_data, val_data, train_label, val_labels = train_test_split(
    dataset, list(df["sentiment"]), test_size=0.2, random_state=1234
)

# Rebind `dataset` to the mapping of splits.
dataset = {"train": train_data, "val": val_data}

Each tweet is labelled as one of the following classes:
  -  2(News): the tweet links to factual news about climate change
  -  1(Pro): the tweet supports the belief of man-made climate change
  -  0(Neutral): the tweet neither supports nor refutes the belief of man-made climate change
  -  -1(Anti): the tweet rejects the belief of man-made climate change


In [25]:
    # Update the following path to point at your best checkpoint.
    model = BertSentClassification.load_from_checkpoint(
        "model_checkpoint.ckpt",
        dataset=dataset,
    )
    # Put the model in evaluation mode before running inference.
    model.eval()

    input_text = "global warming is so fake the goverment is tryign to control us"
    # Placeholder label: convert_examples_to_features needs one label per tweet,
    # but the label is discarded below.
    labels = [-1]

    # Convert the single example to features and take its only item.
    test_dataset = convert_examples_to_features([input_text], labels=labels)
    input_ids, attention_mask, _ = test_dataset[0]

    # Add a leading batch axis to both input_ids and the attention mask.
    input_ids = input_ids.unsqueeze(0)
    attention_mask = attention_mask.unsqueeze(0)

    # Inference only: disable gradient tracking so no autograd graph is built.
    with th.no_grad():
        prediction = model(input_ids, attention_mask)

    # Map the highest-scoring class index back to the raw sentiment label.
    prediction = th.argmax(prediction).item()
    print(id2label[prediction])

-1
