In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
from tqdm import tqdm
import matplotlib.pyplot as plt
from types import SimpleNamespace

# Task 4.1, BERT model

In [None]:
!pip install tokenizers
!pip install transformers
!pip install openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import drive
# Mount Google Drive inside the Colab VM; everything this notebook loads is read from `folder`.
drive.mount('/content/drive')
# Trailing slash matters: later cells build paths by plain string concatenation with `folder`.
folder = '/content/drive/MyDrive/CSC401/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import sys
# Put the Drive folder on the import path so the local `classifier` and `tokenizer` modules resolve.
sys.path.append(folder)
# NOTE(review): the wildcard import hides where names come from. The cells in view use
# BertSentClassifier, BertDataset and create_data — presumably all from `classifier`; confirm
# before replacing this with an explicit import list.
from classifier import *
from tokenizer import BertTokenizer

In [None]:
# Settings handed to BertSentClassifier; SimpleNamespace gives attribute-style access.
config = SimpleNamespace(
    hidden_dropout_prob=0.3,
    num_labels=2,
    hidden_size=768,
    option='flexible',
)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Instantiate the classifier from `config` and place its parameters on `device`.
model = BertSentClassifier(config).to(device)
# Bare expression so the cell still echoes the module structure.
model

BertSentClassifier(
  (bert): BertModel(
    (word_embedding): Embedding(30522, 768, padding_idx=0)
    (pos_embedding): Embedding(512, 768)
    (tk_type_embedding): Embedding(2, 768)
    (embed_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (embed_dropout): Dropout(p=0.1, inplace=False)
    (bert_layers): ModuleList(
      (0-11): 12 x BertLayer(
        (self_attention): BertSelfAttention(
          (query): Linear(in_features=768, out_features=768, bias=True)
          (key): Linear(in_features=768, out_features=768, bias=True)
          (value): Linear(in_features=768, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (attention_dense): Linear(in_features=768, out_features=768, bias=True)
        (attention_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (attention_dropout): Dropout(p=0.1, inplace=False)
        (interm_dense): Linear(in_features=768, out_features=3072, bias=True)
   

In [None]:
# Fine-tuned checkpoint stored in the Drive folder; map_location remaps saved tensors onto `device`.
checkpoint_path = f'{folder}flexible-10-1e-05.pt'
model_state_dict = torch.load(checkpoint_path, map_location=device)

In [None]:
# Copy the weights stored under the checkpoint's 'model' key into the classifier.
# With strict=False, missing or unexpected keys are reported in the returned value instead of
# raising, so check the cell output to confirm that every key matched.
model.load_state_dict(model_state_dict['model'], strict=False)

<All keys matched successfully>

In [None]:
# Load the 'bert-base-uncased' tokenizer through the BertTokenizer class of the local `tokenizer` module.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
# Load the examples from Sentiment.txt and wrap them for batched inference.
# The 'dev' argument presumably selects the split/mode inside create_data — confirm in classifier.
data = create_data(f'{folder}Sentiment.txt', 'dev')
dataset = BertDataset(data, None)
dataloader = DataLoader(
    dataset,
    batch_size=5,
    shuffle=False,  # keep the file order so outputs line up with the input rows
    collate_fn=dataset.collate_fn,
)

load 5 data from /content/drive/MyDrive/CSC401/Sentiment.txt


In [None]:
# Run the classifier over the whole dataloader without tracking gradients.
model.eval()
# One score tensor per batch, in dataloader order. Previously `logits` was overwritten on
# every iteration, so only the final batch survived when the data spanned several batches.
batch_logits = []
with torch.no_grad():
  for step, batch in enumerate(dataloader):
    b_ids = batch['token_ids']
    b_type_ids = batch['token_type_ids']
    b_mask = batch['attention_mask']
    b_labels = batch['labels']
    b_sents = batch['sents']

    b_ids = b_ids.to(device)
    b_mask = b_mask.to(device)
    b_labels = b_labels.to(device)
    batch_logits.append(model(b_ids, b_mask))

# Join along the first (example) axis so `logits` holds one row per input sentence.
logits = torch.cat(batch_logits, dim=0)

In [None]:
# Move the scores to host memory, then exponentiate element-wise.
# NOTE(review): the printed rows sum to ~1, which suggests the classifier returns
# log-probabilities — confirm against the classifier's forward().
log_scores = logits.cpu().numpy()
rslt = np.exp(log_scores)
print(rslt)

[[0.00670665 0.99329346]
 [0.00624877 0.9937512 ]
 [0.9979741  0.00202591]
 [0.9989599  0.00104012]
 [0.30161944 0.69838053]]


# Task 4.2, Causal LM

In [None]:
from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer

In [None]:
causal_tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
# is_decoder=True is forwarded to the model config. The library's load-time message asks for it
# whenever BertLMHeadModel is used standalone, so that self-attention is masked left-to-right
# instead of letting every position see the tokens that follow it.
Causal_LM = AutoModelForCausalLM.from_pretrained('bert-base-uncased', is_decoder=True).to(device)

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertLMHeadModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertLMHeadModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertLMHeadModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
def evaluate_probabilities(causal_LM, causal_tokenizer, sentences, suffix):
    """Print how likely "positive" vs. "negative" is as the token following each prompt.

    Each prompt is ``sentence + suffix``. The prompt is run through ``causal_LM`` and the
    softmax over the vocabulary at the last input position is read out for the two label
    tokens.

    Args:
        causal_LM: torch module whose call returns an object with a ``.logits`` tensor
            indexed as ``[batch, position, vocab]``.
        causal_tokenizer: tokenizer exposing ``encode(text, return_tensors=...)``. It is
            expected to put one special token in front of the text (the label ids are read
            from index 1 of the encoded label word).
        sentences: iterable of review strings.
        suffix: text appended to every sentence before scoring.

    Returns:
        None. One line per sentence is printed.
    """
    # Run on whatever device the model's weights live on instead of relying on a notebook global.
    model_device = next(causal_LM.parameters()).device

    # The label-token ids do not depend on the sentence, so look them up once.
    # Index 1 skips the special token the tokenizer places at position 0.
    pos_id = causal_tokenizer.encode("positive")[1]
    neg_id = causal_tokenizer.encode("negative")[1]
    sep_id = getattr(causal_tokenizer, "sep_token_id", None)

    for sentence in sentences:
        input_ids = causal_tokenizer.encode(sentence + suffix, return_tensors='pt')

        # The tokenizer closes the sequence with a separator token. Left in place, the last
        # position would score "what follows the separator" rather than "what follows the
        # suffix", so drop it before querying the model.
        if sep_id is not None and input_ids.shape[1] > 1 and input_ids[0, -1].item() == sep_id:
            input_ids = input_ids[:, :-1]
        input_ids = input_ids.to(model_device)

        with torch.no_grad():
            logits = causal_LM(input_ids).logits

        # Distribution over the vocabulary for the token that would come next.
        next_token_probs = torch.softmax(logits[0, -1], dim=-1)
        pos_prob = next_token_probs[pos_id].item()
        neg_prob = next_token_probs[neg_id].item()

        print(f"probabilities -> Positive: {pos_prob}, Negative: {neg_prob}")

In [None]:
# Hand-picked probe texts for the sentiment experiments; each variable name records the
# sentiment the text is meant to carry. These two are the positive examples.
strongly_positive = "It is no wonder that the film has such a high rating, it is quite literally breathtaking. What can I say that hasn't said before? Not much, it's the story, the acting, the premise, but most of all, this movie is about how it makes you feel. Sometimes you watch a film, and can't remember it days later, this film loves with you, once you've seen it, you don't forget. The ultimate story of friendship, of hope, and of life, and overcoming adversity. I understand why so many class this as the best film of all time, it isn't mine, but I get it. If you haven't seen it, or haven't seen it for some time, you need to watch it, it's amazing. 10/10."
mildly_positive = "This film , for an after school special , is n't that bad , and that 's okay . Interesting things happen . You feel as if you 're still in class . A guy teaches a bunch of young underdogs how to be good paint ball players . We never get to see these underdogs doing badly as the good player is training them . They gradually turn into good players by meditating . Also there are too many characters and good character development . Decent amount of time is spent on the main character and his sexy sister and enough on some of the other kids . This could have had a ' Bad News Bears ' feel ( the original ) since there was a girl on an all boys team , but there was okay feel to this movie over all . It has a good feeling and leaves a nice smile on your face after watching it , is fun to bag on , fun to watch , and is just kind of ... there . Plain . Good . Something you 'd watch after school before your pre - evening nap . As good as the day is long and it 's been a long , long day watching this movie ."
# Negative and off-topic probe texts; each variable name records the intended sentiment.
# The doubled-quote runs inside these literals are parsed by Python as adjacent string
# literals that get concatenated, so no quote characters end up in the resulting text.
# NOTE(review): the mildly_negative literal is split across two lines in this view; a plain
# double-quoted literal cannot contain a raw line break — confirm the notebook cell holds it
# on a single line (or carries an escaped newline).
mildly_negative = "The arrival of vast waves of white settlers in the 1800s and their conflict with the Native American residents of the prairies spelled the end for the buffalo ... < br /><br />The commercial killers , however , were n't the only ones shooting bison ... Train companies offered tourist the chance to shoot buffalo from the windows of their coaches ... There were even buffalo killing contests ... "" "" Buffalo "" "" Bill Cody killed thousands of buffalo ... Some U. S. government officers even promoted the destruction of the bison herds ... The buffalo nation was destroyed by greed and uncontrolled hunting ... Few visionaries are working today to rebuild the once - great bison herds ... < br /><br />""""The Last Hunt "" "" holds one of Robert Taylor 's most dull and   unoriginal performances and for once failed in disregarding the theory that no audience would accept Taylor as a heavy guy ... <br /><br />His characterization of a sadistic buffalo hunter , who kills only for pleasure , had its potential : The will to do harm to another ... < br /><br />When he is joined by his fellow buffalo stalker ( Stewart Granger ) it is evident that these two contrasted characters , with opposite ideas , will clash violently very soon ... <br /><br />Taylor 's shooting spree was not limited to wild beasts ... He also enjoy killing Indians who steal his horses ... He even tries to romance a beautiful squaw ( Debra Paget ) who shows less than generous to his needs and comfort ... <br /><br />Among others buffalo hunters are Lloyd Nolan , outstanding as a drunken buffalo skinner ; Russ Tamblyn as a half - breed ; and Constance Ford as the dance - hall girl ... But Taylor steals the show ... Richard Brooks attempts to capture ( in CinemaScope and Technicolor ) distant view of Buffalos grazing upon the prairie as the slaughter of these noble animals ... 
<br /><br />The film though is a terse , brutish outdoor Western with barely anything to say about old Western myths and an unoriginal climax in which the bad guy freezes to death while waiting all night to gun down the hero ... """
strongly_negative = "This was without a doubt the worst of the "" "" Dirty Harry "" "" series . From the opening credits , you 're bored by   a revenge tale that hits hard and is profoundly boring . Sondra Locke is bad in the role of a traumatized woman out for revenge . Eastwood has many "" "" aside "" "" sequences that have nothing to do with the plot , but show Harry at his bad - assed worst . Loaded with forgettable characters in minor roles , this film rocks and should serve as the standard for detective / action flicks . This is the one Dirty Harry flick that 's raw and devoid of any "" "" fluff "" "" . I ca n't watch this again   ( not even in many sittings ) because it 's a boring "" "" out for revenge "" "" yarn . The pace is slow and several of the scenes are forgettable . "" "" Go ahead - Make my day ... You feel lucky , Punk ? .... "" "" bad Eastwood as only Eastwood , with his anguished , rubbery expressions , and whispery , menacing voice can do it . "
off_topic = "Can you tell me how much the shirt is? -Yes, it's nine fifteen."

In [None]:
# Probe texts in a fixed order: positive first, then negative, then the off-topic control.
sentences = [
    strongly_positive,
    mildly_positive,
    mildly_negative,
    strongly_negative,
    off_topic,
]

In [None]:
# Text appended to every review; the LM's next-token scores after it are read as a sentiment guess.
suf = "This sentence is "
# NOTE(review): the label below says "prefix1", but `suf` is passed as the suffix argument.
print("Results for prefix1:")
evaluate_probabilities(Causal_LM, causal_tokenizer, sentences, suf)

Results for prefix1:
probabilities -> Positive: 4.163256903666479e-07, Negative: 3.506977463985095e-07
probabilities -> Positive: 7.766830094624311e-07, Negative: 1.425206761496156e-07
probabilities -> Positive: 9.600316275282239e-08, Negative: 1.1032501134877748e-07
probabilities -> Positive: 1.9036541232253512e-07, Negative: 1.3726297538596555e-06
probabilities -> Positive: 3.544085164230992e-11, Negative: 2.3920955494194374e-11


# Task 4.3, ChatGPT LLM

I used the GPT-4 API to produce the results below.

In [None]:
import openai

In [None]:
# The key is kept in a file on Drive rather than written into the notebook.
# strip() removes surrounding whitespace — in particular the newline that usually ends the
# file — which would otherwise become part of the key sent to the API.
with open(folder+'api.txt', 'r') as file:
    API_KEY = file.read().strip()


In [None]:
# Register the key with the openai module (module-level setting used by the calls below).
openai.api_key = API_KEY
# Running chat transcript shared with predict(); each entry is a {"role", "content"} dict.
message_history = []

def predict(input):
    """Send ``input`` to GPT-4 as the next user turn and return the assistant's reply.

    The request carries the whole module-level ``message_history`` followed by the new user
    message. The history is only extended (user turn, then assistant turn) once the reply
    has arrived, so a failed request leaves the transcript exactly as it was instead of
    ending on an unanswered user message.

    Args:
        input: content of the user turn; it is converted to text with an f-string. The
            parameter name shadows the builtin but is kept for compatibility with callers.

    Returns:
        The text content of the first choice in the completion.
    """
    user_message = {"role": "user", "content": f"{input}"}

    completion = openai.ChatCompletion.create(
      model="gpt-4",
      messages=message_history + [user_message]
    )

    reply_content = completion.choices[0].message.content

    # Record both turns together so the shared transcript always alternates user/assistant.
    message_history.append(user_message)
    message_history.append({"role": "assistant", "content": f"{reply_content}"})

    return reply_content

In [None]:
# All probe texts joined into one newline-separated string.
sent = '\n'.join(sentences)
message = "Please help me analyze the positivity of the following five movie reviews, please be coherent and short, answer how positive it is (strongly/mildly positive/negative) with a very short explaination."

# Start a fresh transcript that already contains the instruction and a canned acknowledgement,
# so the reviews sent afterwards are answered in the requested format.
message_history = [
    {"role": "user", "content": message},
    {"role": "assistant", "content": "ok"},
]
print(message)
print(message_history)

Please help me analyze the positivity of the following five movie reviews, please be coherent and short, answer how positive it is (strongly/mildly positive/negative) with a very short explaination.
[{'role': 'user', 'content': 'Please help me analyze the positivity of the following five movie reviews, please be coherent and short, answer how positive it is (strongly/mildly positive/negative) with a very short explaination.'}, {'role': 'assistant', 'content': 'ok'}]


In [None]:
# Send the reviews one at a time; predict() appends each exchange to message_history, so
# every request also carries the instruction and all earlier reviews and answers.
for sentence in sentences:
  response = predict(sentence)
  print(response)

Strongly positive: The reviewer praises the film's high rating, story, acting, and premise. They emphasize the long-lasting impact it has on viewers and its themes of friendship, hope, and overcoming adversity. The perfect score (10/10) also indicates a strongly positive review.
Mildly positive: The reviewer finds the film decent for an after-school special, with interesting events happening and some character development. They mention a good feeling and a nice smile left on the face after watching it, but also describe it as plain and good, suggesting a mildly positive review.
Mildly negative: The reviewer criticizes Robert Taylor's performance as dull and unoriginal, and finds fault with the film's storyline and climax. They do, however, give some credit to the other actors and the cinematography. Overall, the review seems to suggest a mildly negative opinion of the film.
Mildly negative: The reviewer calls the film the worst of the series and mentions that it is boring, has forgetta