# setup

In [1]:
!nvidia-smi

Mon Nov 25 09:44:11 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.06              Driver Version: 555.42.06      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla V100-SXM2-32GB           On  |   00000000:B2:00.0 Off |                    0 |
| N/A   30C    P0             40W /  300W |       1MiB /  32768MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
!pip install langchain langchain-huggingface bitsandbytes datasets matplotlib scikit-learn

Defaulting to user installation because normal site-packages is not writeable


In [25]:
import getpass
import os
import random
import re
import matplotlib.pyplot as plt
import numpy as np

from time import time
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from datasets import load_dataset
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_core.messages import (
    HumanMessage,
    SystemMessage,
)

In [4]:
api_env_key = "HUGGINGFACEHUB_API_TOKEN"
if os.environ.get(api_env_key) is None:
    os.environ[api_env_key] = getpass.getpass(
        "Enter your Hugging Face API key: "
    )

Enter your Hugging Face API key:  ········


In [5]:
llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    device=0, 
    pipeline_kwargs=dict(
        max_new_tokens=512,
        do_sample=True,
        repetition_penalty=1.1,
        pad_token_id = 128009 # extracted from chat_model.llm.pipeline.tokenizer.eos_token_id
    ),
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
chat_model = ChatHuggingFace(llm=llm)
chat_model.llm.pipeline.tokenizer.pad_token_id = chat_model.llm.pipeline.tokenizer.eos_token_id

# class Sentiment(BaseModel):
#     sentiment: str = Field(..., description=f"The classification of the sentence, which should be one of: {', '.join(label_map.values())}")
#     reason: str = Field(..., description="Explanation of how the classification was decided")
# struct_chat_model = chat_model.with_structured_output(schema=Sentiment)

# https://stackoverflow.com/questions/78883642/llama3-8b-instruct-returns-value-of-none-when-using-with-structure-output-wher
# Seems to be a bug with structured outputs and the LLAMA 8b model.

In [7]:
print(chat_model.llm.pipeline.tokenizer.eos_token_id)
print(type(chat_model.llm.pipeline.tokenizer.eos_token_id))

128009
<class 'int'>


In [8]:
dataset = load_dataset("sst2")

train_set = dataset["train"]
test_set = dataset["test"]

print(train_set[:5])
print(test_set[:5])

{'text': ['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy'], 'label': [0, 0, 3, 2, 3]}
{'text': ['im feeling rather rotten so im not very ambitious right now', 'im updating my blog because i feel shitty', 'i never make her separate from me because i don t ever want her to feel like i m ashamed with her', 'i left with my bouquet of red and yellow tulips under my arm feeling slightly more optimistic than when i arrived', 'i was feeling a little vain when i did this one'], 'label': [0, 0, 0, 1, 0]}


In [1]:
label_map = {
    0: "negative",
    1: "positive"
}

def map_labels(data):
  data["label_human"] = label_map[data["label"]]
  return data

train_set = train_set.map(map_labels)
test_set = test_set.map(map_labels)

print(train_set[:5])
print(test_set[:5])

NameError: name 'train_set' is not defined

In [32]:
def get_reasoning(label):
    reasoning_map = {
        0: "This sentence conveys dissatisfaction, criticism, or unfavorable emotions, often using words that imply negativity, disappointment, or disapproval.",
        1: "This sentence expresses favorable emotions, satisfaction, or praise, often using language that conveys positivity, happiness, or approval."
    }
    return reasoning_map.get(label)

examples_by_label = {}
for example in train_set:
    label = example["label"]
    if label not in examples_by_label:
        examples_by_label[label] = []
    examples_by_label[label].append(example)

random.seed(42)
few_shot_examples = []

for label, examples in examples_by_label.items():
    few_shot_examples.extend(random.sample(examples, 1))

print(few_shot_examples)

[{'text': 'im feel alone and i dont know how to cope', 'label': 0, 'emotion': 'sadness', 'label_human': 'sadness'}, {'text': 'i feel really angry sometimes because for the love of god havent we been through enough', 'label': 3, 'emotion': 'anger', 'label_human': 'anger'}, {'text': 'ive been feeling a bit nostalgic ever since i went through a box of my dads old pictures for a post i did for a href http mysalvagedtreasures', 'label': 2, 'emotion': 'love', 'label_human': 'love'}, {'text': 'i had a hard time focusing on my life and walked around feeling dazed and confused', 'label': 5, 'emotion': 'surprise', 'label_human': 'surprise'}, {'text': 'i have just had such a crappy week that i am still feeling all agitated and like the day wasn t what i wanted', 'label': 4, 'emotion': 'fear', 'label_human': 'fear'}, {'text': 'id probably go with none on and hope that my date admires a confident girl who feels fine without makeup', 'label': 1, 'emotion': 'joy', 'label_human': 'joy'}]


In [33]:
examples_text = "\n".join(
    f"Sentence: {example['text']}\n"
    f"Sentiment: {example['label_human']}"
    for example in few_shot_examples
)

print(examples_text)

Sentence: im feel alone and i dont know how to cope
Sentiment: sadness
Sentence: i feel really angry sometimes because for the love of god havent we been through enough
Sentiment: anger
Sentence: ive been feeling a bit nostalgic ever since i went through a box of my dads old pictures for a post i did for a href http mysalvagedtreasures
Sentiment: love
Sentence: i had a hard time focusing on my life and walked around feeling dazed and confused
Sentiment: surprise
Sentence: i have just had such a crappy week that i am still feeling all agitated and like the day wasn t what i wanted
Sentiment: fear
Sentence: id probably go with none on and hope that my date admires a confident girl who feels fine without makeup
Sentiment: joy


In [34]:
examples_text_cot = "\n".join(
    f"Sentence: {example['text']}\n"
    f"Reasoning: {get_reasoning(example['label'])}\n"
    f"Sentiment: {example['label_human']}"
    for example in few_shot_examples
)

print(examples_text_cot)

Sentence: im feel alone and i dont know how to cope
Reasoning: This sentence conveys a sense of loss, disappointment, or unhappiness, often associated with words or phrases indicating regret or sorrow.
Sentiment: sadness
Sentence: i feel really angry sometimes because for the love of god havent we been through enough
Reasoning: This sentence contains language that conveys frustration, irritation, or hostility, often marked by aggressive or critical tones.
Sentiment: anger
Sentence: ive been feeling a bit nostalgic ever since i went through a box of my dads old pictures for a post i did for a href http mysalvagedtreasures
Reasoning: This sentence expresses affection, admiration, or strong emotional connections, often reflecting bonds of love or devotion.
Sentiment: love
Sentence: i had a hard time focusing on my life and walked around feeling dazed and confused
Reasoning: This sentence suggests surprise or astonishment, often with words or phrases that indicate unexpected outcomes or di

# Functions

In [35]:
def zero_shot(content: str, examples = None):
    # Asking for reasoning increases prompt response time by 10x.
    # f"Now classify the following sentence and provide reasoning for your classification. The output MUST follow this format:\n"
    # f"Sentiment: [Classification]\nReason: [Explanation]"

    messages = [
        SystemMessage(
            content=f"Your goal is to read a sentence and classify its sentiment into one of the following categories: {', '.join(label_map.values())}.\n\n"
                    f"Now classify the following sentence. The output MUST follow this format:\n"
                    f"Sentiment: [Classification]"
        ),
        HumanMessage(
            content=content
        ),
    ]

    response = chat_model.invoke(messages)
    return response.content

def few_shot(content: str, examples: str):
    messages = [
        SystemMessage(
            content=f"Your goal is to read a sentence and classify its sentiment into one of the following categories: {', '.join(label_map.values())}.\n\n"
                    f"Here are some examples:\n{examples}\n\n"
                    f"Now classify the following sentence. The output MUST follow this format:\n"
                    f"Sentiment: [Classification]"
        ),
        HumanMessage(
            content=content
        ),
    ]

    response = chat_model.invoke(messages)
    return response.content

# def parse_sentiment_reason(response):
#     sentiment_pattern = r"Sentiment:\s*(.*?)\n"
#     reason_pattern = r"Reason:\s*(.*?)$"
    
#     sentiment_matches = re.findall(sentiment_pattern, response)
#     reason_match = re.search(reason_pattern, response)

#     sentiment = sentiment_matches[1].strip().lower() if len(sentiment_matches) > 1 else None
#     reason = reason_match.group(1).strip() if reason_match else None
    
#     return (sentiment, reason)

def parse_sentiment(response):
    sentiment_pattern = r"Sentiment:\s*(.*?)$"
    sentiment_match = re.search(sentiment_pattern, response)

    sentiment = sentiment_match.group(1).strip().lower() if sentiment_match else "invalid"

    # Sometimes, model outputs in an incorrect format (e.g. "determination/resilience (however, closest match from your options would be: joy)")
    # This will make it simply identify as None rather than trying to deal with parsing the output.
    if sentiment not in [label for label in label_map.values()]:
        # print(f"+++\nResponse in invalid format:\n\n{response}\n\n---\n")
        sentiment = "invalid"

    return sentiment

def evaluate_model(dataset, fn, examples):
    t0 = time()
    predictions = []

    for idx, (sentence, true_label) in enumerate(zip(dataset["text"], dataset["label_human"]), 1):
        prediction = fn(sentence, examples)
        sentiment = parse_sentiment(prediction)

        predictions.append(sentiment)

        if idx % 100 == 0:
            print(f"---\nSentence: {sentence}\nTrue: {true_label}\nPrediction: {sentiment}")
            print(f"Processed {idx}/{len(dataset)} examples, Time: {time()-t0:.3f}\n---\n")

    return predictions

def create_reports(dataset, predictions):
    calc_accuracy(dataset, predictions)
    class_report(dataset, predictions)
    conf_matrix(dataset, predictions)
    acc_graph(dataset, predictions)

def calc_accuracy(dataset, predictions):
    true_labels = dataset["label_human"]
    accuracy = sum([1 if p == t else 0 for p, t in zip(predictions, true_labels)]) / len(true_labels) * 100
    print(f"accuracy: {accuracy:.4f}")

def class_report(dataset, predictions):
    true_labels = dataset["label_human"]
    print(classification_report(
            true_labels, 
            predictions, 
            target_names=list(label_map.values()) + ["invalid"],
            zero_division=0))

def conf_matrix(dataset, predictions):
    true_labels = dataset["label_human"]
    cm = confusion_matrix(true_labels, predictions)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(label_map.values()) + ["invalid"])
    disp.plot()
    plt.show()

def acc_graph(dataset, predictions):
    true_labels = dataset["label_human"]
    accuracies = np.cumsum(np.array(true_labels) == np.array(predictions)) / np.arange(1, len(true_labels) + 1)
    plt.figure(figsize=(8, 5))
    plt.plot(accuracies * 100)
    plt.title("Accuracy Over Samples")
    plt.xlabel("Number of Samples")
    plt.ylabel("Accuracy (%)")
    plt.grid()
    plt.show()

# Zero-Shot Prompting Preview

In [36]:
ai_msg = zero_shot("The food was absolutely amazing and delightful!")
print(ai_msg)

ai_msg = zero_shot("The experience was the worst I've ever had.")
print(ai_msg)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Your goal is to read a sentence and classify its sentiment into one of the following categories: sadness, joy, love, anger, fear, surprise.

Now classify the following sentence. The output MUST follow this format:
Sentiment: [Classification]<|eot_id|><|start_header_id|>user<|end_header_id|>

The food was absolutely amazing and delightful!<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Sentiment: joy
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Your goal is to read a sentence and classify its sentiment into one of the following categories: sadness, joy, love, anger, fear, surprise.

Now classify the following sentence. The output MUST follow this format:
Sentiment: [Classification]<|eot_id|><|start_header_id|>user<|end_header_id|>

The experience was the worst I've ever had.<|eo

# Few-Shot Prompting Preview

In [37]:
ai_msg = few_shot("The food was absolutely amazing and delightful!", examples_text)
print(ai_msg)

ai_msg = few_shot("The experience was the worst I've ever had.", examples_text)
print(ai_msg)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Your goal is to read a sentence and classify its sentiment into one of the following categories: sadness, joy, love, anger, fear, surprise.

Here are some examples:
Sentence: im feel alone and i dont know how to cope
Sentiment: sadness
Sentence: i feel really angry sometimes because for the love of god havent we been through enough
Sentiment: anger
Sentence: ive been feeling a bit nostalgic ever since i went through a box of my dads old pictures for a post i did for a href http mysalvagedtreasures
Sentiment: love
Sentence: i had a hard time focusing on my life and walked around feeling dazed and confused
Sentiment: surprise
Sentence: i have just had such a crappy week that i am still feeling all agitated and like the day wasn t what i wanted
Sentiment: fear
Sentence: id probably go with none on and hope that my date admires a confident girl who feels fine without 

# Few-Shot with Chain-of-Thought Preview

In [38]:
ai_msg = few_shot("The food was absolutely amazing and delightful!", examples_text_cot)
print(ai_msg)

ai_msg = few_shot("The experience was the worst I've ever had.", examples_text_cot)
print(ai_msg)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Your goal is to read a sentence and classify its sentiment into one of the following categories: sadness, joy, love, anger, fear, surprise.

Here are some examples:
Sentence: im feel alone and i dont know how to cope
Reasoning: This sentence conveys a sense of loss, disappointment, or unhappiness, often associated with words or phrases indicating regret or sorrow.
Sentiment: sadness
Sentence: i feel really angry sometimes because for the love of god havent we been through enough
Reasoning: This sentence contains language that conveys frustration, irritation, or hostility, often marked by aggressive or critical tones.
Sentiment: anger
Sentence: ive been feeling a bit nostalgic ever since i went through a box of my dads old pictures for a post i did for a href http mysalvagedtreasures
Reasoning: This sentence expresses affection, admiration, or strong emotional conn

# Zero-Shot Evaluation

In [None]:
# sampleset = test_set.select(range(100))

In [30]:
zero_shot_results = evaluate_model(test_set, zero_shot, None)

---
Sentence: i was still feeling weepy and strung out so maggie treated me to ice cream and a movie a href http www
True: sadness
Prediction: joy
Processed 100/2000 examples, Time: 38.075
---

---
Sentence: i feel that third situation pretty much sums up my feelings toward this title
True: joy
Prediction: sadness
Processed 200/2000 examples, Time: 77.549
---

---
Sentence: im feeling slightly irritable today
True: anger
Prediction: anger
Processed 300/2000 examples, Time: 114.315
---

---
Sentence: ive been feeling afraid a lot lately
True: fear
Prediction: fear
Processed 400/2000 examples, Time: 151.979
---

---
Sentence: i will admit and it left me feeling shaken and a bit of a goose
True: fear
Prediction: fear
Processed 500/2000 examples, Time: 192.379
---



KeyboardInterrupt: 

In [None]:
create_reports(test_set, zero_shot_results)

# Few-Shot Evaluation

In [None]:
few_shot_results = evaluate_model(test_set, few_shot, examples_text)

In [None]:
create_reports(test_set, few_shot_results)

# Few-Shot Chain-of-Thought Evaluation

In [None]:
few_shot_cot_results = evaluate_model(test_set, few_shot, examples_text_cot)

In [None]:
create_reports(test_set, few_shot_cot_results)