Task 3:

In [None]:
import os
import json
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer

In [None]:
from datasets import load_dataset, Dataset
import pandas as pd

# Number of rows kept from each split (the train subset was previously
# annotated with "#30000" as an alternative size).
N_TRAIN_ROWS = 1000
N_TEST_ROWS = 50

# Load the dataset from Hugging Face
sentiment_dataset = load_dataset('carblacac/twitter-sentiment-analysis')

# Access the training and testing sets directly
train_dataset = sentiment_dataset['train']
test_dataset = sentiment_dataset['test']

# Convert to pandas and keep only the leading rows of each split.
# .copy() detaches the subset from the full frame, so adding columns to it
# later does not trigger pandas' SettingWithCopyWarning.
train_df = train_dataset.to_pandas().iloc[:N_TRAIN_ROWS].copy()
test_df = test_dataset.to_pandas().iloc[:N_TEST_ROWS].copy()


In [3]:
# Prompt template for the sentiment task; {tweet} is filled in per row.
# The template ends with a bare "### Sentiment" line (no colon, no label):
# the evaluation code looks for the label after a "### Sentiment:" marker in
# the generated text, so the model's continuation must supply ": <label>".
instructions1 = """
Please read the tweet provided below. Your task is to analyze the content and context of the tweet to determine whether it 
expresses a positive or negative sentiment. Consider the use of emotive language, punctuation, and any emoticons used. 
Classify the tweet accordingly as either '1' if it conveys a favorable opinion or emotion, or '0' if it 
expresses an unfavorable opinion or emotion.
### Tweet: {tweet}
### Sentiment
"""

def add_instructions(df):
    """Return a copy of ``df`` with an ``Instruction1`` prompt column.

    Args:
        df: DataFrame with a ``text`` column holding the tweet text.

    Returns:
        A new DataFrame with every original column plus ``Instruction1``,
        the ``instructions1`` template filled with that row's tweet. The
        input frame is left unmodified.

    Raises:
        KeyError: if ``df`` has no ``text`` column.
    """
    # Only the tweet is substituted: the template has no label placeholder,
    # so the label column is neither read nor required here.
    prompts = df['text'].map(lambda tweet: instructions1.format(tweet=tweet))
    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(Instruction1=prompts)

# Add the formatted prompt column ('Instruction1') to the test frame
test_df = add_instructions(test_df)


In [4]:
# Sanity check: show the fully formatted prompt built for the first test row
print(test_df.iloc[0]['Instruction1'])


Please read the tweet provided below. Your task is to analyze the content and context of the tweet to determine whether it 
expresses a positive or negative sentiment. Consider the use of emotive language, punctuation, and any emoticons used. 
Classify the tweet accordingly as either '1' if it conveys a favorable opinion or emotion, or '0' if it 
expresses an unfavorable opinion or emotion.
### Tweet: @justineville ...yeahhh. ) i'm 39 tweets from 1,600!
### Sentiment



In [5]:
# Convert the prompt-augmented test frame into a Dataset; run_model() reads
# its 'Instruction1' and 'feeling' columns.
test_dset = Dataset.from_pandas(test_df)

In [6]:
from tqdm.auto import tqdm

# Function to set up and run the pipeline
def run_model(model, tokenizer, test_dset, file_name,
              output_dir='/work/gns938/nlp_hw3', batch_size=20,
              max_new_tokens=64, device=0):
    """Generate a completion for every prompt in ``test_dset`` and save them as JSON.

    Args:
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer handed to the text-generation pipeline.
        test_dset: Dataset supporting ``len()``, ``select(indices)`` and row
            iteration, with ``Instruction1`` (prompt) and ``feeling``
            (reference label) columns.
        file_name: Stem of the output file; ``.json`` is appended.
        output_dir: Directory the JSON file is written to.
        batch_size: Number of prompts passed to the pipeline per call.
        max_new_tokens: Generation budget passed to the pipeline.
        device: Device argument passed to ``pipeline``.

    Returns:
        The list of records written to disk, each with the keys ``Prompt``,
        ``Reference_answer`` and ``Generated_outputs``.
    """
    # Create the pipeline
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device
    )

    num_examples = len(test_dset)
    total_batches = (num_examples + batch_size - 1) // batch_size
    generated_output = []

    for start in tqdm(range(0, num_examples, batch_size), total=total_batches, desc="Generating text"):
        stop = min(start + batch_size, num_examples)
        examples = list(test_dset.select(range(start, stop)))

        # Drop anything from a '### Sentiment:' marker onwards so a reference
        # label can never leak into the prompt; a prompt without that marker
        # is only stripped of surrounding whitespace.
        prompts = [example['Instruction1'].split('### Sentiment:')[0].strip() for example in examples]
        results = pipe(prompts, max_new_tokens=max_new_tokens)

        # Pair each result with its own example by position. Looking results
        # up by value (results.index) returns the first equal entry, which
        # mislabels duplicated prompts and is quadratic in the batch size.
        for prompt, example, result in zip(prompts, examples, results):
            generated_output.append({
                "Prompt": prompt,
                "Reference_answer": example['feeling'],
                "Generated_outputs": result[0]['generated_text']
            })

    # Save to JSON
    output_path = os.path.join(output_dir, f'{file_name}.json')
    with open(output_path, 'w') as f:
        json.dump(generated_output, f, indent=4)

    print(f"Results saved to '{file_name}.json'.")
    return generated_output
    

In [7]:
# Checkpoints to evaluate, and the stem of the JSON file each one writes.
# The two lists are paired by position.
model_paths = [
    '/work/gns938/nlp_hw3/Mistral-sentiment-fine-tuned',
    '/work/gns938/nlp_hw3/Mistral-mixed-fine-tuned',
    'mistralai/Mistral-7B-v0.1'
]
file_names = ['generated_output_sentiment_fine_tuned', 'generated_output_mixed_fine_tuned', 'generated_output_original_pretrained']

# zip() silently truncates to the shorter list, so a missing entry would
# otherwise skip a checkpoint without any error.
assert len(model_paths) == len(file_names), "each model path needs an output file name"

for model_path, file_name in zip(model_paths, file_names):

    # Load the model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    try:
        run_model(model, tokenizer, test_dset, file_name)
    finally:
        # Drop the references so this checkpoint's memory can be reclaimed
        # before the next one is loaded (and after the last one finishes),
        # then hand cached GPU blocks back to the driver.
        del model, tokenizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.36s/it]
Generating text: 100%|██████████| 3/3 [02:02<00:00, 40.75s/it]


Results saved to 'generated_output_sentiment_fine_tuned.json'.


Loading checkpoint shards: 100%|██████████| 2/2 [00:15<00:00,  7.95s/it]
Generating text: 100%|██████████| 3/3 [02:02<00:00, 40.78s/it]


Results saved to 'generated_output_mixed_fine_tuned.json'.


Loading checkpoint shards: 100%|██████████| 2/2 [00:15<00:00,  7.87s/it]
Generating text:   0%|          | 0/3 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end g

Results saved to 'generated_output_original_pretrained.json'.


In [8]:
import json
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import os

# Function to extract sentiment from generated outputs
def extract_sentiment(generated_text):
    """Parse the binary sentiment label out of a model generation.

    The label is read from the remainder of the line that follows the FIRST
    ``### Sentiment:`` marker; later markers (e.g. extra examples the model
    keeps generating) are ignored.

    Args:
        generated_text: Full text returned by the generation pipeline.

    Returns:
        ``0`` or ``1`` when the marker is followed by that digit (optionally
        followed by punctuation or whitespace), otherwise ``None`` -- this
        includes a missing marker, a non-string input, a textual answer and
        any number other than 0/1.
    """
    if not isinstance(generated_text, str):
        return None

    # partition() splits at the first marker only; an empty `marker` means
    # the text never contained one.
    _, marker, tail = generated_text.partition("### Sentiment:")
    if not marker:
        return None

    # The answer must sit on the marker's own line.
    answer = tail.split("\n", 1)[0].strip()

    # Accept exactly the labels 0 and 1. The character after the digit must
    # not be alphanumeric, so "10" or "1st" are rejected rather than read as 1;
    # other integers would break the binary-averaged metrics downstream.
    if answer[:1] in ("0", "1") and not answer[1:2].isalnum():
        return int(answer[0])
    return None

def evaluate_results(file_path):
    """Score one saved generation file against its reference labels.

    Loads the JSON records at ``file_path``, parses a predicted label from
    each ``Generated_outputs`` entry with ``extract_sentiment`` and prints
    accuracy, F1, precision and recall. Records whose prediction cannot be
    parsed are excluded from the metrics; their count is printed so the
    scores are not mistaken for full-coverage numbers.

    Args:
        file_path: Path to a JSON list of records with ``Reference_answer``
            and ``Generated_outputs`` keys.

    Returns:
        A dict with ``accuracy``, ``f1``, ``precision``, ``recall``,
        ``evaluated`` and ``skipped``, or ``None`` when no prediction could
        be parsed.
    """
    # Load the results from JSON file
    with open(file_path, 'r') as f:
        results = json.load(f)

    # Pair every reference label with its parsed prediction (None = unparseable)
    pairs = [
        (int(result["Reference_answer"]), extract_sentiment(result["Generated_outputs"]))
        for result in results
    ]
    scored = [(ref, pred) for ref, pred in pairs if pred is not None]
    skipped = len(pairs) - len(scored)

    print(f"Results for {os.path.basename(file_path)}:")
    if skipped:
        print(f"Skipped {skipped} of {len(pairs)} generations with no parseable sentiment")

    # The metric functions cannot score an empty sample; report and bail out.
    if not scored:
        print("No parseable predictions; metrics not computed.\n")
        return None

    references = [ref for ref, _ in scored]
    predictions = [pred for _, pred in scored]

    # Calculate accuracy, F1 score, precision, and recall
    metrics = {
        "accuracy": accuracy_score(references, predictions),
        "f1": f1_score(references, predictions, average='binary'),
        "precision": precision_score(references, predictions, average='binary'),
        "recall": recall_score(references, predictions, average='binary'),
        "evaluated": len(scored),
        "skipped": skipped,
    }

    # Print results
    print(f"Accuracy: {metrics['accuracy']}")
    print(f"F1 Score: {metrics['f1']}")
    print(f"Precision: {metrics['precision']}")
    print(f"Recall: {metrics['recall']}\n")
    return metrics

# Generation files to score, one per evaluated checkpoint
json_files = [
    f'/work/gns938/nlp_hw3/{stem}.json'
    for stem in (
        'generated_output_sentiment_fine_tuned',
        'generated_output_mixed_fine_tuned',
        'generated_output_original_pretrained',
    )
]

# Print the metrics for each checkpoint in turn
for file_path in json_files:
    evaluate_results(file_path)

Results for generated_output_sentiment_fine_tuned.json:
Accuracy: 0.92
F1 Score: 0.9130434782608695
Precision: 0.9130434782608695
Recall: 0.9130434782608695

Results for generated_output_mixed_fine_tuned.json:
Accuracy: 0.88
F1 Score: 0.8636363636363636
Precision: 0.9047619047619048
Recall: 0.8260869565217391

Results for generated_output_original_pretrained.json:
Accuracy: 0.62
F1 Score: 0.6885245901639344
Precision: 0.5526315789473685
Recall: 0.9130434782608695



In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset
import torch
from tqdm import tqdm
import json

# Checkpoint locations handed to generate_response() further down
model_sentiment_path = '/work/gns938/nlp_hw3/Mistral-sentiment-fine-tuned'
model_mixed_path = '/work/gns938/nlp_hw3/Mistral-mixed-fine-tuned'

# Ten non-sentiment instruction prompts, each ending with an "### Answer: "
# cue for the model to continue from.
# NOTE(review): the layout is not uniform -- some prompts put the input on the
# "### Input:" line while others start it on the next line, and the
# photosynthesis prompt has a single newline before "### Answer:" where the
# rest have two. Confirm whether this is intentional before comparing outputs.
instructions = [
    "### Instruction:\nSummarize the text provided in one sentence.\n\n### Input:\nArtificial intelligence refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions.\n\n### Answer: ",
    "### Instruction:\nTranslate the following sentence from English to French.\n\n### Input: What time is the sunset tonight?\n\n### Answer: ",
    "### Instruction:\nGenerate a question based on the text.\n\n### Input: Global warming is causing Arctic ice to melt at unprecedented rates.\n\n### Answer: ",
    "### Instruction:\nExplain the implications of this historical event.\n\n### Input: In 1990, Germany was reunified, marking the end of the Cold War in Europe.\n\n### Answer: ",
    "### Instruction:\nWrite a poem about the theme described.\n\n### Input: The relentless pursuit of technological advancement.\n\n### Answer: ",
    "### Instruction:\nDescribe the steps involved in photosynthesis.\n\n### Input: Photosynthesis is the process by which green plants and some other organisms use sunlight to synthesize foods from carbon dioxide and water.\n### Answer: ",
    "### Instruction:\nConvert this dialogue into a formal report.\n\n### Input: Customer: 'I'd like to return this item.' Sales Assistant: 'Certainly, do you have the receipt?'\n\n### Answer: ",
    "### Instruction:\nCreate a list of recommendations based on the user's preferences.\n\n### Input: The user enjoys historical novels, prefers complex characters, and dislikes predictable plots.\n\n### Answer: ",
    "### Instruction:\nDeduce the moral of the story provided.\n\n### Input: A fox, after failing to reach a bunch of grapes, declared they were sour anyway.\n\n### Answer: ",
    "### Instruction:\nIdentify and explain the literary devices used in this text.\n\n### Input: The wind whispered through the dark, foreboding woods.\n\n### Answer: "
]

# Wrap the prompts in a Dataset with a single 'text' column.
# NOTE: this rebinds `test_dataset`, a name an earlier cell uses for the
# sentiment test split.
test_dataset = Dataset.from_dict({'text': instructions})

def generate_response(model_path, test_dataset):
    """Load the checkpoint at ``model_path`` and complete every prompt in ``test_dataset``.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the model
            and the tokenizer.
        test_dataset: Dataset with a ``text`` column of prompts; must support
            ``len()``, ``select(indices)`` and row iteration.

    Returns:
        A list with one generated text per prompt, in dataset order.
    """
    text_generator = pipeline(
        task="text-generation",
        model=AutoModelForCausalLM.from_pretrained(model_path),
        tokenizer=AutoTokenizer.from_pretrained(model_path),
        device=0,
    )

    batch_size = 1
    num_examples = len(test_dataset)
    # Ceiling division: number of pipeline calls needed to cover all prompts
    total_batches = (num_examples + batch_size - 1) // batch_size
    completions = []

    batch_starts = range(0, num_examples, batch_size)
    for start in tqdm(batch_starts, total=total_batches, desc="Generating text"):
        stop = min(start + batch_size, num_examples)
        chunk = test_dataset.select(range(start, stop))
        outputs = text_generator([row['text'] for row in chunk], max_new_tokens=128)

        # Each prompt yields a list of candidates; keep the first one's text
        completions.extend(candidates[0]['generated_text'] for candidates in outputs)

    return completions

# Generate completions for the same prompts with each fine-tuned checkpoint
responses_sentiment = generate_response(model_sentiment_path, test_dataset)
responses_mixed = generate_response(model_mixed_path, test_dataset)

# Every prompt must have exactly one completion per model; otherwise the
# side-by-side pairing below would be misaligned or silently truncated.
assert len(responses_sentiment) == len(instructions) == len(responses_mixed)

# One record per prompt with both models' outputs side by side
results_json = [
    {
        "Instruction": instruction,
        "Sentiment_Fine_Tuned": sentiment_response,
        "Mixed_Fine_Tuned": mixed_response
    }
    for instruction, sentiment_response, mixed_response
    in zip(instructions, responses_sentiment, responses_mixed)
]

# Plain write mode is enough: the file is only written, never read back here
with open('/work/gns938/nlp_hw3/generated_output_instructions.json', 'w') as f:
    json.dump(results_json, f, indent=4)

print("Results saved to 'generated_output_instructions.json'")

Loading checkpoint shards: 100%|██████████| 2/2 [00:16<00:00,  8.21s/it]
Generating text: 100%|██████████| 10/10 [00:47<00:00,  4.74s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:16<00:00,  8.04s/it]
Generating text: 100%|██████████| 10/10 [00:42<00:00,  4.22s/it]

Results saved to 'generated_output_instructions.json



