In [None]:
import getpass
import os

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

In [None]:
# Display options: cap the shown column width and keep DataFrame previews small.
for option_name, option_value in (
    ("max_colwidth", 100),
    ("display.max_rows", 10),
    ("display.max_columns", 5),
):
    pd.set_option(option_name, option_value)

# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
# `DS` and `sentiment_data` are two names for the same DataFrame object.
sentiment_data = DS = pd.read_pickle('balanced_IMDB.pkl')
sentiment_data


In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install --upgrade openai

In [None]:
from openai import OpenAI

# Do not paste a real key into the notebook: it is read from the
# OPENAI_API_KEY environment variable, and a hidden interactive prompt is used
# as a fallback when that variable is unset or empty.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: "),
)

In [None]:
def get_completion(messages, model="gpt-4o"):
    """Send a chat conversation to the API and return the text of the reply.

    Args:
        messages: Chat messages passed unchanged to the chat-completions call.
        model: Name of the model to query.

    Returns:
        The `content` of the message in the first choice of the response.

    Note:
        A later cell defines `get_completion` again with a different default
        model; whichever definition was executed last is the one in effect.
    """
    request_options = {
        "model": model,
        "messages": messages,
        "temperature": 0,  # degree of randomness of the model's output
    }
    completion = client.chat.completions.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def get_completion(messages, model="gpt-3.5-turbo-0125"):
    """Query the chat-completions endpoint and return the reply text.

    Args:
        messages: Chat messages passed unchanged to the chat-completions call.
        model: Name of the model to query.

    Returns:
        The `content` of the message in the first choice of the response.

    Note:
        An earlier cell defines `get_completion` with default model "gpt-4o";
        running the cells in order makes this definition replace it.
    """
    # temperature is the degree of randomness of the model's output
    api_reply = client.chat.completions.create(model=model, messages=messages, temperature=0)
    top_choice = api_reply.choices[0]
    reply_text = top_choice.message.content
    return reply_text

In [None]:

import time

def connect_with_api(messages, max_retries=3, delay=1):
    """Call `get_completion`, trying again when it raises.

    Args:
        messages: Chat messages forwarded unchanged to `get_completion`.
        max_retries: Maximum number of attempts before giving up.
        delay: Seconds to wait between a failed attempt and the next one.

    Returns:
        The value returned by `get_completion`, or None when every attempt
        raised. Callers must be prepared for the None case.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return get_completion(messages)
        except Exception as e:
            print(f"An error occurred: {e}")
            # Announce a retry and wait only when another attempt really follows;
            # after the last failure there is nothing left to wait for.
            if attempt < max_retries:
                print(f"Retrying... (Attempt {attempt} of {max_retries})")
                time.sleep(delay)

    print("Failed to connect with the API after multiple attempts.")
    return None

In [None]:
# Per sentiment class, draw a test sample and, from the remaining rows of that
# class, a small "train" sample (used later as in-context examples).
test_samples_per_sentiment = 996
train_samples_per_sentiment = 4

# Collect the per-class samples and concatenate once at the end instead of
# growing a DataFrame inside the loop.
test_parts = []
train_parts = []
for sentiment in sentiment_data['sentiment'].unique():
    sentiment_df = sentiment_data[sentiment_data['sentiment'] == sentiment]
    test_samples = sentiment_df.sample(test_samples_per_sentiment, random_state=1)
    # Rows already chosen for the test set are removed before sampling the train rows.
    train_samples = sentiment_df.drop(test_samples.index).sample(train_samples_per_sentiment, random_state=1)

    test_parts.append(test_samples)
    train_parts.append(train_samples)

# ignore_index=True gives both frames a fresh 0..n-1 index.
# Rows stay grouped by class, in the order the classes were visited above.
test_df = pd.concat(test_parts, ignore_index=True)
train_df = pd.concat(train_parts, ignore_index=True)

In [None]:
def clean_response(response):
    """Map a raw model reply to 'positive', 'negative', or None.

    Args:
        response: Text returned by the model. May be None (or another
            non-string value) when no reply was obtained.

    Returns:
        'positive' if the lower-cased reply contains that word, otherwise
        'negative' if it contains that word, otherwise None. 'positive' is
        checked first, so it wins when both words appear. Non-string input
        yields None instead of raising.
    """
    if not isinstance(response, str):
        return None

    normalized = response.strip().lower()
    for label in ('positive', 'negative'):
        if label in normalized:
            return label
    return None

In [None]:

# Experiment configurations

# Prompt designs understood by run_experiment.
_ZERO_SHOT_DESIGNS = ("zeroU", "zeroSU")
_FEW_SHOT_DESIGNS = ("fewU", "fewSU", "fewSUA")

# Folder used when run_experiment is called without an explicit output_dir.
_DEFAULT_OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/In_context_learning"

# Second line of the task instruction, shared by every prompt design.
_FORMAT_RULE = (
    "For the review provided, classify its sentiment as a single word "
    "(without other marks or words like 'sentiment:'), "
    "either \"positive\", or \"negative\"."
)


def _instruction(ending, indent=13):
    """Return the two-line task instruction.

    `ending` is the punctuation that closes the first line and `indent` is the
    number of spaces that start the second line; both reproduce the exact text
    of the notebook's original prompt strings.
    """
    first_line = "Determine the sentiment of the movie based on the provided review" + ending
    return first_line + "\n" + " " * indent + _FORMAT_RULE


def _pick_few_shot_examples(train_df, n_examples=3):
    """Pick up to `n_examples` (review, sentiment) pairs, cycling through the labels.

    Taking fixed row positions from a frame whose rows are grouped by class
    yields examples of a single class only; cycling through the labels makes
    every class appear among the examples whenever `n_examples` allows it.
    """
    # sort=False keeps the labels in their order of first appearance.
    per_label = [group for _, group in train_df.groupby('sentiment', sort=False)]
    examples = []
    depth = 0
    while len(examples) < n_examples and any(depth < len(group) for group in per_label):
        for group in per_label:
            if depth < len(group) and len(examples) < n_examples:
                picked = group.iloc[depth]
                examples.append((picked['review'], picked['sentiment']))
        depth += 1
    return examples


def _build_messages(prompt_design, review, examples):
    """Build the chat messages for one review under the given prompt design."""
    fenced_review = f"```{review}```"
    example_block = "\n".join(f"```{text}``` - {label}" for text, label in examples)
    user_instruction = _instruction(":")
    system_instruction = _instruction(".")

    if prompt_design == "zeroU":
        return [{"role": "user", "content": f"{user_instruction}\n{fenced_review}"}]
    if prompt_design == "zeroSU":
        return [{"role": "system", "content": system_instruction},
                {"role": "user", "content": fenced_review}]
    if prompt_design == "fewU":
        return [{"role": "user",
                 "content": f"{user_instruction}\nExamples:\n{example_block}\n...\n{fenced_review}"}]
    if prompt_design == "fewSU":
        return [{"role": "system", "content": system_instruction},
                {"role": "user", "content": f"Examples:\n{example_block}\n...\n{fenced_review}"}]
    if prompt_design == "fewSUA":
        # Examples are replayed as earlier user/assistant turns; the example
        # reviews are sent unfenced, only the review to classify is fenced.
        messages = [{"role": "system", "content": _instruction(".", indent=12)}]
        for text, label in examples:
            messages.append({"role": "user", "content": text})
            messages.append({"role": "assistant", "content": label})
        messages.append({"role": "user", "content": fenced_review})
        return messages
    raise ValueError(f"Unknown prompt_design: {prompt_design!r}")


def run_experiment(prompt_design, test_df, train_df, experiment_name, output_dir=None):
    """Classify every review in `test_df` with one prompt design and report scores.

    Args:
        prompt_design: One of "zeroU", "zeroSU", "fewU", "fewSU", "fewSUA".
        test_df: DataFrame with 'review' and 'sentiment' columns to evaluate on.
        train_df: DataFrame with 'review' and 'sentiment' columns that supplies
            the in-context examples for the few-shot designs.
        experiment_name: Label used in the printed report and the CSV file name.
        output_dir: Existing folder for the results CSV. Defaults to the
            notebook's Google Drive folder.

    Raises:
        ValueError: If `prompt_design` is not a known design.
        FileNotFoundError: If the output folder does not exist.

    Side effects:
        Prints each prediction and the final scores, and writes
        "<experiment_name>_results.csv" into the output folder. Rows whose
        prediction cannot be mapped to a label (including failed API calls)
        count as structurally invalid and are left out of the F1 score and of
        the saved CSV.
    """
    if prompt_design not in _ZERO_SHOT_DESIGNS + _FEW_SHOT_DESIGNS:
        raise ValueError(f"Unknown prompt_design: {prompt_design!r}")

    if output_dir is None:
        output_dir = _DEFAULT_OUTPUT_DIR
    # Check the destination before making any API call, so that a missing
    # folder does not throw away a whole run's worth of predictions at the end.
    if not os.path.isdir(output_dir):
        raise FileNotFoundError(f"Output directory does not exist: {output_dir}")
    output_path = os.path.join(output_dir, f"{experiment_name}_results.csv")

    examples = _pick_few_shot_examples(train_df) if prompt_design in _FEW_SHOT_DESIGNS else []

    test_results = []
    for index, row in test_df.iterrows():
        messages = _build_messages(prompt_design, row['review'], examples)

        prediction = connect_with_api(messages)
        print(index, ' ', prediction)

        test_results.append({'Index': index, 'review': row['review'], 'Predicted_sentiment': prediction, 'Actual_sentiment': row['sentiment']})

    results_df = pd.DataFrame(test_results, columns=['Index', 'review', 'Predicted_sentiment', 'Actual_sentiment'])

    # Calculate structural accuracy before cleaning.
    # A prediction may be None when the API call failed; that is simply invalid.
    valid_sentiment = ['positive', 'negative']
    results_df['Is_Structurally_Valid'] = results_df['Predicted_sentiment'].apply(
        lambda x: isinstance(x, str) and x.strip().lower() in valid_sentiment)
    structural_accuracy = results_df['Is_Structurally_Valid'].mean()

    # Calculate F1 Score on the rows that could be mapped to a label.
    results_df['Predicted_sentiment'] = results_df['Predicted_sentiment'].apply(
        lambda x: clean_response(x) if isinstance(x, str) else None)
    results_df = results_df.dropna(subset=['Predicted_sentiment', 'Actual_sentiment'])
    if results_df.empty:
        f1 = float('nan')  # nothing left to score
    else:
        f1 = f1_score(results_df['Actual_sentiment'], results_df['Predicted_sentiment'], average='micro')

    print(f"Experiment: {experiment_name}")
    print(f"The F1 Score for the predicted sentiment is: {f1}")
    print(f"The Structural Accuracy is: {structural_accuracy}")

    results_df.to_csv(output_path, index=False)
    print(f"Results for {experiment_name} saved to {experiment_name}_results.csv")

In [None]:
# Running the experiments
# Zero-shot: instructions and review together in a single user message.
run_experiment(
    prompt_design="zeroU",
    test_df=test_df,
    train_df=train_df,
    experiment_name="Zero-shot User Prompt",
)


In [None]:
# Zero-shot: instructions in the system message, review in the user message.
run_experiment(
    prompt_design="zeroSU",
    test_df=test_df,
    train_df=train_df,
    experiment_name="Zero-shot System and User Prompt",
)


In [None]:
# Few-shot: instructions, examples and review together in a single user message.
run_experiment(
    prompt_design="fewU",
    test_df=test_df,
    train_df=train_df,
    experiment_name="Few-shot User Prompt",
)


In [None]:
# Few-shot: instructions in the system message, examples and review in the user message.
run_experiment(
    prompt_design="fewSU",
    test_df=test_df,
    train_df=train_df,
    experiment_name="Few-shot System and User Prompt",
)


In [None]:
# Few-shot: examples replayed as earlier user/assistant turns after the system message.
run_experiment(
    prompt_design="fewSUA",
    test_df=test_df,
    train_df=train_df,
    experiment_name="Few-shot System, User, and Assistant Prompt",
)