In [None]:
!pip install wheel setuptools pip --upgrade
!pip install --upgrade openai
!pip install rouge
!pip install nltk
!pip install bert_score
!pip install -U sentence-transformers

## Data Processing

In [2]:
import pandas as pd
import random
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from rouge import Rouge
from bert_score import score
import numpy as np

# Location of the review dataset, resolved relative to the notebook's working directory.
DATASET_PATH = 'IMDB Dataset.csv'

# Later cells read the 'review' and 'sentiment' columns of this frame.
review_df = pd.read_csv(DATASET_PATH)

## Non-Reusable Encryption & Performance Evaluation

In [None]:
# Leave API_KEY blank to use the OPENAI_API_KEY environment variable; do not
# commit a real key inside the notebook.
API_KEY = ''
# A blank key is forwarded as None so the client falls back to the environment
# variable (and raises immediately if that is unset) instead of sending an
# empty key that only fails once the first request is made.
client = OpenAI(api_key = API_KEY or None)
model_id = 'gpt-4-1106-preview'

# Define the system message
system_msg = "Please serve as a binary sentiment classifier on movie reviews."

# Running tallies updated by the evaluation loop.
right_count = 0              # correct predictions on the original reviews
compressed_right_count = 0   # correct predictions on the compressed reviews
total = 0                    # reviews processed so far
gpt_review_pairs = []        # [original review, decrypted review] pairs

def _chat(system_content, user_content, **request_options):
    """Send one system + user exchange to `model_id` and return the reply text.

    Args:
        system_content: Text of the system message.
        user_content: Text of the user message.
        **request_options: Extra keyword arguments forwarded to
            `client.chat.completions.create` (e.g. `max_tokens`).

    Returns:
        The first choice's message content, or '' when the reply carries no
        content, so callers can always treat the result as a string.
    """
    completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": system_content},
                        {"role": "user", "content": user_content}],
            timeout = 1200, **request_options)
    return completion.choices[0].message.content or ''


def _normalize_label(raw_prediction):
    """Reduce a model reply to a bare lower-case label.

    Surrounding whitespace, quotes and full stops are removed so that replies
    such as "Positive" or "positive.\n" still match the dataset label.
    """
    return raw_prediction.strip().strip('.\'"').strip().lower()


# Set to an integer (e.g. 300) to evaluate only the first N reviews;
# None runs the whole dataset.
MAX_REVIEWS = None
reviews_to_evaluate = review_df if MAX_REVIEWS is None else review_df.head(MAX_REVIEWS)

for _, row in reviews_to_evaluate.iterrows():

    # Baseline: classify the untouched review.
    original_prompt = (
            f"Given the following movie review: {row['review']}; "
            "Determine the sentiment of the review; Do NOT explain anything, just output positive or negative, in lower case:"
    )

    original_pred = _normalize_label(_chat(system_msg, original_prompt))

    total += 1
    if row['sentiment'] == original_pred:
        right_count += 1


    # Perform review encryption
    compress_prompt = (
        "Transform the following movie review into a highly condensed, NON-natural language sequence. "
        "This sequence should be rich in information and capture all the emotional nuances of the review. "
        "You may use a mix of arbitrary characters, emojis, emoticons (Ex. '^-^', '-_-', etc.), as well as math & logical operators (Ex. '->', '+', '<=', etc.). "
        "The output should be easy to understand for GPT-4. "
        # "After compression, generate a brief natural language explanation on how GPT-4 will interpret it. "
        "You are also encouraged to paraphrase and/or amplify the emotional aspect of the review before compressing. Please use as fewer tokens as possible! \n\n"
        f"Review to transform: {row['review']}"
    )

    # max_tokens caps the length of the compressed form.
    compressed_review = _chat(
        'Please serve as a movie review compressor according to the prompt',
        compress_prompt,
        max_tokens = 200)

    # visualize encrypted review
    print(row['review'])
    print(compressed_review)
    print()

    # Classify the compressed form with the same system message as the baseline.
    sentiment_analysis_prompt = (
        "You are given a movie review presented as a compressed NON-natural language sequence, "
        "utilizing a mix of arbitrary characters, emojis, emoticons, and special characters. "
        "This sequence encapsulates the emotional content of the original review. "
        "Analyze the sentiment of the compressed review and ONLY output it as either 'positive' or 'negative', "
        "without detailing the decoding process or explanations. Below is the compressed review: \n\n"
        f"{compressed_review}"
    )

    compressed_pred = _normalize_label(_chat(system_msg, sentiment_analysis_prompt))

    if row['sentiment'] == compressed_pred:
        compressed_right_count += 1


    # Report running accuracy every 10 reviews and once more after the last one.
    if total % 10 == 0 or total == len(reviews_to_evaluate):
        print(f"Accuracy: {right_count/total}")
        print(f"Compressed Accuracy: {compressed_right_count/total}")
        print()


    # Ask the model to turn the compressed form back into natural language;
    # the pair is stored for the similarity measurements in later cells.
    decryption_prompt = (
        "Given the following compressed movie review in non-natural language form: "
        f"{compressed_review} \n\n"
        "Try to decrypt it into a natural language review; return the decoded review only: "
    )

    decryption_pred = _chat(
        "You are to serve as a decrypter for movie reviews represented in emojis, emoticons, abbreviated characters, as well as math & logical operators (Ex. '->', '+', '<=', etc.).",
        decryption_prompt)
    gpt_review_pairs.append( [row['review'], decryption_pred] )


## Decryption Robustness Test

In [None]:
# Initialize ROUGE
rouge = Rouge()

# Running sums of f-score / precision / recall for each ROUGE variant.
accumulated_scores = {metric: {"f": 0, "p": 0, "r": 0}
                      for metric in ("rouge-1", "rouge-2", "rouge-l")}

num_pairs = 0       # pairs that were actually scored
skipped_pairs = 0   # pairs that could not be scored

for original_review, decryption_review in gpt_review_pairs:
    # An empty text on either side cannot be scored; skip it rather than
    # letting one bad pair abort the whole evaluation.
    if not original_review or not decryption_review:
        skipped_pairs += 1
        continue

    try:
        # Hypothesis (decrypted text) first, reference (original review) second.
        scores = rouge.get_scores(decryption_review, original_review)[0]
    except ValueError:
        # NOTE(review): presumably raised when a text has no scorable
        # sentences (e.g. punctuation only) - confirm against the rouge package.
        skipped_pairs += 1
        continue

    # Accumulate scores
    for k, v in scores.items():
        for score_type, score_value in v.items():
            accumulated_scores[k][score_type] += score_value
    num_pairs += 1


# Calculate average scores over the pairs that were scored
if num_pairs:
    average_scores = {k: {score_type: score_value / num_pairs for score_type, score_value in v.items()}
                      for k, v in accumulated_scores.items()}
else:
    # Nothing to average; avoid dividing by zero.
    average_scores = {}
    print("No review pairs could be scored.")

if skipped_pairs:
    print(f"Skipped {skipped_pairs} pair(s) that could not be scored.")

# Print average scores, rounded to 4 decimal places
for k, v in average_scores.items():
    print(f"{k}:")
    for score_type, score_value in v.items():
        print(f"  {score_type}: {round(score_value, 4)}")


In [None]:
def _embed(text):
    """Embed `text` with `text-embedding-3-small` and return it as a (1, n) row vector.

    The `dimensions` parameter shortens the returned embedding; it is set to
    100 here to mitigate the curse of dimensionality while keeping the
    embedding's concept-representing properties.
    """
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
        dimensions = 100,
    )
    # cosine_similarity expects 2-D inputs, hence the reshape to a single row.
    return np.array(response.data[0].embedding).reshape(1, -1)


overall_similarity_score = 0.0
scored_pairs = 0    # pairs that contributed to the mean
skipped_pairs = 0   # pairs with an empty text on either side

for original_review, decryption_review in gpt_review_pairs:
    # An empty text has nothing to embed (the API is expected to reject it),
    # so leave the pair out instead of aborting the run.
    if not original_review or not decryption_review:
        skipped_pairs += 1
        continue

    original_review_embedding = _embed(original_review)
    decryption_review_embedding = _embed(decryption_review)

    # cosine_similarity returns a (1, 1) array for two row vectors; take the
    # scalar so the running total and the printed mean are plain floats.
    similarity_score = float(
        cosine_similarity(original_review_embedding, decryption_review_embedding)[0, 0]
    )
    overall_similarity_score += similarity_score
    scored_pairs += 1

print()
if skipped_pairs:
    print(f"Skipped {skipped_pairs} pair(s) with empty text.")
if scored_pairs:
    print('Mean cosine sim: ', overall_similarity_score / scored_pairs)
else:
    # Nothing to average; avoid dividing by zero.
    print('Mean cosine sim: n/a (no review pairs to score)')