In [None]:
!pip install wheel setuptools pip --upgrade
!pip install --upgrade openai
!curl ipinfo.io
!pip install -q google-generativeai
!pip install rouge
!pip install nltk
!pip install bert_score
!pip install -U sentence-transformers

## Data Processing

In [2]:
import os
import random
import time

import google.generativeai as genai
import numpy as np
import pandas as pd
from bert_score import score
from openai import OpenAI
from rouge import Rouge
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the review dataset; later cells read its 'review' and 'sentiment' columns.
review_df = pd.read_csv(filepath_or_buffer='IMDB Dataset.csv')

## Non-Reusable Encryption & Performance Evaluation

In [None]:
# Credentials are read from the environment so that keys never have to be
# pasted into (and accidentally committed with) the notebook. When a variable
# is unset the value falls back to the empty string, as before.
genai.configure(api_key=os.environ.get('GOOGLE_API_KEY', ''))

API_KEY = os.environ.get('OPENAI_API_KEY', '')
client = OpenAI(api_key = API_KEY)
model_id = 'gpt-4-1106-preview'

# System message shared by both sentiment-classification requests
system_msg = "Please serve as a binary sentiment classifier on movie reviews."

# Running tallies updated by the evaluation loop
right_count = 0             # correct predictions on the original reviews
compressed_right_count = 0  # correct predictions on the compressed reviews
total = 0                   # number of reviews processed so far
gemini_review_pairs = []    # [original review, decrypted review] pairs

# Evaluation loop. For every review it:
#   1. asks the OpenAI model for the sentiment of the original text,
#   2. asks Gemini to rewrite the review as a compressed non-natural-language sequence,
#   3. asks the OpenAI model for the sentiment of that compressed sequence,
#   4. asks the OpenAI model to decode the sequence back into natural language
#      and stores the [original, decoded] pair in `gemini_review_pairs`.
# Running accuracies are printed every 10 reviews and after the last one.


def _normalize_label(text):
    """Return `text` reduced to a bare lower-case label for comparison.

    Surrounding whitespace, quotes and trailing '.'/'!' are removed and the
    result is lower-cased, so replies such as "Positive." or " negative\n"
    compare equal to "positive" / "negative". `None` becomes "".
    """
    if text is None:
        return ""
    cleaned = str(text).strip().strip("\"'.!").strip()
    return cleaned.lower()


# Loop-invariant Gemini objects: built once instead of once per review.
model = genai.GenerativeModel('gemini-pro')

generation_config = genai.GenerationConfig(
    stop_sequences = None,
    temperature= 1.0,
    max_output_tokens = 200,
)

safety_settings = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
]

for index, row in review_df.iterrows():

    # Both sides of every comparison go through the same normalisation, so
    # anything that matched exactly before still matches.
    true_label = _normalize_label(row['sentiment'])

    original_prompt = (
        f"Given the following movie review: {row['review']}; "
        f"Determine the sentiment of the review; Do NOT explain anything, just output positive or negative, in lower case:"
    )

    completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": system_msg},
                        {"role": "user", "content": original_prompt }],
            timeout = 1200)

    original_pred = completion.choices[0].message.content

    total += 1
    if true_label == _normalize_label(original_pred):
        right_count += 1

    # Perform review encryption
    # NOTE(review): the prompt text below is kept byte-for-byte (including the
    # "sentnce" typo and the missing separators between some segments) so that
    # results stay comparable with earlier runs.
    compress_prompt = (
        "Transform each sentnce of the following movie review into a highly condensed, NON-natural language sequence. "
        "This sequence should be rich in information and capture all the emotional nuances of the review. "
        "You may use a mix of emojis, emoticons, abbreviated characters, as well as math & logical operators (Ex. '->', '+', '<=', etc.). "
        "The output should retain meaning for easy LLM understanding. "
        # "After transformation, generate a brief natural language explaination on how a LLM can interpret it as a movie review. "
        "Do NOT explain your output!"
        "Example 1: \n"
        "Original Review: "
        "I thought this was a wonderful way to spend time on a too hot summer weekend, sitting in the air conditioned theater and watching a light-hearted comedy. The plot is simplistic, but the dialogue is witty and the characters are likable (even the well bread suspected serial killer). While some may be disappointed when they realize this is not Match Point 2: Risk Addiction, I thought it was proof that Woody Allen is still fully in control of the style many of us have grown to love.<br /><br />This was the most I'd laughed at one of Woody's comedies in years (dare I say a decade?). While I've never been impressed with Scarlet Johanson, in this she managed to tone down her 'sexy' image and jumped right into a average, but spirited young woman.<br /><br />This may not be the crown jewel of his career, but it was wittier than 'Devil Wears Prada' and more interesting than 'Superman' a great comedy to go see with friends. \n"
        "Compressed Review: "
        "🌡️🔼🕒🎥😄📉📜🔍🎭👍(🍞🔪🤔)🚫🎾2️⃣👏🧩👴🎩❤️🤣🪑10y🚫🤩🧣👩🔽🔥👟👏👥 \n\n"
        "Example 2: \n"
        "Original Review: "
        "This show was an amazing, fresh & innovative idea in the 70's when it first aired. The first 7 or 8 years were brilliant, but things dropped off after that. By 1990, the show was not really funny anymore, and it's continued its decline further to the complete waste of time it is today.<br /><br />It's truly disgraceful how far this show has fallen. The writing is painfully bad, the performances are almost as bad - if not for the mildly entertaining respite of the guest-hosts, this show probably wouldn't still be on the air. I find it so hard to believe that the same creator that hand-selected the original cast also chose the band of hacks that followed. How can one recognize such brilliance and then see fit to replace it with such mediocrity? I felt I must give 2 stars out of respect for the original cast that made this show such a huge success. As it is now, the show is just awful. I can't believe it's still on the air."
        "Compressed Review: "
        "[📺+💡+😀(70's)]->[😐(1990)]->[👎(Today)][✍️⬇️]|[🎭⬇️][🎤+😂] >= [📺+📻][🧠💡↔️🧠👇]"
        f"Original Review:: {row['review']}"
    )

    # Keep asking Gemini until a response with readable text comes back.
    # NOTE(review): this retries indefinitely; a failure that never clears
    # will keep the loop printing the error every 3 seconds.
    successful = False
    while not successful:
        try:
            response = model.generate_content(contents=compress_prompt, generation_config=generation_config,
                                              safety_settings=safety_settings)

            # Try to assign compressed_review using response.text
            compressed_review = response.text
            successful = True  # If no error, mark as successful

        except Exception as e:
            print(f"An error occurred: {e}. Retrying...")
            time.sleep(3)

    print(row['review'])
    print(compressed_review)
    print()

    sentiment_analysis_prompt = (
        "You are given a movie review presented as a compressed NON-natural language sequence, "
        "utilizing a mix of abbreviated characters, emojis, emoticons, as well as math & logical operators (Ex. '->', '+', '<=', etc.). "
        "This sequence encapsulates the emotional content of the original review. "
        "Analyze the sentiment of the compressed review and ONLY output it as either 'positive' or 'negative', "
        "without detailing the decoding process or explanations. Below is the compressed review: \n\n"
        f"{compressed_review}"
    )

    compressed_completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": system_msg},
                        {"role": "user", "content": sentiment_analysis_prompt}],
            timeout = 1200)

    compressed_pred = compressed_completion.choices[0].message.content

    if true_label == _normalize_label(compressed_pred):
        compressed_right_count += 1


    # Perform review decryption
    decryption_prompt = (
        "Given the following compressed movie review in non-natural language form: "
        f"{compressed_review} \n\n"
        "Try to decode it into a natural language review; return the decoded review only: "
    )

    decryption_completion = client.chat.completions.create(
            model = model_id, temperature = 0,
            messages=[{"role": "system", "content": "You are to serve as a decrypter for movie reviews represented in emojis, emoticons, abbreviated characters, as well as math & logical operators (Ex. '->', '+', '<=', etc.)."},
                        {"role": "user", "content": decryption_prompt}],
            timeout = 1200)

    decryption_pred = decryption_completion.choices[0].message.content
    # Stored for the similarity metrics computed in later cells.
    gemini_review_pairs.append( [row['review'], decryption_pred] )

    # Progress report every 10 reviews and once more after the final review.
    if total % 10 == 0 or total == review_df.shape[0]:
        print(f"Accuracy: {right_count/total}")
        print(f"Compressed Accuracy: {compressed_right_count/total}")
        print()

    # # If you would like to test for a subset
    # if total == 300:
    #     break


## Decryption Robustness Test

In [None]:
from rouge import Rouge
from bert_score import score

# Average ROUGE-1 / ROUGE-2 / ROUGE-L between each decrypted review
# (hypothesis) and its original review (reference).

# Initialize ROUGE
rouge = Rouge()

# Running sums of every metric; divided by the number of pairs at the end.
accumulated_scores = {"rouge-1": {"f": 0, "p": 0, "r": 0},
                      "rouge-2": {"f": 0, "p": 0, "r": 0},
                      "rouge-l": {"f": 0, "p": 0, "r": 0}}

for original_review, decryption_review in gemini_review_pairs:
    # get_scores returns a list with one entry per hypothesis; only one is passed.
    scores = rouge.get_scores(decryption_review, original_review)[0]

    # Accumulate scores
    for k, v in scores.items():
        for score_type, score_value in v.items():
            accumulated_scores[k][score_type] += score_value


# Calculate average scores
num_pairs = len(gemini_review_pairs)
if num_pairs == 0:
    # Nothing was collected (e.g. the evaluation cell was not run); avoid a
    # ZeroDivisionError and say so explicitly instead.
    average_scores = {}
    print("No review pairs collected; skipping ROUGE averages.")
else:
    average_scores = {k: {score_type: score_value / num_pairs for score_type, score_value in v.items()} for k, v in accumulated_scores.items()}

    # Print average scores, rounded to 4 decimal places
    for k, v in average_scores.items():
        print(f"{k}:")
        for score_type, score_value in v.items():
            print(f"  {score_type}: {round(score_value, 4)}")


In [None]:
import numpy as np

# Mean cosine similarity between the embeddings of each original review and
# its decrypted counterpart.

EMBEDDING_MODEL = "text-embedding-3-small"
# You can reduce the dimensions of the embedding by passing in the dimensions parameter without
# the embedding losing its concept-representing properties: set to 100 to mitigate curse of dimensionality
EMBEDDING_DIMENSIONS = 100


def _embed_review(text):
    """Request an embedding for `text` and return it as a (1, d) NumPy row vector."""
    response = client.embeddings.create(
        input=text,
        model=EMBEDDING_MODEL,
        dimensions = EMBEDDING_DIMENSIONS,
    )
    # reshape to a single-row 2-D array, the layout cosine_similarity expects
    return np.array(response.data[0].embedding).reshape(1, -1)


overall_similarity_score = 0.0
for original_review, decryption_review in gemini_review_pairs:
    original_review_embedding = _embed_review(original_review)
    decryption_review_embedding = _embed_review(decryption_review)

    # cosine_similarity returns a 1x1 matrix here; take the scalar so the
    # running total (and the printed mean) is a plain number, not [[x]].
    similarity_score = float(
        cosine_similarity(original_review_embedding, decryption_review_embedding)[0][0]
    )
    overall_similarity_score += similarity_score

print()
if len(gemini_review_pairs) == 0:
    # Avoid dividing by zero when no pairs were collected.
    print('Mean cosine sim: ', 'n/a (no review pairs collected)')
else:
    print('Mean cosine sim: ', overall_similarity_score / len(gemini_review_pairs))