In [None]:
# Prompt template for the PROMPT() calc experiment.
#
# Layout of the template (sections are marked with `//` headers inside the text):
#   //system-message    - role description and the expected output shape
#   //prompt-injection  - the `{user_input}` and `{input_data}` placeholders
#   //output-rules      - formatting constraints on the model response
#   //few-shot-examples - one worked classification example (input + output)
#
# NOTE(review): the few-shot JSON below contains literal braces, so
# str.format() cannot be applied to this string as-is; substitute the two
# placeholders with str.replace() (or escape the braces) instead.
# NOTE(review): the output-shape example (`{row_n: {output: foo}, ...}`) is not
# valid JSON, while the output rules require `json.loads`-parsable output -
# confirm which one the model should follow.
# NOTE(review): the few-shot keys are "row0".."row9", whereas the request rows
# built later in this notebook are keyed "row_<index>" - confirm the mismatch
# is harmless.
system_message = '''
//system-message
You are an expert at text analysis and performing operations over data given some arbitrary user_input. You are exposed through a new calc function in Tableau called Prompt(), which takes a column of type and string and generates an output for each row based on the criteria in the expression argument of the calc. The syntax for prompt() is PROMPT(string, <expr>).

An example using this function is:
PROMPT([product_reviews], "classify each product review as positive, negative or neutral"). 

You will be given an array of dictionaries corresponding to a row number and an input value. For each row, generate an output dict based on the criteria defined in the user_input. Output dicts should be in dictionary format:
{row_n: {output: foo}, row_n+1: {output: bar}}

//prompt-injection
user_input = {user_input}

Generate outputs for the following rows:
input_data = {input_data}

//output-rules
- your output should be passed in a variable called OUTPUT
- The response should only include OUTPUT and its corresponding output dictionary
- OUTPUT must be parsable in Python using `json.loads(OUTPUT)`.

//few-shot-examples
user_input = “classify each product review as positive, neutral, or negative.”

input_data = {
    "row0": {
        "input": "This mouse is fantastic! The wireless connection is reliable, and the battery lasts forever. It fits perfectly in my hand, making it super comfortable for long hours of work."
    },
    "row1": {
        "input": "The binder looked promising, but the rings started misaligning after just a week of use. Not durable at all. I expected more for the price."
    },
    "row2": {
        "input": "The print quality is good, but the ink runs out way too fast. It’s decent if you don’t print often, but heavy users should look elsewhere."
    },
    "row3": {
        "input": "This keyboard drawer is solid. It’s easy to install and has adjustable settings for different desk heights. A bit bulky, but it works well."
    },
    "row4": {
        "input": "It's paper. It does what it’s supposed to do, but it doesn’t feel premium. The sheets are a bit thin and sometimes jam in my printer."
    },
    "row5": {
        "input": "This case is sleek and protective! It adds minimal bulk and keeps my phone safe from drops. Love the matte finish."
    },
    "row6": {
        "input": "Terrible binder. The rings don’t align, and the plastic cover cracked after a month. I regret buying it."
    },
    "row7": {
        "input": "Absolutely worth every penny! My back has never felt better, even after hours of sitting. Top-notch quality and design."
    },
    "row8": {
        "input": "These pens write so smoothly, and the ink lasts a long time. I wish they had more color options, but overall, great pens."
    },
    "row9": {
        "input": "The monitor is decent for its price range. The resolution is sharp, but the colors feel slightly off compared to other screens I’ve used."
    }
}

output_data = {
    "row0": {
        "output": "positive"
    },
    "row1": {
        "output": "negative"
    },
    "row2": {
        "output": "neutral"
    },
    "row3": {
        "output": "positive"
    },
    "row4": {
        "output": "neutral"
    },
    "row5": {
        "output": "positive"
    },
    "row6": {
        "output": "negative"
    },
    "row7": {
        "output": "positive"
    },
    "row8": {
        "output": "positive"
    },
    "row9": {
        "output": "neutral"
    }
}
'''

# Empty starting values for the two names the prompt template refers to.
# `user_input` holds the natural-language instruction (a string).
user_input = ""

# `input_data` holds the rows to process (a dict); starts out with no rows.
input_data = dict()

In [None]:
import pandas as pd
import os

# Load the sample IMDb data.
filename = "another_sample_imdb.csv"  # Replace with your actual CSV file name
my_path = "~/Downloads"
downloads_path = os.path.expanduser(my_path)
file_path = os.path.join(downloads_path, filename)

# Candidate encodings, tried in order. ISO-8859-1 maps every possible byte value,
# so it always decodes and serves as the last resort. Its alias 'latin1' was
# dropped: it names the same codec and could never be reached.
encodings = ['utf-8', 'utf-16', 'ISO-8859-1']


def read_csv_with_fallback(path, candidate_encodings):
    """Read a CSV file, trying each encoding in turn until one decodes.

    Args:
        path: Location of the CSV file.
        candidate_encodings: Encoding names to try, in priority order.

    Returns:
        The DataFrame produced by the first encoding that decodes the file.

    Raises:
        ValueError: If none of the candidate encodings can decode the file.
        Exception: Any non-decoding error from ``pd.read_csv`` (missing file,
            malformed CSV, ...) propagates immediately, because retrying with
            another encoding cannot fix it.
    """
    last_error = None
    for encoding in candidate_encodings:
        try:
            frame = pd.read_csv(path, encoding=encoding)
        except UnicodeError as exc:
            # UnicodeError is the parent of UnicodeDecodeError, so every kind of
            # decoding failure moves on to the next candidate.
            print(f"Failed with encoding: {encoding}")
            last_error = exc
        else:
            print(f"File successfully read with encoding: {encoding}")
            return frame
    raise ValueError(
        f"Could not decode {path} with any of: {list(candidate_encodings)}"
    ) from last_error


# Fail here, with a clear error, rather than leaving `df` undefined for later cells.
df = read_csv_with_fallback(file_path, encodings)
df.head()  # Display the first few rows of the DataFrame

In [None]:
# Build a very small test dataset: the first 12 rows, 'Plot' column only.
small_df = df.head(12)
trimmed_df = small_df[['Plot']]

# Map each row label to {"input": <plot text>}, the per-row shape used in the
# prompt's example input.
sample_input_data = {}
for row_label, record in trimmed_df.iterrows():
    sample_input_data[f"row_{row_label}"] = {"input": record['Plot']}

print(sample_input_data)

In [None]:
import json

from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a local .env file (if there is one) into os.environ before
# the key lookup below; previously load_dotenv was imported but never called.
load_dotenv()
client = OpenAI(api_key=os.environ['key'])

# Iterate over the sample_input_data in batches of 10 and generate outputs based on
# the user_input. Each raw model response is appended to output_all.
user_input = "Convert each plot summary into a shorter description of 1-2 sentences and translate it to French."
batch_size = 10
keys = list(sample_input_data.keys())

output_all = []
for i in range(0, len(keys), batch_size):
    batch = {key: sample_input_data[key] for key in keys[i:i + batch_size]}

    # Serialise the rows as real JSON (double quotes) rather than a Python repr,
    # matching the JSON examples in the prompt. default=str keeps non-JSON values
    # (e.g. numpy scalars) from aborting the request.
    batch_json = json.dumps(batch, ensure_ascii=False, default=str)

    # Fill the template placeholders. str.replace is used instead of str.format
    # because the few-shot JSON in the template contains literal braces. The data
    # is inserted last so its text is never rescanned for placeholders.
    prompt = (
        system_message
        .replace("{user_input}", user_input)
        .replace("{input_data}", batch_json)
    )

    # Prepare the input for the API. The rows are sent with the "user" role: an
    # "assistant" message would present them as something the model itself said.
    # They are repeated in the final turn so they are the last thing in the request.
    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_input},
        {"role": "user", "content": f"Input data: {batch_json}"},
    ]

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
        )
        output = response.choices[0].message.content
        output_all.append(output)

    except Exception as e:
        # Best effort: report the failed batch and carry on with the next one.
        # A failed batch leaves no entry in output_all, so positions in
        # output_all do not necessarily line up with batch numbers.
        print(f"Error processing batch {i // batch_size + 1}: {e}")

In [None]:
# Display the raw model response text collected for the first successful batch.
output_all[0]