In [17]:
from predictionguard import PredictionGuard
import os
import pandas as pd
import concurrent.futures

In [56]:
# Input File Name
# CSV that is loaded into `data` at the bottom of this cell.
inputfile = 'CDE.bagofwords.csv'

# Output File Name
# NOTE(review): this is the same path as `inputfile`, so the export cell
# (data.to_csv(outputfile, ...)) overwrites the source CSV — confirm this is intended.
outputfile = 'CDE.bagofwords.csv'

# Translated Column Messages
# Name of the column whose values are passed to the model for scoring.
messagecolumn = 'Product Review'

# Import Data
# Loads the whole input CSV into a DataFrame; later cells add a column to it and write it back out.
data = pd.read_csv(inputfile)

In [41]:
# Set API key
# The key is read from the environment only. A literal fallback inside the
# notebook exposes the credential to everyone who can read the file, so none
# is provided here. Any key that was ever stored in this notebook should be
# treated as compromised and rotated.
api_key = os.getenv("PREDICTIONGUARD_API_KEY")
if not api_key:
    # Fail early with an actionable message instead of sending unauthenticated requests.
    raise RuntimeError(
        "PREDICTIONGUARD_API_KEY is not set; export it before running this notebook."
    )

# Initialize the PredictionGuard client
client = PredictionGuard(api_key=api_key)

# System Behavior
# Instruction message placed first in every chat request. The sentences are
# joined with a trailing space followed by a newline.
system_message = dict(
    role="system",
    content=" \n".join(
        (
            "You are our personal sentiment score evaluator for beers. Your task is to assess the product reviews and assign a sentiment score value between -1 and 1.",
            "Please account for potential sarcasm and slang terms.",
            "The context is beer, so please keep in mind how word meanings and their sentiments may change in this context.",
            "Do not provide any explanations or contextual information. Only return the sentiment score of the message.",
        )
    ),
)

def process_message(row):
    """Send one review to the chat model and return the text of its reply.

    Args:
        row: The review to score. It is rendered to text with an f-string
            before being sent as the user message.

    Returns:
        The ``content`` of the first choice in the API result. If anything
        in the request or the result lookup raises, the string
        ``"Error: <exception text>"`` is returned instead of raising.
    """
    try:
        # The system prompt goes first, followed by the review as the user turn.
        completion = client.chat.completions.create(
            model="Hermes-3-Llama-3.1-8B",
            messages=[system_message, {"role": "user", "content": f"{row}"}],
        )
        first_choice = completion['choices'][0]
        return first_choice['message']['content']
    except Exception as exc:
        # Best-effort: report the failure in-band so one bad row does not stop the batch.
        return "Error: " + str(exc)
    
# Process in Parallel 
def process_in_parallel(data_column, max_workers=5):
    """Score every message in ``data_column`` concurrently, keeping input order.

    Args:
        data_column: Iterable of messages; each item is passed to
            ``process_message``.
        max_workers: Number of worker threads used for the requests.

    Returns:
        A list with one ``process_message`` result per input item, in the
        same order as the input.

    A line ``Processed N messages`` is printed after every 100 results.
    """
    responses = []

    # Use ThreadPoolExecutor for parallel processing
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order. Counting here, in the
        # calling thread, avoids a counter shared between worker threads
        # (an unsynchronised `count += 1` can lose updates) and means the
        # number reported reflects results that have actually come back.
        for response in executor.map(process_message, data_column):
            responses.append(response)
            if len(responses) % 100 == 0:
                print(f"Processed {len(responses)} messages")
    return responses
    

In [42]:
# Column of review texts to score
data_subset = data[messagecolumn]

# Run sentiment analysis in parallel with 5 threads
response = process_in_parallel(data_subset, max_workers=5)

# The result is a list of responses. Show only the size and a short preview:
# printing the full list floods the cell output with thousands of entries.
print(f"Received {len(response)} responses; first 10: {response[:10]}")

Processed 100 messages
Processed 200 messages
Processed 300 messages
Processed 400 messages
Processed 500 messages
Processed 600 messages
Processed 700 messages
Processed 800 messages
Processed 900 messages
Processed 1000 messages
Processed 1100 messages
Processed 1200 messages
Processed 1300 messages
Processed 1400 messages
Processed 1500 messages
Processed 1600 messages
Processed 1700 messages
Processed 1800 messages
Processed 1900 messages
Processed 2000 messages
Processed 2100 messages
Processed 2200 messages
Processed 2300 messages
Processed 2400 messages
Processed 2500 messages
Processed 2600 messages
Processed 2700 messages
Processed 2800 messages
Processed 2900 messages
Processed 3000 messages
Processed 3100 messages
Processed 3200 messages
Processed 3300 messages
Processed 3400 messages
Processed 3500 messages
Processed 3600 messages
Processed 3700 messages
Processed 3800 messages
Processed 3900 messages
Processed 4000 messages
Processed 4100 messages
Processed 4200 messages
P

In [57]:
# Attach the model replies to the frame as a new column.
data['Sentiment Scores'] = response
# Remove the 'Unnamed: 0' column (presumably a stray index column written by an
# earlier CSV export — verify). errors='ignore' keeps this cell re-runnable:
# without it a second run, or an input file that lacks the column, raises KeyError.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

In [58]:
# Write the scored frame to `outputfile`, leaving the row index out of the file
data.to_csv(path_or_buf=outputfile, index=False)