In [None]:
import pandas as pd
import os
import pickle

# Load the CSV of emotion-labelled texts; the code below reads its 'label'
# and 'text' columns.
df = pd.read_csv("data/emotions.csv")

# Distinct label values present in the data.
unique_labels = df['label'].unique()
num_labels = len(unique_labels)
if num_labels == 0:
    # Without this guard the floor division below fails with a bare
    # ZeroDivisionError that does not say what is actually wrong.
    raise ValueError("data/emotions.csv contains no labelled rows to sample from.")

# Target size of the balanced subset (also used to name the cache files below).
total_samples = 500

# Equal share per label. Floor division means the balanced set can end up
# slightly smaller than total_samples when it is not a multiple of num_labels.
samples_per_label = total_samples // num_labels

# Draw the same number of rows from every label, re-seeding per group so the
# selection is reproducible. n is capped at the group size: DataFrame.sample
# raises ValueError when asked for more rows than exist (replace=False), so a
# label with too few rows now contributes all of its rows instead of aborting.
balanced_df = df.groupby('label', group_keys=False).apply(
    lambda group: group.sample(
        n=min(samples_per_label, len(group)),
        random_state=42,
    )
)

# Parallel lists: balanced_texts[i] carries the label balanced_labels[i].
balanced_texts = balanced_df['text'].tolist()
balanced_labels = balanced_df['label'].tolist()



embeddings_path = f"data/model/input/emotions_embeddings_{total_samples}.pkl"
labels_path = f"data/model/labels/emotions_labels_{total_samples}.pkl"

if os.path.exists(embeddings_path):
    with open(embeddings_path, "rb") as f:
        emotions_embeddings = pickle.load(f)
else:
    # Generate embeddings for the balanced texts.
    emotions_embeddings = await utils.batch_generate_embeddings(balanced_texts)
    with open(embeddings_path, "wb") as f:
        pickle.dump(emotions_embeddings, f)


# Load the cached labels, or create the cache from the freshly sampled labels.
# Either way emotions_labels is defined once this block has run.
if os.path.exists(labels_path):
    # Only load pickle files this notebook wrote itself; unpickling untrusted
    # files can execute arbitrary code.
    with open(labels_path, "rb") as f:
        emotions_labels = pickle.load(f)
else:
    # First run: the sampled labels become the cached labels.
    emotions_labels = balanced_labels

    # Create the cache directory on first use so the write below cannot fail
    # with FileNotFoundError.
    os.makedirs(os.path.dirname(labels_path), exist_ok=True)
    with open(labels_path, "wb") as f:
        pickle.dump(emotions_labels, f)


# Restore the pickled task records from disk.
# NOTE(review): Task is not imported anywhere in the visible cells; it has to
# be in scope already for the rebuild step below to work.
with open("embedded_tasks.pkl", mode="rb") as f:
    tasks_data = pickle.load(f)

# Each stored record is expanded into keyword arguments for Task.
tasks = list(map(lambda record: Task(**record), tasks_data))

In [None]:
from utils import generate_embedding
from importlib import reload
import filter
# Re-import filter so edits made to the module since the last import take effect.
reload(filter)

# Build the predictor from the embeddings and their labels.
predictor = filter.EmotionPredictor(
    emotions_embeddings,
    balanced_labels,
)

# Configure the model: three hidden sizes plus the path handed over as model_path.
predictor.define_model(
    hidden_size1=500,
    hidden_size2=150,
    hidden_size3=25,
    model_path="./emotions_model.pth",
)

# Train with the chosen learning rate and number of epochs.
predictor.train_model(lr=0.01, epochs=200)

# Evaluate the trained model; as the last expression, its result is shown as the cell output.
predictor.evaluate_model()


In [None]:
import utils
from importlib import reload
# Pick up any edits made to utils since it was first imported.
reload(utils)

# Score one stored run: the embeddings of run index 1 in report index 2.
# Scoring fresh text instead would first require embedding it (e.g. with
# generate_embedding); that path is not wired up in this cell.
embedding = reports[2].run[1].embeddings
probabilities = predictor.predict(embedding)

# The index -> emotion mapping below is hard-coded, so it has to match the
# order of the predictor's outputs. Line breaks and trailing spaces are spelled
# out explicitly so the printed text stays exactly the same.
print(
    "\n"
    "Predicted probabilities for each emotion for the following text:\n"
    f"{reports[2].run[1].report}\n"
    "\n"
    f"sadness: {probabilities[0][0]}, \n"
    f"joy: {probabilities[0][1]},\n"
    f"love: {probabilities[0][2]},\n"
    f"anger: {probabilities[0][3]},\n"
    f"fear: {probabilities[0][4]},\n"
    f"surprise: {probabilities[0][5]}  \n"
)

In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Emotion names, listed in the order of the predictor's output indices.
emotion_names = ["sadness", "joy", "love", "anger", "fear", "surprise"]
# Same names with the first repeated at the end, so a radar outline closes.
angles = [*emotion_names, emotion_names[0]]

# condition_counts["Condition <x>"][emotion] is the number of runs under that
# condition whose highest-scoring emotion was `emotion`.
condition_counts = {}

for report in reports:
    for run in report.run:
        # Skip runs that have no embeddings or whose report text is blank.
        if run.embeddings is None or run.report.strip() == "":
            continue

        # The emotion with the largest predicted score is the run's classification.
        emb = np.array(run.embeddings)
        probs = predictor.predict(emb)
        predicted_idx = np.argmax(probs)
        predicted_emotion = emotion_names[predicted_idx]

        # A run is filed under the first entry of its report's condition field.
        cond_value = report.condition[0]
        cond_key = f"Condition {cond_value}"

        # Start a zeroed tally the first time a condition is seen, then count the run.
        tally = condition_counts.setdefault(
            cond_key, {emotion: 0 for emotion in emotion_names}
        )
        tally[predicted_emotion] += 1

# Convert each condition's counts into shares of that condition's total.
# Despite the name, the stored values are fractions between 0 and 1.
condition_percentages = {}
for cond, counts in condition_counts.items():
    total = sum(counts.values())
    if not total > 0:
        # Nothing was counted: store zeros instead of dividing by zero.
        condition_percentages[cond] = dict.fromkeys(emotion_names, 0)
        continue
    condition_percentages[cond] = {
        emotion: count / total
        for emotion, count in counts.items()
    }

# Radar charts of the per-condition shares: one polar subplot per condition,
# side by side in a single row, in sorted order of the condition keys.
sorted_conditions = sorted(condition_percentages)
fig = make_subplots(
    rows=1,
    cols=len(sorted_conditions),
    specs=[[{"type": "polar"} for _ in sorted_conditions]],
    subplot_titles=[str(cond) for cond in sorted_conditions],
)

# Line colour per condition; conditions not listed here fall back to black.
colors = {
    "Condition 1": "red",
    "Condition 2": "blue",
    "Condition 3": "green",
}

for col_number, cond in enumerate(sorted_conditions, start=1):
    shares = condition_percentages[cond]
    # Values in emotion_names order, with the first repeated to close the outline.
    percentages = [shares[emotion] for emotion in emotion_names]
    percentages_closed = [*percentages, percentages[0]]

    trace = go.Scatterpolar(
        r=percentages_closed,
        theta=angles,
        mode='lines+markers',
        name=f'{cond}',
        line=dict(color=colors.get(cond, "black")),
    )
    fig.add_trace(trace, row=1, col=col_number)

# Radial ticks every 0.1 across the 0-1 scale. Each label is generated from its
# own tick value, so the text shown always matches the position it sits at.
tickvals = [step / 10 for step in range(11)]
ticktext = [f"{value:.1f}" for value in tickvals]

# One fixed radial axis shared by every subplot, so shapes are comparable
# across conditions and larger shares sit further from the centre.
# autorange is intentionally not set: an explicit autorange value (such as
# "reversed") makes plotly compute the range from the data and ignore `range`.
radial_axis = dict(
    tickmode="array",
    tickvals=tickvals,
    ticktext=ticktext,
    range=[0, 1],
)

# Layout keys for the polar subplots: the first is "polar", later ones are
# "polar2", "polar3", ...
num_conditions = len(sorted_conditions)
polar_ids = [
    "polar" if idx == 1 else f"polar{idx}"
    for idx in range(1, num_conditions + 1)
]

# Apply the shared axis settings to all subplots in a single layout update.
fig.update_layout({
    polar_id: dict(radialaxis=dict(radial_axis))
    for polar_id in polar_ids
})

fig.show()