In [1]:
import pandas as pd
import requests
import os
import asyncio

import json
import time

from src import agent_llm

In [None]:
import asyncio
from typing import Any, Dict, List, Tuple

import pandas as pd

from src.agent_llm import SmartAgentSystem

async def process_batch(batch: List[Tuple[int, str]]) -> List[Dict[str, Any]]:
    """Run the expert agent concurrently over one batch of indexed comments.

    Args:
        batch: ``(original_index, comment_text)`` pairs. The index is carried
            through untouched so callers can restore the original ordering.

    Returns:
        One record per input pair, in input order, with the keys
        ``original_index``, ``original_text``, ``sentiment``, ``metadata`` and
        ``raw_response``. ``sentiment`` and ``metadata`` are read from the
        agent response when it is a dict and are ``None`` otherwise.
        A comment whose agent call raised gets ``None`` for ``sentiment``,
        ``metadata`` and ``raw_response``.

    Raises:
        Whatever ``SmartAgentSystem()`` raises while being constructed or
        entered/exited as an async context manager. Failures of individual
        ``expert_agent`` calls are NOT raised; they are reported per comment.
    """
    async with SmartAgentSystem() as agent:
        tasks = [agent.expert_agent(comment) for _, comment in batch]
        # return_exceptions=True keeps one failing comment from discarding the
        # results of every other comment in the same batch.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    records: List[Dict[str, Any]] = []
    for (idx, text), result in zip(batch, results):
        if isinstance(result, BaseException):
            print(f"Erro no comentário {idx}: {result!r}")
            result = None

        # Only dict responses can be queried for fields; anything else is kept
        # verbatim in raw_response but yields no sentiment/metadata.
        is_mapping = isinstance(result, dict)
        records.append(
            {
                "original_index": idx,
                "original_text": text,
                "sentiment": result.get("sentiment") if is_mapping else None,
                "metadata": result.get("metadata") if is_mapping else None,
                "raw_response": result,
            }
        )
    return records
        

async def process_comments_in_batches(
    comments: List[str],
    batch_size: int = 10,
    pause_seconds: float = 3,
) -> List[Dict[str, Any]]:
    """Process every comment in sequential batches, one record per comment.

    Each batch is handed to ``process_batch``. If a batch raises, the error is
    printed and placeholder records (all result fields ``None``) are stored for
    its comments, so the output always has exactly one record per input.

    Args:
        comments: Comment texts to analyse.
        batch_size: Number of comments sent per batch; must be >= 1.
        pause_seconds: Delay between consecutive batches. No delay is added
            after the final batch. Values <= 0 disable the pause.

    Returns:
        Records sorted by ``original_index`` (the position of the comment in
        ``comments``), each with the keys ``original_index``,
        ``original_text``, ``sentiment``, ``metadata`` and ``raw_response``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently drop every
        # comment; zero would fail later with a less helpful message.
        raise ValueError(f"batch_size deve ser um inteiro positivo, recebido: {batch_size!r}")

    # Pair each comment with its position so order can be restored at the end.
    indexed_comments = list(enumerate(comments))
    total = len(indexed_comments)
    all_results: List[Dict[str, Any]] = []

    for start in range(0, total, batch_size):
        batch = indexed_comments[start:start + batch_size]
        batch_number = start // batch_size + 1
        end = min(start + batch_size, total)
        print(f"Processando batch {batch_number} ({start+1}-{end} de {total})")

        try:
            all_results.extend(await process_batch(batch))
        except Exception as e:
            print(f"Erro no batch {batch_number}: {str(e)}")
            # Placeholder rows keep the output aligned with the input.
            all_results.extend(
                {
                    "original_index": idx,
                    "original_text": text,
                    "sentiment": None,
                    "metadata": None,
                    "raw_response": None,
                }
                for idx, text in batch
            )

        # Throttle between batches (also after a failed one), but do not make
        # the caller wait once the last batch is done.
        if end < total and pause_seconds > 0:
            await asyncio.sleep(pause_seconds)

    return sorted(all_results, key=lambda record: record["original_index"])

def expand_results_to_dataframe(df: pd.DataFrame, results: List[Dict[str, Any]]) -> pd.DataFrame:
    """Attach per-row result records to a copy of ``df`` as new columns.

    ``results[i]`` is attached to the i-th row of ``df`` by position, whatever
    index ``df`` carries. The bookkeeping fields ``original_index`` and
    ``original_text`` are dropped. A ``metadata`` column holding dicts is
    replaced by one ``metadata_<key>`` column per key found; rows whose
    metadata is not a dict get missing values in those columns.

    Note: result fields are appended as-is, so if ``df`` already has a column
    with the same name as a result field, the output contains duplicate
    column names.

    Args:
        df: Source DataFrame; it is not modified.
        results: One record per row of ``df``, in row order.

    Returns:
        A new DataFrame with the index of ``df``, the columns of ``df``
        followed by the result columns and then the expanded metadata columns.

    Raises:
        ValueError: If ``results`` and ``df`` have different lengths.
    """
    if len(results) != len(df):
        raise ValueError(
            f"Número de resultados ({len(results)}) difere do número de linhas do DataFrame ({len(df)})"
        )

    results_df = pd.DataFrame(results).drop(
        columns=['original_index', 'original_text'], errors='ignore'
    )

    # concat(axis=1) aligns on index labels. Work on positional indexes so a
    # filtered/shuffled/custom-indexed df still lines up row by row, and
    # restore the caller's index at the end.
    expanded_df = pd.concat([df.reset_index(drop=True), results_df], axis=1)

    if 'metadata' in expanded_df.columns:
        # Build all metadata columns in one pass; non-dict entries (e.g. None
        # for failed comments) contribute an empty row.
        metadata_df = pd.DataFrame(
            [meta if isinstance(meta, dict) else {} for meta in expanded_df['metadata']],
            index=expanded_df.index,
        ).add_prefix('metadata_')
        expanded_df = pd.concat([expanded_df.drop(columns=['metadata']), metadata_df], axis=1)

    expanded_df.index = df.index
    return expanded_df

In [None]:
# Load the cleaned training set from Parquet and keep only its first 20 rows
# as a small sample for trying out the pipeline.
df_train = pd.read_parquet('dataset_train_cleaned.parquet')
df_train = df_train.head(20)

# Comment texts to send to the agent, taken from the 'comment_cleaned' column
# in DataFrame row order.
comments_to_process = df_train.loc[:, 'comment_cleaned'].tolist()

In [5]:
# Echo the list of comments queued for processing (notebook display).
comments_to_process

['judging from previous posts this used to be a good place , but not any longer .;',
 'we , there were four of us , arrived at noon - the place was empty - and the staff acted like we were imposing on them and they were very rude .;',
 'they never brought us complimentary noodles , ignored repeated requests for sugar , and threw our dishes on the table .;',
 'the food was lousy - too sweet or too salty and the portions tiny .;',
 'after all that , they complained to me about the small tip .;',
 'avoid this place !;',
 'i have eaten at saul , many times , the food is always consistently , outrageously good .;',
 'saul is the best restaurant on smith street and in brooklyn .;',
 'the duck confit is always amazing and the foie gras terrine with figs was out of this world .;',
 'the wine list is interesting and has many good values .;',
 'for the price , you can not eat this well in manhattan .;',
 'i was very disappointed with this restaurant .;',
 'ive asked a cart attendant for a lotus 

In [16]:
# Processa todos os comentários em batches de 10
results = await process_comments_in_batches(comments_to_process)

# Adiciona os resultados de volta ao DataFrame
df_train['sentiment_result'] = results

Processando batch 1 (1-10 de 20)
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'reason': 'high-quality dishes'}}
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'reason': 'consistently good food'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'reason': 'food quality and portion size'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'waiter', 'reason': 'poor service'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'staff', 'reason': 'rude behavior'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'customer', 'reason': 'unreasonable expectation about tip'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'reason': 'decline in quality'}}
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'reason': 'good values on wine list'}}
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'responsible': 'overall experience', 'reas

In [17]:
# Merge the result records into the sample DataFrame as separate columns.
final_df = expand_results_to_dataframe(df=df_train, results=results)
    

In [20]:
# Display the expanded DataFrame (original columns plus result columns).
final_df

Unnamed: 0,comment,comment_cleaned,sentiment_result,sentiment,sentiment.1,raw_response,metadata_reason,metadata_responsible
0,judging from previous posts this used to be a ...,judging from previous posts this used to be a ...,"{'original_index': 0, 'original_text': 'judgin...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'reason...",decline in quality,
1,"we , there were four of us , arrived at noon -...","we , there were four of us , arrived at noon -...","{'original_index': 1, 'original_text': 'we , t...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'respon...",rude behavior,staff
2,"they never brought us complimentary noodles , ...","they never brought us complimentary noodles , ...","{'original_index': 2, 'original_text': 'they n...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'respon...",poor service,waiter
3,the food was lousy - too sweet or too salty an...,the food was lousy - too sweet or too salty an...,"{'original_index': 3, 'original_text': 'the fo...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'reason...",food quality and portion size,
4,"after all that , they complained to me about t...","after all that , they complained to me about t...","{'original_index': 4, 'original_text': 'after ...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'respon...",unreasonable expectation about tip,customer
5,avoid this place !;,avoid this place !;,"{'original_index': 5, 'original_text': 'avoid ...",Negative,Negative,"{'sentiment': 'Negative', 'metadata': {'reason...",strong warning,
6,"i have eaten at saul , many times , the food i...","i have eaten at saul , many times , the food i...","{'original_index': 6, 'original_text': 'i have...",Positive,Positive,"{'sentiment': 'Positive', 'metadata': {'reason...",consistently good food,
7,saul is the best restaurant on smith street an...,saul is the best restaurant on smith street an...,"{'original_index': 7, 'original_text': 'saul i...",Positive,Positive,"{'sentiment': 'Positive', 'metadata': {'respon...",highly recommended,overall experience
8,the duck confit is always amazing and the foie...,the duck confit is always amazing and the foie...,"{'original_index': 8, 'original_text': 'the du...",Positive,Positive,"{'sentiment': 'Positive', 'metadata': {'reason...",high-quality dishes,
9,the wine list is interesting and has many good...,the wine list is interesting and has many good...,"{'original_index': 9, 'original_text': 'the wi...",Positive,Positive,"{'sentiment': 'Positive', 'metadata': {'reason...",good values on wine list,


In [22]:
# Carrega os dados do arquivo Parquet
df_train = pd.read_parquet('dataset_train_cleaned.parquet')

# Verifica se a coluna existe
if 'comment_cleaned' not in df_train.columns:
    raise ValueError("O DataFrame não contém a coluna 'comment_cleaned'")

comments_to_process = df_train['comment_cleaned'].tolist()

# Processa todos os comentários em batches de 10
print(f"Iniciando processamento de {len(comments_to_process)} comentários...")
results = await process_comments_in_batches(comments_to_process)

# Expande os resultados para o DataFrame original
final_df = expand_results_to_dataframe(df_train, results)

# Salva os resultados em um novo arquivo Parquet
output_path = 'dataset_train_with_sentiment.parquet'
final_df.to_parquet(output_path, index=False)
print(f"Processamento concluído! Resultados salvos em {output_path}")
print("\nResumo dos resultados:")
print(final_df[['sentiment', 'metadata_responsible', 'metadata_reason']].head())
print(f"\nTotal de comentários processados: {len(results)}")

Iniciando processamento de 651 comentários...
Processando batch 1 (1-10 de 651)
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'reason': 'excellent dishes'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'reason': 'food quality and portion size'}}
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'reason': 'good values on wine list'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'staff', 'reason': 'rude behavior'}}
Expert Agent Decision: {'sentiment': 'Positive', 'metadata': {'responsible': 'overall', 'reason': 'highly recommended'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'waiter', 'reason': 'poor service, ignored requests, and rude handling of dishes'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'responsible': 'customer', 'reason': 'unreasonable expectation about tip'}}
Expert Agent Decision: {'sentiment': 'Negative', 'metadata': {'reason': 'decline in qu