In [1]:
import pandas as pd
# Load the merged books/reviews dataset from the project-relative data folder,
# then show its column index as the cell output.
merged_df = pd.read_parquet(path="data/merged_df.parquet")
merged_df.columns

Index(['Title', 'description', 'authors', 'image', 'previewLink', 'publisher',
       'publishedDate', 'infoLink', 'categories', 'ratingsCount', 'Id',
       'Price', 'User_id', 'profileName', 'score', 'time', 'summary', 'text'],
      dtype='object')

In [7]:
from transformers import pipeline
from tqdm import tqdm

def analyze_sentiment(
    input_df: pd.DataFrame,
    text_column: str = 'text',
    batch_size: int = 8,
    model_name: str = "distilbert-base-uncased-finetuned-sst-2-english",
) -> pd.DataFrame:
    """
    Run sentiment analysis over one text column of a DataFrame.

    Cheap validation (column presence, empty input) happens before the model
    is loaded, so trivial mistakes do not pay the model-loading cost.

    Args:
        input_df (pd.DataFrame): Frame holding the data. It is never mutated.
        text_column (str): Name of the column with the text to classify.
        batch_size (int): Batch size forwarded to the Hugging Face pipeline.
        model_name (str): Hugging Face model identifier to load.

    Returns:
        pd.DataFrame: A copy of the input with two extra columns,
            'sentiment_label' and 'sentiment_score'. For a zero-row input the
            two columns are added empty. If the column is missing or the model
            cannot be loaded, the original ``input_df`` is returned unchanged
            (best-effort behaviour; a message is printed).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    print("Iniciando a análise de sentimento...")

    if batch_size < 1:
        raise ValueError(f"batch_size deve ser >= 1 (recebido: {batch_size})")

    # 1. Validate the requested column before doing any expensive work.
    if text_column not in input_df.columns:
        print(f"ERRO: A coluna '{text_column}' não foi encontrada no DataFrame.")
        return input_df

    # Work on a copy so the caller's DataFrame is left untouched.
    df = input_df.copy()
    df[text_column] = df[text_column].fillna('')
    # The tokenizer only accepts strings; coerce any stray non-string values.
    texts_to_analyze = df[text_column].astype(str).tolist()

    if not texts_to_analyze:
        print("AVISO: A coluna de texto está vazia. Nenhuma análise a ser feita.")
        # Keep the output schema consistent even when there is nothing to score.
        df['sentiment_label'] = pd.Series(index=df.index, dtype='object')
        df['sentiment_score'] = pd.Series(index=df.index, dtype='float64')
        return df

    # 2. Load the pre-trained Hugging Face model (only now that it is needed).
    try:
        sentiment_pipeline = pipeline("sentiment-analysis", model=model_name)
    except Exception as e:
        print(f"Erro ao carregar o modelo do Hugging Face: {e}")
        print("Verifique sua conexão com a internet ou as bibliotecas instaladas.")
        return input_df  # Best effort: hand back the original frame on failure.

    # 3. Run inference chunk by chunk so the progress bar reflects real progress.
    #    Calling the pipeline once with the whole list only returns after every
    #    text has been processed, which makes a wrapping progress bar useless.
    print(f"Analisando {len(texts_to_analyze)} reviews...")
    # Several batches per call keeps the per-call overhead low.
    chunk_size = batch_size * 16
    results = []
    with tqdm(total=len(texts_to_analyze)) as progress:
        for start in range(0, len(texts_to_analyze), chunk_size):
            chunk = texts_to_analyze[start:start + chunk_size]
            # truncation=True keeps over-long reviews from raising an error.
            results.extend(
                sentiment_pipeline(chunk, truncation=True, batch_size=batch_size)
            )
            progress.update(len(chunk))

    # 4. Attach the predictions to the copy.
    df['sentiment_label'] = [result['label'] for result in results]
    df['sentiment_score'] = [result['score'] for result in results]

    print("\nAnálise de sentimento concluída!")
    return df

# Score every review in the merged dataset, then preview the first rows.
df_sentiment = analyze_sentiment(input_df=merged_df, text_column='text')
df_sentiment.head(5)

Iniciando a análise de sentimento...
Erro ao carregar o modelo do Hugging Face: name 'torch' is not defined
Verifique sua conexão com a internet ou as bibliotecas instaladas.


Unnamed: 0,Title,description,authors,image,previewLink,publisher,publishedDate,infoLink,categories,ratingsCount,Id,Price,User_id,profileName,score,time,summary,text
0,Its Only Art If Its Well Hung!,,['Julie Strain'],http://books.google.com/books/content?id=DykPA...,http://books.google.nl/books?id=DykPAAAACAAJ&d...,,1996,http://books.google.nl/books?id=DykPAAAACAAJ&d...,['Comics & Graphic Novels'],,1882931173,,AVCGYZL8FQQTD,"Jim of Oz ""jim-of-oz""",4.0,940636800,Nice collection of Julie Strain images,This is only for Julie Strain fans. It's a col...
1,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,826414346,,A30TK6U7DNS82R,Kevin Killian,5.0,1095724800,Really Enjoyed It,I don't care much for Dr. Seuss but after read...
2,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,826414346,,A3UH4UZ4RSVO82,John Granger,5.0,1078790400,Essential for every personal and Public Library,"If people become the books they read and if ""t..."
3,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,826414346,,A2MVUWT453QH61,"Roy E. Perry ""amateur philosopher""",4.0,1090713600,Phlip Nel gives silly Seuss a serious treatment,"Theodore Seuss Geisel (1904-1991), aka &quot;D..."
4,Dr. Seuss: American Icon,Philip Nel takes a fascinating look into the k...,['Philip Nel'],http://books.google.com/books/content?id=IjvHQ...,http://books.google.nl/books?id=IjvHQsCn_pgC&p...,A&C Black,2005-01-01,http://books.google.nl/books?id=IjvHQsCn_pgC&d...,['Biography & Autobiography'],,826414346,,A22X4XUPKF66MR,"D. H. Richards ""ninthwavestore""",4.0,1107993600,Good academic overview,Philip Nel - Dr. Seuss: American IconThis is b...


In [None]:
! pip install pip install pandas transformers torch tqdm

Collecting pytorch
  Using cached pytorch-1.0.2.tar.gz (689 bytes)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: pytorch
  Building wheel for pytorch (pyproject.toml): started
  Building wheel for pytorch (pyproject.toml): finished with status 'error'
Failed to build pytorch


  error: subprocess-exited-with-error
  
  × Building wheel for pytorch (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [23 lines of output]
      Traceback (most recent call last):
        File "c:\Users\Pichau\Downloads\A3Data\.venv\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 389, in <module>
          main()
        File "c:\Users\Pichau\Downloads\A3Data\.venv\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 373, in main
          json_out["return_val"] = hook(**hook_input["kwargs"])
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        File "c:\Users\Pichau\Downloads\A3Data\.venv\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 280, in build_wheel
          return _build_backend().build_wheel(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        File "C:\Users\Pichau\AppData\Local\Temp\pip-build-env-yykxs_oe\overlay\Lib\site-packages\setuptools\bui