# Código Base

In [None]:
from google.cloud import bigquery
from transformers import pipeline
import pandas as pd
import json

Conectando-se ao BigQuery

In [None]:
# BigQuery credentials and target project.
# NOTE(review): the key-file path is a hardcoded Colab location; consider
# supplying it through an environment variable or Colab secret instead.
service_account_json = '/content/pdm-class-rabelo-2024-cfd73100f578.json'
project_id = 'pdm-class-rabelo-2024'

# Connect to BigQuery, pinning the project explicitly so the client and the
# table reference below cannot drift apart.
client = bigquery.Client.from_service_account_json(
    service_account_json, project=project_id
)

# Pull a small sample of reviews to run inference on.
# NOTE(review): LIMIT without ORDER BY does not guarantee which rows come back.
query = f"""
    SELECT *
    FROM `{project_id}.projeto.infer_df`
    LIMIT 30
"""

query_job = client.query(query)
results = query_job.result().to_dataframe()

print(results)


                                          review_body
0   My 11 month old son is celebrating his first X...
1   I purchased this v-reader book for my 3 year o...
2   I bought this product for my 15 months old dau...
3   We purchased this game based on some family me...
4   Bought this for my daughter for Christmas, it'...
5   This inflatable over-the-hill walker was the h...
6   I know my Grandaughter's will enjoy these doll...
7   This is a costume, not a toy. My son loved bei...
8   Maybe ten million is an exaggeration - but we'...
9   Yes, the markders are permanent. Sigh- we play...
10  We bought this 2 years ago. My daughter still ...
11  This is a very cute stuffed animal. I bought t...
12  I'll admit to being a Halo geek, much more the...
13  The senior class at my son's school created a ...
14  I usually spend a lot of time researching cust...
15  I purchased this for my daughter for Halloween...
16  ***  the tenergy universal smart charger for n...
17  This tutu fits any 18 in

Carregando modelo de predição

In [None]:
# Hugging Face checkpoint used for sentiment classification
model_name = "EdwardSJ151/bert-amazon-reviews"

# Build the text-classification pipeline for that checkpoint
sentiment_model = pipeline(task="text-classification", model=model_name)


Device set to use cpu


In [None]:
# Run inference exactly once per review and keep the top prediction.
# (Previously the model was invoked twice per row: once for the label and
# once for the score.) truncation=True asks the tokenizer to cut reviews that
# exceed its configured maximum length instead of failing on them.
top_predictions = [
    sentiment_model(text, truncation=True)[0]
    for text in results['review_body']
]

# Split each prediction dict into the two result columns
results['sentiment'] = [prediction['label'] for prediction in top_predictions]
results['score'] = [prediction['score'] for prediction in top_predictions]

# Show the first rows
print(results.head())

# Persist the predictions to a CSV file
results.to_csv('sentiment_results.csv', index=False)

                                         review_body sentiment     score
0  My 11 month old son is celebrating his first X...   LABEL_2  0.714434
1  I purchased this v-reader book for my 3 year o...   LABEL_1  0.700522
2  I bought this product for my 15 months old dau...   LABEL_1  0.976139
3  We purchased this game based on some family me...   LABEL_2  0.962867
4  Bought this for my daughter for Christmas, it'...   LABEL_2  0.877389


Verificando as predições

In [None]:
# Print every row in full (no column truncation) so each review can be read
# next to its predicted label and score
for row_label, record in results.iterrows():
    row_as_dict = record.to_dict()
    print(f"Linha {row_label}: {row_as_dict}")


Linha 0: {'review_body': "My 11 month old son is celebrating his first Xmas this year, so we got him some Little People toys, including this ice cream truck.  It's super cute and sturdy.  It plays a little song when you press the drive down.  It's a sweet addition to the set  (no pun intended!)", 'sentiment': 'LABEL_2', 'score': 0.7144336104393005}
Linha 1: {'review_body': "I purchased this v-reader book for my 3 year old son.  He Loves the v-reader.  Very easy to use and educational.  I'm excited about the Pooh book because it appears to be educational and fun.", 'sentiment': 'LABEL_1', 'score': 0.7005224823951721}
Linha 2: {'review_body': "I bought this product for my 15 months old daughter. She loves crayons, pencils, stamps etc. I wanted to introduce her to watercolors and do some potato-stamping. I found it a bit odd that you couldn't just get watercolors everywhere. I grew up with pelikan watercolors but they are a bit pricey here. I wasn't expecting much in comparison because of

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the sequence-classification model for the checkpoint
# (only the model is loaded here; no tokenizer is instantiated)
model_name = "EdwardSJ151/bert-amazon-reviews"
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# List the possible labels: id2label maps class index -> label name
labels = model.config.id2label
print("Labels possíveis:", labels)


Labels possíveis: {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}


'LABEL_0': negativo

'LABEL_1': neutro

'LABEL_2': positivo

# main.py

In [4]:
# %pip installs into the running kernel's environment; the version is pinned
# for reproducibility and -q keeps the install log out of the notebook output.
%pip install -q fastapi==0.115.6

Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.41.3-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: starlette, fastapi
Successfully installed fastapi-0.115.6 starlette-0.41.3


In [5]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
from transformers import pipeline

# Hugging Face checkpoint served by this API
model_name = "EdwardSJ151/bert-amazon-reviews"

# Build the text-classification pipeline once at module level so the request
# handler can reuse it
sentiment_model = pipeline(task="text-classification", model=model_name)

# FastAPI application instance that the route decorators attach to
app = FastAPI()

# Input schema: request body accepted by the sentiment-analysis endpoint
class SentimentInput(BaseModel):
    # One prediction is produced per entry, in the same order
    texts: List[str]  # List of texts to analyze

# Output schema: one prediction returned by the sentiment-analysis endpoint
class SentimentOutput(BaseModel):
    # Label string reported by the model (e.g. 'LABEL_2')
    label: str
    # Score the model reported for that label
    score: float

@app.post("/sentiment-analysis", response_model=List[SentimentOutput])
async def analyze_sentiments(input_data: SentimentInput):
    """Classify each submitted text and return its top label and score.

    Args:
        input_data: Request body carrying the list of texts to analyze.

    Returns:
        A list of ``{"label": ..., "score": ...}`` dicts, one per input text,
        in the same order as ``input_data.texts`` (empty list for no texts).

    Raises:
        HTTPException: With status 400 if inference fails for any text.
    """
    try:
        predictions = []
        for text in input_data.texts:
            # Run the model once per text (it was previously invoked twice,
            # once for the label and once for the score). truncation=True
            # asks the tokenizer to cut over-long inputs to its configured
            # maximum length instead of failing on them.
            top_prediction = sentiment_model(text, truncation=True)[0]
            predictions.append(
                {
                    "label": top_prediction["label"],
                    "score": top_prediction["score"],
                }
            )

        return predictions

    except Exception as e:
        # NOTE(review): inference failures are reported as 400 to stay
        # compatible with existing clients; a 5xx code may be more accurate.
        raise HTTPException(status_code=400, detail=f"Erro na predição: {str(e)}") from e


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/885 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.22k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cpu


In [7]:
# Simple built-in smoke test
if __name__ == "__main__":
    # Test data: three short example reviews
    test_input = SentimentInput(
        texts=[
            "This product is amazing!",
            "I am very disappointed with the quality.",
            "It works fine, but could be better."
        ]
    )

    # Run the model directly on the test texts (no HTTP request is made)
    try:
        print("Running test...")
        predictions = []
        for text in test_input.texts:
            # Single model call per text; label and score come from the same
            # prediction instead of two separate inference runs.
            top_prediction = sentiment_model(text)[0]
            predictions.append(
                {
                    "label": top_prediction["label"],
                    "score": top_prediction["score"],
                }
            )

        # Show each text next to its prediction, numbered from 1
        for idx, (text, prediction) in enumerate(
            zip(test_input.texts, predictions), start=1
        ):
            print(f"Text {idx}: {text}")
            print(f"Prediction: {prediction}")
            print()

    except Exception as e:
        print(f"Erro no teste: {e}")

Running test...
Text 1: This product is amazing!
Prediction: {'label': 'LABEL_2', 'score': 0.9928032755851746}

Text 2: I am very disappointed with the quality.
Prediction: {'label': 'LABEL_0', 'score': 0.9488065838813782}

Text 3: It works fine, but could be better.
Prediction: {'label': 'LABEL_1', 'score': 0.8292544484138489}

