
# Bertimbau Base



## Preparação dos Dados

In [1]:
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
caminho_arquivo = '/content/drive/MyDrive/Trabalho-IA/frases-musicas-final.csv'
df = pd.read_csv(caminho_arquivo)

In [5]:
df.head()

Unnamed: 0,music_id,music_frase_id,frase,frase_id,tem_padrao,padrao_encontrado
0,1,1,Carolina é uma menina bem difícil de esquecer.,1,Female,"[(1916877163388338700, 3, 6), (191687716338833..."
1,1,2,Andar bonito e um brilho no olhar.,2,,
2,1,3,Tem um jeito adolescente que me faz enlouquecer.,3,,
3,1,4,E um molejo que eu não vou te enganar.,4,,
4,1,5,"Maravilha feminina, meu docinho de pavê.",5,,


In [6]:
df['tem_padrao'] = df['tem_padrao'].fillna('Neutro')

In [7]:
female_df = df[df['tem_padrao'] == 'Female'].sample(n=3500, random_state=42)
male_df = df[df['tem_padrao'] == 'Male'].sample(n=3500, random_state=42)
nan_df = df[df['tem_padrao'] == 'Neutro'].sample(n=3500, random_state=42)

df_balanceado = pd.concat([female_df, male_df, nan_df]).sample(frac=1, random_state=42).reset_index(drop=True)
df_balanceado.shape

(10500, 6)

In [8]:
df_balanceado.head()

Unnamed: 0,music_id,music_frase_id,frase,frase_id,tem_padrao,padrao_encontrado
0,130296,21,"O Senhor é meu pastor, nada me faltará.",4061509,Male,"[(3942596749723480963, 1, 5)]"
1,81441,18,Que dependo do teu amor pra viver.,2354064,Neutro,
2,38325,17,Que te rabiscam o corpo todo.,1131707,Neutro,
3,61351,14,Eu não nasci pra coronel..,1786394,Neutro,
4,75160,24,(desce que desce que desce).,2177903,Neutro,


In [9]:
categoria_counts = df_balanceado['tem_padrao'].value_counts()
print(categoria_counts)

tem_padrao
Male      3500
Neutro    3500
Female    3500
Name: count, dtype: int64


In [10]:
X = df_balanceado['frase'].values  # Frases
y = df_balanceado['tem_padrao'].values  # Rótulos

In [11]:
from sklearn.preprocessing import LabelEncoder

In [12]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df_balanceado['tem_padrao'])

In [13]:
classes_mapeadas = dict(zip(label_encoder.classes_, range(len(label_encoder.classes_))))
print(classes_mapeadas)

{'Female': 0, 'Male': 1, 'Neutro': 2}


In [14]:
from sklearn.model_selection import StratifiedKFold
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
import random
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)

In [15]:
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=256):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

## Treinamento

In [16]:
def train_model(model, data_loader, optimizer, device):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    return total_loss / len(data_loader)

## Avaliação

In [17]:
from sklearn.metrics import classification_report

In [18]:
def evaluate_model(model, data_loader, device, label_encoder):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
      for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs.logits, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')
    report = classification_report(all_labels, all_preds, target_names=label_encoder.classes_)

    return accuracy, precision, recall, f1, report, all_preds

In [19]:
# tokenizer e o modelo BERTimbau
tokenizer = BertTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased')
model = BertForSequenceClassification.from_pretrained('neuralmind/bert-base-portuguese-cased', num_labels=3)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/43.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/647 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validação Cruzada Estratificada


In [20]:
# Definir o número de folds para validação cruzada
n_splits = 10 #10 é o mais comum, mas pode aumentar o número de folds desde que contenha no minimo 30 exemplos em cada fold, ex: 100 exemplos anotados, deve ter no maximo 3 folds / Deve-se saber tbm, que quanto maior o número de folds, maior o custo computacional
skf = StratifiedKFold(n_splits=n_splits)

In [21]:
all_fold_metrics = {
    'accuracy': [],
    'precision': [],
    'recall': [],
    'f1': []
}

In [22]:
all_true_labels = []
all_pred_labels = []

## Treinamento e Avaliação de Modelo com Validação Cruzada Estratificada e Balanceamento de Classes

In [23]:
from transformers import logging
logging.set_verbosity_error()

In [24]:
for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(f"\n===== Fold {fold + 1}/10 =====")

    # Dividir os dados
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Criar datasets e dataloaders
    train_dataset = TextDataset(X_train, y_train, tokenizer)
    test_dataset = TextDataset(X_test, y_test, tokenizer)

    BATCH_SIZE = 16
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

    # Reinicializar modelo e otimizador
    model = BertForSequenceClassification.from_pretrained(
        'neuralmind/bert-base-portuguese-cased',
        num_labels=len(label_encoder.classes_)
    )
    model = model.to(device)
    optimizer = AdamW(model.parameters(), lr=1e-5)

    # Treinamento
    train_loss = train_model(model, train_loader, optimizer, device)
    print(f"Loss de treino no Fold {fold + 1}: {train_loss:.4f}")


    torch.save(model.state_dict(), f"modelo_fold_{fold+1}.pt")  # Salvar modelo treinado

    # Avaliação
    accuracy, precision, recall, f1, class_report, all_preds = evaluate_model(model, test_loader, device, label_encoder)

    all_fold_metrics['accuracy'].append(accuracy)
    all_fold_metrics['precision'].append(precision)
    all_fold_metrics['recall'].append(recall)
    all_fold_metrics['f1'].append(f1)

    all_true_labels.extend(y_test)
    all_pred_labels.extend(all_preds)

# Cálculo das métricas finais
final_accuracy = np.mean(all_fold_metrics['accuracy'])
final_precision = np.mean(all_fold_metrics['precision'])
final_recall = np.mean(all_fold_metrics['recall'])
final_f1 = np.mean(all_fold_metrics['f1'])

print(f"\nDesempenho Final:")
print(f"Accuracy Média: {final_accuracy:.4f}")
print(f"Precision Média: {final_precision:.4f}")
print(f"Recall Médio: {final_recall:.4f}")
print(f"F1-score Médio: {final_f1:.4f}")

final_class_report = classification_report(all_true_labels, all_pred_labels, target_names=label_encoder.classes_)
print("\nClassification Report:")
print(final_class_report)


===== Fold 1/10 =====
Loss de treino no Fold 1: 0.2899

===== Fold 2/10 =====
Loss de treino no Fold 2: 0.2860

===== Fold 3/10 =====
Loss de treino no Fold 3: 0.2577

===== Fold 4/10 =====
Loss de treino no Fold 4: 0.2785

===== Fold 5/10 =====
Loss de treino no Fold 5: 0.3085

===== Fold 6/10 =====
Loss de treino no Fold 6: 0.2793

===== Fold 7/10 =====
Loss de treino no Fold 7: 0.2807

===== Fold 8/10 =====
Loss de treino no Fold 8: 0.2786

===== Fold 9/10 =====
Loss de treino no Fold 9: 0.2732

===== Fold 10/10 =====
Loss de treino no Fold 10: 0.2887

Desempenho Final:
Accuracy Média: 0.9667
Precision Média: 0.9669
Recall Médio: 0.9667
F1-score Médio: 0.9666

Classification Report:
              precision    recall  f1-score   support

      Female       0.97      0.98      0.97      3500
        Male       0.96      0.98      0.97      3500
      Neutro       0.97      0.94      0.96      3500

    accuracy                           0.97     10500
   macro avg       0.97      0.9

# SVM e MLP

In [25]:
from transformers import BertModel

bert_model = BertModel.from_pretrained('neuralmind/bert-base-portuguese-cased')
bert_model = bert_model.to(device)
bert_model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(29794, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

Gerar embeddings médios para cada frase

In [26]:
def get_bert_embeddings(texts, tokenizer, model, device, max_length=256):
    embeddings = []

    for text in texts:
        inputs = tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=max_length,
            truncation=True,
            padding='max_length',
            return_tensors='pt'
        )
        input_ids = inputs['input_ids'].to(device)
        attention_mask = inputs['attention_mask'].to(device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            last_hidden_state = outputs.last_hidden_state
            sentence_embedding = torch.mean(last_hidden_state, dim=1).squeeze().cpu().numpy()
            embeddings.append(sentence_embedding)

    return np.array(embeddings)

In [27]:
X_embeddings = get_bert_embeddings(X, tokenizer, bert_model, device)

In [28]:
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report

In [29]:
skf = StratifiedKFold(n_splits=10)

In [30]:
def evaluate_classifier(clf, X, y, name):
    all_preds = []
    all_true = []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        clf.fit(X_train, y_train)
        preds = clf.predict(X_test)
        all_preds.extend(preds)
        all_true.extend(y_test)

    print(f"\n==== Resultados para {name} ====")
    print(classification_report(all_true, all_preds, target_names=label_encoder.classes_))

# Classificador MLP
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
evaluate_classifier(mlp, X_embeddings, y, "MLP")

# Classificador SVM
svm = SVC(kernel='linear', probability=True)
evaluate_classifier(svm, X_embeddings, y, "SVM")


==== Resultados para MLP ====
              precision    recall  f1-score   support

      Female       0.92      0.94      0.93      3500
        Male       0.91      0.92      0.92      3500
      Neutro       0.91      0.89      0.90      3500

    accuracy                           0.92     10500
   macro avg       0.92      0.92      0.92     10500
weighted avg       0.92      0.92      0.92     10500


==== Resultados para SVM ====
              precision    recall  f1-score   support

      Female       0.91      0.94      0.92      3500
        Male       0.90      0.90      0.90      3500
      Neutro       0.90      0.87      0.89      3500

    accuracy                           0.90     10500
   macro avg       0.90      0.90      0.90     10500
weighted avg       0.90      0.90      0.90     10500



# GPT

In [31]:
import openai
from tqdm import tqdm

openai.api_key = "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"

In [32]:
from sklearn.model_selection import train_test_split
from collections import Counter

X_gpt, _, y_gpt, _ = train_test_split(X, y, train_size=0.10, stratify=y, random_state=42)
print("Distribuição:", Counter(y_gpt))

Distribuição: Counter({np.int64(2): 350, np.int64(1): 350, np.int64(0): 350})


In [33]:
id_to_label = {i: label for i, label in enumerate(label_encoder.classes_)}

In [34]:
from sklearn.metrics import classification_report

In [35]:
def zero_shot_prompt(frase):
    return f"""
Classifique a frase abaixo como associada ao gênero 'Female', 'Male' ou 'Neutro'.

Frase: "{frase}"

Resposta:"""

In [36]:
def one_shot_prompt(frase):
    exemplo = (
        "Frase: Ela cuida dos filhos com muito carinho.\n"
        "Classificação: Female\n\n"
    )
    return (
        f"{exemplo}"
        f"Frase: {frase}\n"
        f"Classificação:"
    )

In [37]:
def few_shot_prompt(frase):
    exemplos = (
        "Frase: Ela é muito delicada.\nResposta: Female\n"
        "Frase: Ele é um guerreiro determinado.\nResposta: Male\n"
        "Frase: O sol brilhou durante a tarde.\nResposta: Neutro\n"
    )
    return f"{exemplos}Frase: {frase}\nResposta:"

In [38]:
def avaliar_gpt(prompter, X, y, nome_estrategia):
    y_true = []
    y_pred = []

    for i in tqdm(range(len(X)), desc=nome_estrategia):
        prompt = prompter(X[i])
        try:
            response = openai.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0,
                max_tokens=20
            )
            resposta = response.choices[0].message.content.strip().lower()

            if 'female' in resposta:
                pred = label_encoder.transform(['Female'])[0]
            elif 'male' in resposta:
                pred = label_encoder.transform(['Male'])[0]
            else:
                pred = label_encoder.transform(['Neutro'])[0]

            y_pred.append(pred)
            y_true.append(y[i])

        except Exception as e:
            print(f"Erro no exemplo {i}: {e}")

    print(f"\n==== Resultados GPT-3.5 - {nome_estrategia} ====")
    print(classification_report(y_true, y_pred, target_names=label_encoder.classes_))

In [39]:
avaliar_gpt(zero_shot_prompt, X_gpt, y_gpt, "Zero-shot")

Zero-shot: 100%|██████████| 1050/1050 [39:53<00:00,  2.28s/it]


==== Resultados GPT-3.5 - Zero-shot ====
              precision    recall  f1-score   support

      Female       0.86      0.74      0.80       350
        Male       0.72      0.70      0.71       350
      Neutro       0.68      0.80      0.73       350

    accuracy                           0.74      1050
   macro avg       0.75      0.74      0.75      1050
weighted avg       0.75      0.74      0.75      1050






In [40]:
avaliar_gpt(one_shot_prompt, X_gpt, y_gpt, "One-shot")

One-shot: 100%|██████████| 1050/1050 [31:14<00:00,  1.79s/it]


==== Resultados GPT-3.5 - One-shot ====
              precision    recall  f1-score   support

      Female       0.86      0.77      0.81       350
        Male       0.58      0.93      0.71       350
      Neutro       0.87      0.43      0.58       350

    accuracy                           0.71      1050
   macro avg       0.77      0.71      0.70      1050
weighted avg       0.77      0.71      0.70      1050






In [41]:
avaliar_gpt(few_shot_prompt, X_gpt, y_gpt, "Few-shot")

Few-shot: 100%|██████████| 1050/1050 [20:11<00:00,  1.15s/it]


==== Resultados GPT-3.5 - Few-shot ====
              precision    recall  f1-score   support

      Female       0.91      0.66      0.77       350
        Male       0.78      0.73      0.75       350
      Neutro       0.65      0.87      0.74       350

    accuracy                           0.75      1050
   macro avg       0.78      0.75      0.75      1050
weighted avg       0.78      0.75      0.75      1050




