## 1. Importação de Bibliotecas Essenciais

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sqlite3
import warnings
import time
from collections import Counter
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR

# Processamento de texto
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer
import re
from sklearn.preprocessing import LabelEncoder

# M√©tricas e avalia√ß√£o
from sklearn.metrics import (
    classification_report, confusion_matrix, accuracy_score,
    precision_score, recall_score, f1_score, roc_auc_score,
    ConfusionMatrixDisplay
)
from sklearn.model_selection import train_test_split

# Plot styling shared by every figure in the notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Make sure the NLTK resources used by the text pre-processing are available.
# `punkt_tab` is requested in addition to `punkt` because recent NLTK releases
# load it from `word_tokenize`; on older releases that do not ship it the
# extra download is expected to be reported as unknown -- TODO confirm.
for resource_path, package_name in (
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/stopwords', 'stopwords'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_name)

# Select the compute device (GPU when available).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"‚úì Device: {device}")
if torch.cuda.is_available():
    print(f"‚úì GPU: {torch.cuda.get_device_name(0)}")
    print(f"‚úì CUDA Version: {torch.version.cuda}")

print(f"‚úì PyTorch vers√£o: {torch.__version__}")
print(f"‚úì NumPy vers√£o: {np.__version__}")

## 2. Carregar e Gerar Dataset de Grande Escala (80k amostras)

In [None]:
# Parameters
DB_PATH = "/gdrive/MyDrive/Colab Notebooks/JFRN/split_dados_01.sqlite3"
TABLE_NAME = "peticoes"
DATASET_SIZE = 80000

print(f"Carregando/Gerando dataset com {DATASET_SIZE:,} amostras...")

# Try the SQLite database first; any failure falls back to synthetic data.
try:
    # Connecting to a missing path would create an empty database file, so
    # fail early instead of leaving a stray file behind.
    if not Path(DB_PATH).is_file():
        raise FileNotFoundError(f"arquivo SQLite inexistente: {DB_PATH}")

    conn = sqlite3.connect(DB_PATH)
    try:
        query = f"SELECT * FROM {TABLE_NAME} LIMIT {DATASET_SIZE}"
        df = pd.read_sql_query(query, conn)
    finally:
        # Release the handle even when the query fails (e.g. missing table).
        conn.close()
    print(f"‚úì Dados carregados do SQLite: {df.shape}")
except Exception as e:
    print(f"‚ö† Erro ao carregar SQLite: {e}")
    print(f"Gerando dataset sint√©tico com {DATASET_SIZE:,} amostras...\n")

    # Varied corpus of legal-petition sentences
    corpus_templates = [
        # Granted petitions (favourable wording)
        "Fundamentado em jurisprud√™ncia consolidada, solicita-se deferimento integral do pedido",
        "Documenta√ß√£o completa e direito inequ√≠voco justificam a concess√£o pleiteada",
        "Precedentes do STJ e STF amplaram o reconhecimento desta modalidade de direito",
        "Ampla documenta√ß√£o apresentada comprova inequivocamente o direito invocado",
        "Recurso extraordin√°rio com repercuss√£o geral reconhecida sobre mat√©ria j√° pacificada",

        # Denied petitions (unfavourable wording)
        "Faltam elementos essenciais √† constitui√ß√£o da rela√ß√£o jur√≠dica alegada",
        "Prescri√ß√£o consumada extingue o direito de a√ß√£o pelo lapso temporal",
        "Lacks documentary evidence and legal grounds for the claim presented",
        "Recurso manifestamente infundado contradiz jurisprud√™ncia consolidada",
        "Car√™ncia de legitimidade ativa e passiva invalida a demanda",

        # Partially granted petitions (mixed wording)
        "Parcialmente fundado o recurso, reconhecendo-se apenas parte da pretens√£o",
        "Parte do pleito merece acolhimento, com modula√ß√£o de efeitos",
        "Alguns pedidos prosperam, outros carecem de fundamenta√ß√£o adequada",
        "Condena√ß√£o parcial procedente quanto aos danos materiais solicitados",
        "Direito reconhecido em sua integralidade, exceto quanto √† indeniza√ß√£o por lucros cessantes",

        # Miscellaneous (neutral) content
        "Conforme estatu√≠do no artigo 535 do C√≥digo de Processo Civil",
        "A legisla√ß√£o processual estabelece requisitos formais espec√≠ficos",
        "Compet√™ncia origin√°ria da Corte Superior observados os crit√©rios legais",
        "Procedimento ordin√°rio com todas as fases processuais cumpridas adequadamente",
        "Apela√ß√£o em conformidade com os prazos legalmente estabelecidos pelo c√≥digo processual",
    ]

    outcomes = ['Deferida', 'Indeferida', 'Parcialmente_Deferida']

    # Build the synthetic samples
    texts = []
    labels = []

    np.random.seed(42)
    for _ in range(DATASET_SIZE):
        # Join 2-4 randomly chosen templates (with repetition) into one text
        num_templates = np.random.randint(2, 5)
        selected = np.random.choice(corpus_templates, num_templates, replace=True)
        text = ". ".join(selected) + "."

        # NOTE(review): the label is drawn independently of `selected`, so in
        # the synthetic data the text carries no information about the
        # outcome; a classifier can only learn the class prior.
        label = np.random.choice(outcomes, p=[0.40, 0.35, 0.25])

        texts.append(text)
        labels.append(label)

    df = pd.DataFrame({
        'texto_peticao': texts,
        'desfecho': labels,
        'data': pd.date_range('2015-01-01', periods=DATASET_SIZE, freq='h'),
        'valor': np.random.uniform(1000, 500000, DATASET_SIZE)
    })

    print(f"‚úì Dataset sint√©tico gerado: {df.shape}")

# Exploratory analysis: print a sample of rows and the class balance of the
# `desfecho` (outcome) column.
print("\n" + "="*80)
print("AN√ÅLISE EXPLORAT√ìRIA DO DATASET")
print("="*80)
print(f"\nPrimeiras amostras:")
print(df.head(10))

print(f"\nDistribui√ß√£o de desfechos:")
distribution = df['desfecho'].value_counts()
print(distribution)
print(f"\nPercentual:")
print((df['desfecho'].value_counts(normalize=True) * 100).round(2))

# Bar chart of the outcome counts computed above
fig, ax = plt.subplots(figsize=(10, 5))
distribution.plot(kind='bar', edgecolor='black', alpha=0.7, ax=ax)
ax.set_title('Distribui√ß√£o de Desfechos - Dataset Grande Escala', fontsize=14, fontweight='bold')
ax.set_xlabel('Desfecho')
ax.set_ylabel('Frequ√™ncia')
ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

## 3. Pré-processamento de Texto

In [None]:
# Portuguese stop-word set and stemmer, built on first use and then shared by
# every preprocess_text() call.
_PT_TEXT_RESOURCES = {}


def _get_pt_text_resources():
    """Return the cached ``(stop_words, stemmer)`` pair for Portuguese."""
    if not _PT_TEXT_RESOURCES:
        _PT_TEXT_RESOURCES['stop_words'] = frozenset(stopwords.words('portuguese'))
        _PT_TEXT_RESOURCES['stemmer'] = SnowballStemmer('portuguese')
    return _PT_TEXT_RESOURCES['stop_words'], _PT_TEXT_RESOURCES['stemmer']


def preprocess_text(text):
    """Normalize a raw petition text into a space-separated string of stems.

    Steps: lower-case, strip URLs, e-mail-like tokens, digits and punctuation,
    tokenize as Portuguese, drop stop words and tokens of length <= 2, then
    stem each remaining token.

    Args:
        text: raw text; anything that is not a string is passed through
            ``str()``. Missing values (``None``/NaN) are accepted.

    Returns:
        The processed text, or ``""`` when ``text`` is missing.
    """
    if pd.isna(text):
        return ""

    text = str(text).lower()
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'[^\w\s]', ' ', text)

    # Loading the stop-word list and building the stemmer per call dominated
    # the runtime when applied to the whole dataset, hence the shared cache.
    stop_words, stemmer = _get_pt_text_resources()

    tokens = word_tokenize(text, language='portuguese')
    return ' '.join(
        stemmer.stem(word)
        for word in tokens
        if word not in stop_words and len(word) > 2
    )

# Apply preprocess_text() to every petition and time the whole pass
print("Pr√©-processando textos...")
start_time = time.time()

df['texto_processado'] = df['texto_peticao'].apply(preprocess_text)

elapsed = time.time() - start_time
print(f"‚úì Pr√©-processamento conclu√≠do em {elapsed:.2f}s")
print(f"  - Taxa: {len(df)/elapsed:.0f} textos/s")

# Show the first 80 characters of one sample before and after processing
print(f"\nExemplo antes: {df['texto_peticao'].iloc[0][:80]}...")
print(f"Exemplo depois: {df['texto_processado'].iloc[0][:80]}...")

# Length statistics, measured in whitespace-separated tokens
text_lengths = df['texto_processado'].str.split().str.len()
print(f"\nEstat√≠sticas de comprimento:")
print(f"  - M√©dio: {text_lengths.mean():.0f} palavras")
print(f"  - Min: {text_lengths.min()}, Max: {text_lengths.max()}")
print(f"  - Mediana: {text_lengths.median():.0f}, Std: {text_lengths.std():.0f}")

# Histogram of the processed text lengths
fig, ax = plt.subplots(figsize=(12, 5))
ax.hist(text_lengths, bins=50, edgecolor='black', alpha=0.7)
ax.set_xlabel('N√∫mero de Palavras')
ax.set_ylabel('Frequ√™ncia')
ax.set_title('Distribui√ß√£o de Comprimento dos Textos Pr√©-processados')
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

In [None]:
# Tokenization settings: vocabulary cap and fixed sequence length
VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 300

print("\n" + "="*80)
print("TOKENIZA√á√ÉO E PREPARA√á√ÉO DE SEQU√äNCIAS")
print("="*80)

# Count every whitespace-separated token of the pre-processed corpus
print("\nConstruindo vocabul√°rio...")
word_counts = Counter(
    token
    for document in df['texto_processado']
    for token in document.split()
)

# The (VOCAB_SIZE - 1) most frequent tokens receive indices 1, 2, ... in
# frequency order; index 0 is reserved for unknown words.
vocabulary = {
    token: rank
    for rank, (token, _count) in enumerate(
        word_counts.most_common(VOCAB_SIZE - 1), start=1
    )
}
vocabulary['<UNK>'] = 0

print(f"‚úì Vocabul√°rio constru√≠do: {len(vocabulary)} palavras")

# Turn a pre-processed text into a fixed-length list of vocabulary indices
def text_to_sequence(text, vocab, max_len):
    """Encode ``text`` as exactly ``max_len`` vocabulary indices.

    Tokens are obtained with ``text.split()``; tokens missing from ``vocab``
    map to 0. Longer inputs are truncated on the right and shorter ones are
    right-padded with 0.
    """
    indices = [vocab.get(token, 0) for token in text.split()][:max_len]
    missing = max_len - len(indices)
    return indices + [0] * missing

# Encode every processed text as a row of MAX_SEQUENCE_LENGTH indices
print("\nConvertendo textos para sequ√™ncias...")
X = np.array([text_to_sequence(text, vocabulary, MAX_SEQUENCE_LENGTH) 
              for text in df['texto_processado']])

print(f"‚úì Sequ√™ncias criadas: {X.shape}")

# Encode the outcome labels as integers
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['desfecho'])

print(f"‚úì Labels codificados: {label_encoder.classes_}")
print(f"  - Mapeamento: {dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))}")

# Stratified split: 70% train, 15% validation, 15% test.
# First hold out 15% for testing ...
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# ... then take 0.15/0.85 of the remainder so validation is 15% of the total.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.15/(0.85), random_state=42, stratify=y_temp
)

print(f"\nDivis√£o do dataset:")
print(f"  - Treino: {X_train.shape[0]:,} amostras ({X_train.shape[0]/len(X)*100:.1f}%)")
print(f"  - Valida√ß√£o: {X_val.shape[0]:,} amostras ({X_val.shape[0]/len(X)*100:.1f}%)")
print(f"  - Teste: {X_test.shape[0]:,} amostras ({X_test.shape[0]/len(X)*100:.1f}%)")

## 4. Dataset e DataLoader em PyTorch

In [None]:
class PeticionDataset(Dataset):
    """Map-style dataset pairing index sequences with integer labels.

    Both ``X`` and ``y`` are converted once to ``torch.LongTensor`` in the
    constructor; items are returned as ``(sequence, label)`` tensor pairs.
    """

    def __init__(self, X, y):
        self.X, self.y = torch.LongTensor(X), torch.LongTensor(y)

    def __len__(self):
        # Number of samples = length of the feature tensor
        return len(self.X)

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

# Wrap each split in a PeticionDataset
train_dataset, val_dataset, test_dataset = (
    PeticionDataset(features, targets)
    for features, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Loader hyper-parameters
BATCH_SIZE = 128
NUM_WORKERS = 4

# Options common to the three loaders; only the training loader shuffles.
_loader_options = dict(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    pin_memory=torch.cuda.is_available(),
)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

print("="*80)
print("DATALOADERS CRIADOS")
print("="*80)
for _split_name, _split_loader in (
    ("Train", train_loader),
    ("Val", val_loader),
    ("Test", test_loader),
):
    print(f"‚úì {_split_name} DataLoader: {len(_split_loader)} batches")

# Pull a single training batch as a sanity check of the tensor shapes
X_sample, y_sample = next(iter(train_loader))
print(f"\nSample batch shape:")
print(f"  - X: {X_sample.shape}")
print(f"  - y: {y_sample.shape}")
print(f"‚úì DataLoaders prontos para treinamento!")

## 5. Arquitetura CNN 1D Profunda em PyTorch

### Características:
- **Embedding**: Converte índices em vetores densos (100D)
- **Blocos Convolucionais**: 4 blocos com skip connections
- **Filtros variados**: Tamanhos 3, 4, 5, 7
- **Regularização**: BatchNorm, Dropout (0.3-0.4)
- **Pooling**: Global Average Pooling
- **Camadas Densas**: 256 → 128 → num_classes

In [None]:
class ResidualConvBlock(nn.Module):
    """Conv1d -> BatchNorm -> ReLU -> Dropout with a residual connection.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: width of the 1-D convolution kernel.
        padding: padding forwarded to ``nn.Conv1d``.

    The skip path is a 1x1 convolution when the channel counts differ and an
    identity otherwise. The input is expected as
    ``(batch, in_channels, seq_len)`` and the output has shape
    ``(batch, out_channels, seq_len)``.
    """
    def __init__(self, in_channels, out_channels, kernel_size, padding):
        super().__init__()

        self.conv = nn.Conv1d(
            in_channels, out_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=False
        )
        self.bn = nn.BatchNorm1d(out_channels)
        self.dropout = nn.Dropout(0.3)

        # Skip connection (1x1 conv when the channel counts differ)
        self.skip = nn.Conv1d(in_channels, out_channels, kernel_size=1) if in_channels != out_channels else nn.Identity()

    def forward(self, x):
        residual = self.skip(x)

        out = self.conv(x)
        # With an even kernel and padding = kernel_size // 2 the convolution
        # returns seq_len + 1 steps, which cannot be added to the skip path.
        # Drop the surplus trailing steps so both branches line up.
        target_len = residual.size(-1)
        if out.size(-1) > target_len:
            out = out[..., :target_len]

        out = self.bn(out)
        out = F.relu(out)
        out = self.dropout(out)

        return out + residual

class DeepCNN1D(nn.Module):
    """Multi-kernel 1-D CNN text classifier.

    Token indices are embedded, fed to four parallel ``ResidualConvBlock``s
    (kernel sizes 3, 4, 5 and 7, 100 filters each), global-average-pooled,
    concatenated and classified by a two-hidden-layer MLP
    (``hidden_dim`` -> ``hidden_dim // 2`` -> ``num_classes``).

    Args:
        vocab_size: number of rows of the embedding table.
        num_classes: size of the output logit vector.
        embedding_dim: dimensionality of the token embeddings.
        hidden_dim: width of the first dense layer.
    """

    def __init__(self, vocab_size, num_classes, embedding_dim=100, hidden_dim=256):
        super().__init__()

        kernel_widths = (3, 4, 5, 7)
        filters_per_branch = 100
        half_hidden = hidden_dim // 2

        # Token embeddings; row 0 is the padding row
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # One residual convolution branch per kernel width
        self.conv_blocks = nn.ModuleList(
            ResidualConvBlock(embedding_dim, filters_per_branch, width, padding=width // 2)
            for width in kernel_widths
        )

        # Global average pooling over the sequence axis
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.dropout1 = nn.Dropout(0.4)

        # Dense head fed by the concatenated branch outputs
        self.fc1 = nn.Linear(len(kernel_widths) * filters_per_branch, hidden_dim)
        self.bn_fc1 = nn.BatchNorm1d(hidden_dim)
        self.dropout2 = nn.Dropout(0.4)

        self.fc2 = nn.Linear(hidden_dim, half_hidden)
        self.bn_fc2 = nn.BatchNorm1d(half_hidden)
        self.dropout3 = nn.Dropout(0.3)

        self.fc_out = nn.Linear(half_hidden, num_classes)

    def forward(self, x):
        # (batch, seq_len) -> (batch, seq_len, embed_dim) -> (batch, embed_dim, seq_len)
        embedded = self.embedding(x).transpose(1, 2)

        # Each branch: (batch, num_filters, seq_len) -> (batch, num_filters)
        pooled = [
            self.pool(branch(embedded)).squeeze(-1)
            for branch in self.conv_blocks
        ]

        # (batch, num_branches * num_filters)
        features = self.dropout1(torch.cat(pooled, dim=1))

        hidden = self.dropout2(F.relu(self.bn_fc1(self.fc1(features))))
        hidden = self.dropout3(F.relu(self.bn_fc2(self.fc2(hidden))))

        return self.fc_out(hidden)

# Build the model
print("\n" + "="*80)
print("CONSTRUINDO MODELO CNN 1D PROFUNDO")
print("="*80)

# One output logit per encoded outcome class.
num_classes = len(label_encoder.classes_)
# Note: the embedding table is created with VOCAB_SIZE + 1 rows.
model = DeepCNN1D(
    vocab_size=VOCAB_SIZE + 1,
    num_classes=num_classes,
    embedding_dim=100,
    hidden_dim=256
).to(device)

# Model summary
print(f"\n‚úì Modelo criado com sucesso!")
print(f"\nArquitetura:")
print(model)

# Parameter counts (all parameters vs. those with requires_grad=True)
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"\nPar√¢metros do modelo:")
print(f"  - Total: {total_params:,}")
print(f"  - Trein√°veis: {trainable_params:,}")

## 6. Compilar e Configurar Treinamento

In [None]:
print("\n" + "="*80)
print("CONFIGURANDO TREINAMENTO")
print("="*80)

# Hyper-parameters
LEARNING_RATE = 0.001
EPOCHS = 30
PATIENCE = 5  # epochs without val-loss improvement before early stopping
WEIGHT_DECAY = 1e-5

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# Scheduler: halve the learning rate after 3 epochs without a lower
# monitored value (the training loop passes the validation loss).
# NOTE(review): `verbose` is deprecated in recent PyTorch releases and may be
# rejected by newer ones -- confirm against the installed version.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    verbose=True,
    min_lr=1e-7
)

# Early-stopping state
best_val_loss = float('inf')
patience_counter = 0

# Per-epoch training history
history = {
    'train_loss': [],
    'val_loss': [],
    'train_acc': [],
    'val_acc': []
}

print(f"\n‚úì Configura√ß√µes:")
print(f"  - Learning Rate: {LEARNING_RATE}")
print(f"  - Epochs: {EPOCHS}")
print(f"  - Batch Size: {BATCH_SIZE}")
print(f"  - Optimizer: AdamW (weight_decay={WEIGHT_DECAY})")
print(f"  - Loss: CrossEntropyLoss")
print(f"  - Early Stopping Patience: {PATIENCE} √©pocas")
print(f"  - Device: {device}")

## 7. Funções de Treinamento e Avaliação

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one epoch.

    Args:
        model: network to optimize; switched to training mode.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly classified samples. Both are ``0.0`` when the
        loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # The progress bar is cosmetic: fall back to plain iteration when tqdm's
    # notebook widget cannot be imported or instantiated.
    try:
        from tqdm.notebook import tqdm
        pbar = tqdm(train_loader, desc="Treinamento")
    except ImportError:
        pbar = train_loader
    show_progress = hasattr(pbar, 'set_postfix')

    for inputs, labels in pbar:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass, with gradient-norm clipping before the update
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Running statistics
        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if show_progress:
            # The value is pre-formatted: a format spec is not valid inside
            # a dict literal.
            pbar.set_postfix({'loss': f"{batch_loss:.4f}"}, refresh=True)

    avg_loss = total_loss / num_batches if num_batches else 0.0
    accuracy = correct / total if total else 0.0

    return avg_loss, accuracy

def validate(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    The model is put in eval mode. Returns ``(avg_loss, accuracy)`` where
    ``avg_loss`` is the sum of batch losses divided by ``len(val_loader)``
    and ``accuracy`` is the fraction of correctly classified samples.
    """
    model.eval()
    loss_sum, hits, seen = 0, 0, 0

    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            loss_sum += criterion(logits, batch_labels).item()

            # Index of the largest logit along the class axis
            predicted = torch.max(logits.data, 1)[1]
            hits += (predicted == batch_labels).sum().item()
            seen += batch_labels.size(0)

    return loss_sum / len(val_loader), hits / seen

# Progress bar (notebook widget flavour) shown while iterating over the
# training batches
from tqdm.notebook import tqdm

print("‚úì Fun√ß√µes de treinamento definidas")

## 8. Treinar Modelo (pode levar 30-60 minutos em CPU, 5-10 minutos em GPU)

In [None]:
print("\n" + "="*80)
print("TREINANDO MODELO")
print("="*80)

start_training = time.time()

# Main loop: train, validate, record metrics, adapt the learning rate and
# checkpoint / early-stop on the validation loss.
for epoch in range(EPOCHS):
    print(f"\n{'='*80}")
    print(f"√âpoca {epoch + 1}/{EPOCHS}")
    print(f"{'='*80}")
    
    # Train
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    
    # Validate
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    
    # Record history
    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)
    
    # Print metrics
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
    
    # Learning-rate scheduler monitors the validation loss
    scheduler.step(val_loss)
    
    # Early stopping: checkpoint on improvement, otherwise count towards
    # PATIENCE and stop once it is reached
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        print(f"‚úì Melhor modelo salvo! Val Loss: {val_loss:.4f}")
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
        print(f"Sem melhora. Paci√™ncia: {patience_counter}/{PATIENCE}")
        
        if patience_counter >= PATIENCE:
            print(f"\n‚ö† Early stopping acionado na √©poca {epoch + 1}")
            break

elapsed_training = time.time() - start_training
print(f"\n‚úì Treinamento conclu√≠do em {elapsed_training/60:.2f} minutos")

# Restore the weights of the best checkpoint before evaluating
model.load_state_dict(torch.load('best_model.pth'))
print("‚úì Melhor modelo carregado para avalia√ß√£o")

## 9. Avaliar no Conjunto de Teste

In [None]:
print("\n" + "="*80)
print("AVALIA√á√ÉO NO CONJUNTO DE TESTE")
print("="*80)

# Collect predictions, class probabilities and true labels over the test set
model.eval()
y_pred_list = []
y_prob_list = []
y_true_list = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        
        outputs = model(inputs)
        # Softmax turns the logits into per-class probabilities
        probs = F.softmax(outputs, dim=1)
        
        _, predicted = torch.max(outputs, 1)
        
        y_pred_list.extend(predicted.cpu().numpy())
        y_prob_list.extend(probs.cpu().numpy())
        # Labels are never moved to `device`, so no .cpu() is needed here
        y_true_list.extend(labels.numpy())

y_pred = np.array(y_pred_list)
y_prob = np.array(y_prob_list)
y_true = np.array(y_true_list)

# Aggregate metrics (support-weighted averages across classes)
test_accuracy = accuracy_score(y_true, y_pred)
test_precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
test_recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
test_f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

print(f"\nM√©tricas no Conjunto de Teste:")
print(f"  - Acur√°cia: {test_accuracy:.4f}")
print(f"  - Precis√£o (ponderada): {test_precision:.4f}")
print(f"  - Recall (ponderado): {test_recall:.4f}")
print(f"  - F1-Score (ponderado): {test_f1:.4f}")

# Per-class classification report
print(f"\n{'='*80}")
print("RELAT√ìRIO DE CLASSIFICA√á√ÉO DETALHADO")
print(f"{'='*80}")
print(classification_report(y_true, y_pred, target_names=label_encoder.classes_, digits=4))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)
print(f"\nMatriz de Confus√£o:")
print(cm)

## 10. Realizar Predições em Novos Dados

In [None]:
def predict_petition(text, model, vocabulary, label_encoder, device, max_len=MAX_SEQUENCE_LENGTH):
    """Classify a single raw petition text.

    The text goes through ``preprocess_text`` and ``text_to_sequence`` and is
    scored by ``model`` in eval mode as a batch of one.

    Returns:
        A dict with keys ``'texto'`` (the input, cut to 100 characters plus
        ``'...'`` when longer), ``'desfecho_predito'`` (decoded label),
        ``'confianca'`` (probability of the predicted class) and
        ``'probabilidades'`` (probability per class name).
    """
    # Encode the text into a single-row batch of indices
    token_ids = text_to_sequence(preprocess_text(text), vocabulary, max_len)
    batch = torch.LongTensor([token_ids]).to(device)

    # Score it without tracking gradients
    model.eval()
    with torch.no_grad():
        class_probs = F.softmax(model(batch), dim=1)
        best_idx = torch.argmax(class_probs, dim=1).item()
        best_prob = class_probs[0, best_idx].item()

    # Decode the class index back into its label
    decoded_label = label_encoder.inverse_transform([best_idx])[0]

    if len(text) > 100:
        preview = text[:100] + '...'
    else:
        preview = text

    per_class = {}
    for position, class_name in enumerate(label_encoder.classes_):
        per_class[class_name] = float(class_probs[0, position].item())

    return {
        'texto': preview,
        'desfecho_predito': decoded_label,
        'confianca': best_prob,
        'probabilidades': per_class,
    }

print("\n" + "="*80)
print("PREDI√á√ïES EM NOVOS DADOS")
print("="*80)

# Show a few randomly chosen test-set predictions
print("\nExemplos de predi√ß√µes do conjunto de teste:\n")

indices = np.random.choice(len(X_test), min(5, len(X_test)), replace=False)
for i, idx in enumerate(indices):
    # Recover the original text by finding the first row of X equal to the
    # test sequence.
    # NOTE(review): rows of X are not unique identifiers -- different
    # petitions with identical index sequences resolve to the first match.
    idx_original = np.where((X == X_test[idx]).all(axis=1))[0]
    if len(idx_original) > 0:
        texto_original = df['texto_peticao'].iloc[idx_original[0]]
    else:
        texto_original = "Texto n√£o encontrado"
    
    resultado_real = label_encoder.classes_[y_true[idx]]
    resultado_predito = label_encoder.classes_[y_pred[idx]]
    confianca = y_prob[idx, y_pred[idx]]
    
    print(f"Exemplo {i+1}:")
    print(f"  Texto: {texto_original[:80]}...")
    print(f"  Resultado real: {resultado_real}")
    print(f"  Resultado predito: {resultado_predito}")
    print(f"  Confian√ßa: {confianca:.4f}")
    print(f"  Status: {'‚úì Acerto' if resultado_real == resultado_predito else '‚úó Erro'}")
    print()

# Run the model on hand-written texts that are not part of the dataset
print("\nPredi√ß√µes em novos textos de exemplo:\n")

novos_textos = [
    "Recurso extraordin√°rio fundamentado em viola√ß√£o de direito constitucional com ampla jurisprud√™ncia consolidada",
    "Peti√ß√£o inicial carente de elementos essenciais e faltando documenta√ß√£o b√°sica",
    "Apela√ß√£o questionando parte da senten√ßa anterior com argumentos moderados"
]

for texto in novos_textos:
    resultado = predict_petition(texto, model, vocabulary, label_encoder, device)
    print(f"Texto: {resultado['texto']}")
    print(f"Desfecho predito: {resultado['desfecho_predito']}")
    print(f"Confian√ßa: {resultado['confianca']:.4f}")
    print(f"Probabilidades por classe:")
    for classe, prob in resultado['probabilidades'].items():
        print(f"  - {classe}: {prob:.4f}")
    print()

## 11. Visualizar Resultados e Métricas

In [None]:
print("\n" + "="*80)
print("VISUALIZA√á√ïES DE RESULTADOS")
print("="*80)

# 1. Loss and accuracy history (one point per trained epoch)
fig, axes = plt.subplots(1, 2, figsize=(16, 5))

# Loss
axes[0].plot(history['train_loss'], label='Train Loss', linewidth=2, marker='o', markersize=4)
axes[0].plot(history['val_loss'], label='Val Loss', linewidth=2, marker='s', markersize=4)
axes[0].set_xlabel('√âpoca', fontsize=12, fontweight='bold')
axes[0].set_ylabel('Loss', fontsize=12, fontweight='bold')
axes[0].set_title('Hist√≥rico de Loss durante Treinamento', fontsize=14, fontweight='bold')
axes[0].legend(fontsize=11)
axes[0].grid(True, alpha=0.3)

# Accuracy
axes[1].plot(history['train_acc'], label='Train Accuracy', linewidth=2, marker='o', markersize=4)
axes[1].plot(history['val_acc'], label='Val Accuracy', linewidth=2, marker='s', markersize=4)
axes[1].set_xlabel('√âpoca', fontsize=12, fontweight='bold')
axes[1].set_ylabel('Acur√°cia', fontsize=12, fontweight='bold')
axes[1].set_title('Hist√≥rico de Acur√°cia durante Treinamento', fontsize=14, fontweight='bold')
axes[1].legend(fontsize=11)
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 2. Confusion matrix of the test-set predictions
fig, ax = plt.subplots(figsize=(10, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(ax=ax, cmap='Blues', values_format='d')
plt.title('Matriz de Confus√£o - Conjunto de Teste', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

# 3. Per-class precision, recall and F1
from sklearn.metrics import precision_recall_fscore_support

precision_per_class, recall_per_class, f1_per_class, support = precision_recall_fscore_support(
    y_true, y_pred, labels=range(len(label_encoder.classes_))
)

# Grouped bars: three metrics side by side for each class
fig, ax = plt.subplots(figsize=(12, 6))
x = np.arange(len(label_encoder.classes_))
width = 0.25

ax.bar(x - width, precision_per_class, width, label='Precis√£o', alpha=0.8)
ax.bar(x, recall_per_class, width, label='Recall', alpha=0.8)
ax.bar(x + width, f1_per_class, width, label='F1-Score', alpha=0.8)

ax.set_xlabel('Classe', fontsize=12, fontweight='bold')
ax.set_ylabel('Score', fontsize=12, fontweight='bold')
ax.set_title('M√©tricas de Desempenho por Classe', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(label_encoder.classes_)
ax.legend(fontsize=11)
ax.set_ylim([0, 1.1])
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úì Visualiza√ß√µes geradas com sucesso!")

In [None]:
print("\n" + "="*80)
print("VISUALIZA√á√ïES ADICIONAIS")
print("="*80)

# 4. Per class: histogram of the probability assigned to the true class
# NOTE(review): `axes[idx]` assumes more than one class (with a single
# subplot plt.subplots does not return an array) -- confirm for other data.
fig, axes = plt.subplots(1, len(label_encoder.classes_), figsize=(16, 5))

for idx, label in enumerate(label_encoder.classes_):
    class_indices = y_true == idx
    class_probs = y_prob[class_indices, idx]
    
    axes[idx].hist(class_probs, bins=30, edgecolor='black', alpha=0.7, color=f'C{idx}')
    axes[idx].set_xlabel('Confian√ßa', fontsize=11)
    axes[idx].set_ylabel('Frequ√™ncia', fontsize=11)
    axes[idx].set_title(f'Distribui√ß√£o - {label}', fontsize=12, fontweight='bold')
    axes[idx].set_xlim([0, 1])
    axes[idx].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# 5. ROC curves (one-vs-rest)
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc

# NOTE(review): indexing one column per class below presumably requires at
# least three classes (binary label_binarize output has one column) -- verify.
y_test_bin = label_binarize(y_true, classes=range(len(label_encoder.classes_)))

fig, ax = plt.subplots(figsize=(10, 8))

colors = plt.cm.Set1(np.linspace(0, 1, len(label_encoder.classes_)))
for i, label in enumerate(label_encoder.classes_):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_prob[:, i])
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, color=colors[i], lw=2.5, label=f'{label} (AUC = {roc_auc:.3f})')

# Diagonal reference line of a random classifier
ax.plot([0, 1], [0, 1], 'k--', lw=2, label='Classificador Aleat√≥rio')
ax.set_xlabel('Taxa de Falso Positivo', fontsize=12, fontweight='bold')
ax.set_ylabel('Taxa de Verdadeiro Positivo', fontsize=12, fontweight='bold')
ax.set_title('Curvas ROC - One-vs-Rest', fontsize=14, fontweight='bold')
ax.legend(loc='lower right', fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# 6. Predicted vs. true class counts on the test set
pred_dist = np.bincount(y_pred, minlength=len(label_encoder.classes_))
true_dist = np.bincount(y_true, minlength=len(label_encoder.classes_))

fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(label_encoder.classes_))
width = 0.35

ax.bar(x - width/2, true_dist, width, label='Real', alpha=0.8)
ax.bar(x + width/2, pred_dist, width, label='Predito', alpha=0.8)

ax.set_xlabel('Classe', fontsize=12, fontweight='bold')
ax.set_ylabel('Frequ√™ncia', fontsize=12, fontweight='bold')
ax.set_title('Distribui√ß√£o Real vs Predita - Conjunto de Teste', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(label_encoder.classes_)
ax.legend(fontsize=11)
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\n‚úì Todas as visualiza√ß√µes geradas com sucesso!")

## 12. Salvar Modelo e Componentes

In [None]:
import pickle
import json

print("\n" + "="*80)
print("SALVANDO MODELO E COMPONENTES")
print("="*80)

# Save the PyTorch weights only
torch.save(model.state_dict(), 'cnn_model_estado.pth')
print("‚úì Estado do modelo salvo em 'cnn_model_estado.pth'")

# Save weights plus the constructor arguments needed to rebuild the model.
# `vocab_size` must be the number of embedding rows the model was actually
# built with (it was instantiated with VOCAB_SIZE + 1); storing VOCAB_SIZE
# would make load_state_dict() fail with a size mismatch on reload.
torch.save({
    'model_state_dict': model.state_dict(),
    'vocab_size': model.embedding.num_embeddings,
    'num_classes': num_classes,
    'embedding_dim': 100,
    'hidden_dim': 256
}, 'cnn_model_completo.pth')
print("‚úì Modelo completo salvo em 'cnn_model_completo.pth'")

# Save the vocabulary (token -> index mapping)
with open('vocabulario.pkl', 'wb') as f:
    pickle.dump(vocabulary, f)
print("‚úì Vocabul√°rio salvo em 'vocabulario.pkl'")

# Save the label encoder
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(label_encoder, f)
print("‚úì Label Encoder salvo em 'label_encoder.pkl'")

# Save the run configuration. Here `vocab_size` is the tokenizer cap
# (VOCAB_SIZE), not the embedding-table size stored in the checkpoint above.
config = {
    'vocab_size': VOCAB_SIZE,
    'max_sequence_length': MAX_SEQUENCE_LENGTH,
    'num_classes': num_classes,
    'embedding_dim': 100,
    'hidden_dim': 256,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,
    'epochs_trained': len(history['train_loss']),
    'classes': label_encoder.classes_.tolist()
}

with open('config.json', 'w') as f:
    json.dump(config, f, indent=2)
print("‚úì Configura√ß√µes salvas em 'config.json'")

# Save the per-epoch training history
history_df = pd.DataFrame(history)
history_df.to_csv('historico_treinamento.csv', index=False)
print("‚úì Hist√≥rico de treinamento salvo em 'historico_treinamento.csv'")

# Save the final test metrics; `total_parameters` reports the total count
# (it previously stored the trainable count under that key).
metrics_final = {
    'test_accuracy': float(test_accuracy),
    'test_precision': float(test_precision),
    'test_recall': float(test_recall),
    'test_f1': float(test_f1),
    'total_parameters': int(total_params),
    'training_time_seconds': elapsed_training
}

with open('metricas_finais.json', 'w') as f:
    json.dump(metrics_final, f, indent=2)
print("‚úì M√©tricas finais salvas em 'metricas_finais.json'")

print("\n‚úì Todos os componentes salvos com sucesso!")

## 13. Resumo Executivo e Recomendações

In [None]:
# Final report: prints dataset sizes, architecture, training and test figures
# from the variables computed in the previous cells, followed by a static
# list of improvement suggestions.
print("\n" + "="*80)
print("RESUMO EXECUTIVO")
print("="*80)

print(f"""
üìä DATASET
  ‚Ä¢ Tamanho total: {len(df):,} amostras
  ‚Ä¢ Treino: {len(X_train):,} ({len(X_train)/len(df)*100:.1f}%)
  ‚Ä¢ Valida√ß√£o: {len(X_val):,} ({len(X_val)/len(df)*100:.1f}%)
  ‚Ä¢ Teste: {len(X_test):,} ({len(X_test)/len(df)*100:.1f}%)
  ‚Ä¢ Classes: {', '.join(label_encoder.classes_)}

üèóÔ∏è ARQUITETURA
  ‚Ä¢ Tipo: CNN 1D Profunda com Residual Connections
  ‚Ä¢ Camadas Convolucionais: 4 blocos (kernels 3, 4, 5, 7)
  ‚Ä¢ Par√¢metros totais: {total_params:,}
  ‚Ä¢ Par√¢metros trein√°veis: {trainable_params:,}

üìà TREINAMENTO
  ‚Ä¢ √âpocas executadas: {len(history['train_loss'])}
  ‚Ä¢ Tempo total: {elapsed_training/60:.2f} minutos
  ‚Ä¢ Otimizador: AdamW (lr={LEARNING_RATE})
  ‚Ä¢ Early Stopping: Paci√™ncia = {PATIENCE}
  ‚Ä¢ Melhor val_loss: {best_val_loss:.4f}

‚úÖ PERFORMANCE NO TESTE
  ‚Ä¢ Acur√°cia: {test_accuracy:.4f}
  ‚Ä¢ Precis√£o (ponderada): {test_precision:.4f}
  ‚Ä¢ Recall (ponderado): {test_recall:.4f}
  ‚Ä¢ F1-Score (ponderado): {test_f1:.4f}

üìå RECOMENDA√á√ïES PARA MELHORIAS
  1. Usar word embeddings pr√©-treinados (Word2Vec, GloVe, FastText)
  2. Implementar attention mechanisms para capturar contexto
  3. Treinar com mais amostras (se dispon√≠vel)
  4. Usar Transformer-based models (BERT, DistilBERT)
  5. Aplicar data augmentation (paraphrasing, back-translation)
  6. Realizar an√°lise de features mais importantes
  7. Implementar ensemble methods (vota√ß√£o de m√∫ltiplos modelos)
""")

print("="*80)
print("‚úì NOTEBOOK CONCLU√çDO COM SUCESSO!")
print("="*80)