This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.

**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**

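This notebook was originally generated for TensorFlow 2.6; the code cells below have been ported to PyTorch, and TensorFlow is only used to load the IMDB dataset.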

# Fundamentals of machine learning

## Generalization: The goal of machine learning

### Underfitting and overfitting

#### Noisy training data

#### Ambiguous features

#### Rare features and spurious correlations

**Adding white-noise channels or all-zeros channels to MNIST**

In [0]:
import torch
from torchvision import datasets, transforms
import numpy as np

# Vectorize every image (28x28 -> 784)
transform = transforms.Compose([
    transforms.ToTensor(),                        # [0, 255] -> [0.0, 1.0]
    transforms.Lambda(lambda x: x.view(-1))       # flatten 28x28 into 784
])

# Load the MNIST training split
mnist_dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(mnist_dataset, batch_size=len(mnist_dataset), shuffle=False)

# One batch as large as the dataset returns every sample at once
train_images, train_labels = next(iter(train_loader))  # shape: [60000, 784], [60000]
train_images = train_images.numpy()                    # NumPy array, same dtype as the tensor
train_labels = train_labels.numpy()

# Append one white-noise feature per pixel. The noise is cast to the dtype of
# the images: np.random.random returns float64, and concatenating it as-is
# would promote the whole result to float64 and double its memory footprint.
train_images_with_noise_channels = np.concatenate(
    [train_images, np.random.random(train_images.shape).astype(train_images.dtype)],
    axis=1)

# Append one all-zero feature per pixel (same dtype as the images, for the
# same reason as above).
train_images_with_zeros_channels = np.concatenate(
    [train_images, np.zeros_like(train_images)], axis=1)


**Training the same model on MNIST data with noise channels or all-zero channels**

In [0]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleModel(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of input features per sample. Defaults to 1568
            (784 pixels + 784 extra channels).
        hidden_units: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=1568, hidden_units=512, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalized class logits for the batch ``x``."""
        x = F.relu(self.fc1(x))
        # No softmax here: CrossEntropyLoss takes care of it.
        return self.fc2(x)

from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

def train_model(X, y, epochs=10, batch_size=128):
    """Train a fresh ``SimpleModel`` and track its validation accuracy.

    ``X`` and ``y`` are split 80/20 (fixed ``random_state=42``) into training
    and validation sets. The model is optimized with RMSprop on a
    cross-entropy loss, and the validation accuracy is measured and printed
    after every epoch.

    Args:
        X: Feature array, one row per sample.
        y: Integer class labels, one per sample.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size for both loaders.

    Returns:
        ``(model, history)`` where ``history['val_accuracy']`` holds one
        accuracy value per epoch.
    """

    def _as_dataset(features, targets):
        # Wrap a NumPy (features, labels) pair as float32/int64 tensors.
        return TensorDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(targets, dtype=torch.long),
        )

    # Train/validation split
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    train_loader = DataLoader(_as_dataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(_as_dataset(X_val, y_val), batch_size=batch_size)

    model = SimpleModel()
    optimizer = torch.optim.RMSprop(model.parameters())
    loss_fn = nn.CrossEntropyLoss()

    val_accuracies = []

    for epoch in range(epochs):
        # One optimization pass over the training data
        model.train()
        for inputs, targets in train_loader:
            batch_loss = loss_fn(model(inputs), targets)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # Validation: fraction of samples whose arg-max logit is the label
        model.eval()
        hits = 0
        seen = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                hits += (model(inputs).argmax(dim=1) == targets).sum().item()
                seen += targets.size(0)
        acc = hits / seen
        val_accuracies.append(acc)
        print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")

    return model, {'val_accuracy': val_accuracies}

# Train on the images extended with white-noise channels.
model_noise, history_noise = train_model(train_images_with_noise_channels, train_labels)

# Train the same architecture on the images extended with all-zero channels.
model_zeros, history_zeros = train_model(train_images_with_zeros_channels, train_labels)


**Plotting a validation accuracy comparison**

In [0]:
import matplotlib.pyplot as plt

# Per-epoch validation accuracy recorded by the two training runs.
val_acc_noise = history_noise["val_accuracy"]
val_acc_zeros = history_zeros["val_accuracy"]
# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(val_acc_noise) + 1)

# Solid line: noise channels; dashed line: all-zero channels.
plt.plot(epochs, val_acc_noise, "b-", label="Validation accuracy with noise channels")
plt.plot(epochs, val_acc_zeros, "b--", label="Validation accuracy with zeros channels")
plt.title("Effect of noise channels on validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()


### The nature of generalization in deep learning

**Fitting a MNIST model with randomly shuffled labels**

In [0]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

# Transform: convert each image to a tensor and flatten it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))
])

# Load MNIST; a single batch as large as the dataset returns every sample at once
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(mnist_dataset, batch_size=len(mnist_dataset))
train_images, train_labels = next(iter(train_loader))

# Convert to NumPy
train_images = train_images.numpy().astype("float32")
train_labels = train_labels.numpy()

# Shuffle a copy of the labels in place, so train_labels itself stays untouched
random_train_labels = train_labels.copy()
np.random.shuffle(random_train_labels)


# Redefine SimpleModel for plain 784-feature images; the train_model function
# defined earlier is reused below and picks up this definition.
class SimpleModel(nn.Module):
    """Two-layer perceptron: 784 inputs -> 512 hidden units (ReLU) -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Return unnormalized class logits for the batch ``x``."""
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return logits

# Train the model on the shuffled (random) labels
model_random, history_random = train_model(train_images, random_train_labels, epochs=100, batch_size=128)


#### The manifold hypothesis

#### Interpolation as a source of generalization

#### Why deep learning works

#### Training data is paramount

## Evaluating machine-learning models

### Training, validation, and test sets

#### Simple hold-out validation

#### K-fold validation

#### Iterated K-fold validation with shuffling

### Beating a common-sense baseline

### Things to keep in mind about model evaluation

## Improving model fit

### Tuning key gradient descent parameters

**Training a MNIST model with an incorrectly high learning rate**

In [0]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Transform: convert each image to a tensor and flatten it
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.view(-1))
])

# Load MNIST; a single batch as large as the dataset returns every sample at once
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(mnist_dataset, batch_size=len(mnist_dataset))
train_images, train_labels = next(iter(train_loader))

# Convert to NumPy; this rebinds the notebook-level train_images / train_labels
train_images = train_images.numpy().astype("float32")
train_labels = train_labels.numpy()

def train_model(X, y, epochs=10, batch_size=128, learning_rate=0.001):
    """Train a fresh ``SimpleModel`` with a configurable RMSprop learning rate.

    The data is split 80/20 (fixed ``random_state=42``) into training and
    validation sets. After every epoch the validation accuracy is printed
    and appended to the returned history.

    Args:
        X: Feature array, one row per sample.
        y: Integer class labels, one per sample.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size for both loaders.
        learning_rate: Learning rate handed to ``torch.optim.RMSprop``.

    Returns:
        ``(model, history)`` where ``history['val_accuracy']`` holds one
        accuracy value per epoch.
    """
    from sklearn.model_selection import train_test_split
    from torch.utils.data import TensorDataset, DataLoader

    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Features become float32 tensors, labels int64 tensors.
    train_set = TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
    )
    val_set = TensorDataset(
        torch.tensor(X_val, dtype=torch.float32),
        torch.tensor(y_val, dtype=torch.long),
    )
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)

    model = SimpleModel()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    history = {'val_accuracy': []}

    for epoch in range(epochs):
        model.train()
        for features, targets in train_loader:
            batch_loss = loss_fn(model(features), targets)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            # The generator is consumed inside the no_grad block.
            correct = sum(
                (model(features).argmax(dim=1) == targets).sum().item()
                for features, targets in val_loader
            )
        # Every validation sample is visited exactly once per epoch.
        acc = correct / len(val_set)
        history['val_accuracy'].append(acc)
        print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")

    return model, history

# Train with a deliberately excessive learning rate (1.0) to show its effect on validation accuracy.
model_high_lr, history_high_lr = train_model(train_images, train_labels, epochs=10, batch_size=128, learning_rate=1.0)




**The same model with a more appropriate learning rate**

In [0]:
# Model
class SimpleModel(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of input features per sample (784 flattened
            pixels by default).
        hidden_units: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=784, hidden_units=512, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalized class logits for the batch ``x``."""
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Instantiate the model
model = SimpleModel()

# Optimizer with an appropriate learning rate (1e-2)
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-2)

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Split into training and validation sets
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=128, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=128)

# Training
for epoch in range(10):
    model.train()
    for xb, yb in train_loader:
        preds = model(xb)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation (optional): accuracy over the held-out 20%
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb)
            predicted = torch.argmax(preds, dim=1)  # predicted class = largest logit
            correct += (predicted == yb).sum().item()
            total += yb.size(0)
    val_acc = correct / total
    print(f"Epoch {epoch+1}: val_acc = {val_acc:.4f}")


### Leveraging better architecture priors

### Increasing model capacity

**A simple logistic regression on MNIST**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np

# Model: logistic regression
class LogisticRegressionModel(nn.Module):
    """Single linear layer mapping 784 input features to 10 class logits."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(in_features=784, out_features=10)

    def forward(self, x):
        """Return unnormalized logits; no softmax (CrossEntropyLoss applies it)."""
        logits = self.fc(x)
        return logits

# The data must already be NumPy arrays (train_images, train_labels)

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=128, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=128)

# Instantiate the model, optimizer and loss function
model = LogisticRegressionModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# History kept for the plot in the next cell
history_small_model = {"val_loss": []}

# Train for 20 epochs
for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        preds = model(xb)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_loss_total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb)
            loss = loss_fn(preds, yb)
            # Weight each batch loss by its batch size so the division below
            # gives a per-sample average.
            val_loss_total += loss.item() * xb.size(0)

    val_loss_avg = val_loss_total / len(val_loader.dataset)
    history_small_model["val_loss"].append(val_loss_avg)
    print(f"Epoch {epoch+1}: val_loss = {val_loss_avg:.4f}")


In [0]:
import matplotlib.pyplot as plt

# Per-epoch validation loss recorded while training the logistic regression.
val_loss = history_small_model["val_loss"]
# Epochs are numbered from 1 on the x axis.
epochs = range(1, len(val_loss) + 1)

plt.plot(epochs, val_loss, "b--", label="Validation loss")
plt.title("Effect of insufficient model capacity on validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 1. Define the "larger" model
class LargeModel(nn.Module):
    """784 inputs -> two 96-unit ReLU hidden layers -> 10 class logits."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=96)
        self.fc2 = nn.Linear(in_features=96, out_features=96)
        self.fc3 = nn.Linear(in_features=96, out_features=10)

    def forward(self, x):
        """Return unnormalized logits; CrossEntropyLoss takes care of the softmax."""
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        return self.fc3(x)
       
# 2. Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=128, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=128)

# 3. Instantiate the model and optimizer
model = LargeModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.CrossEntropyLoss()

# 4. Training
history_large_model = {"val_loss": []}

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        preds = model(xb)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_loss_total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb)
            loss = loss_fn(preds, yb)
            # Weight each batch loss by its batch size so the division below
            # gives a per-sample average.
            val_loss_total += loss.item() * xb.size(0)

    val_loss_avg = val_loss_total / len(val_loader.dataset)
    history_large_model["val_loss"].append(val_loss_avg)
    print(f"Epoch {epoch+1}: val_loss = {val_loss_avg:.4f}")


## Improving generalization

### Dataset curation

### Feature engineering

### Using early stopping

### Regularizing your model

#### Reducing the network's size

**Original model**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import imdb  # só para carregar o dataset original

# 1. Load the IMDB dataset with num_words=10000; only the first (training)
#    pair returned by load_data is kept, the second is discarded.
(train_data, train_labels), _ = imdb.load_data(num_words=10000)

# 2. Vectorize the data (manual multi-hot encoding)
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences.

    Args:
        sequences: Sequence of integer-index collections, one per sample.
        dimension: Width of every encoded row; indices must be below it.

    Returns:
        A float32 array of shape ``(len(sequences), dimension)`` in which
        entry ``[i, j]`` is 1.0 when index ``j`` occurs in ``sequences[i]``
        and 0.0 otherwise.
    """
    n_samples = len(sequences)
    encoded = np.zeros((n_samples, dimension), dtype="float32")
    for row_idx, word_indices in enumerate(sequences):
        # Fancy indexing sets every listed column of this row in one step.
        encoded[row_idx, word_indices] = 1.0
    return encoded

# Encode the reviews as 10000-wide multi-hot rows and cast the labels to
# float32, the dtype the binary cross-entropy targets are given below.
train_data = vectorize_sequences(train_data, dimension=10000)
train_labels = np.array(train_labels).astype("float32")

# 3. Split into training (60%) and validation (40%) sets
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.4, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=512)

# 4. Model
class SentimentModel(nn.Module):
    """10000 inputs -> two 16-unit ReLU hidden layers -> one sigmoid output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10000, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=16)
        self.fc3 = nn.Linear(in_features=16, out_features=1)

    def forward(self, x):
        """Return one value in (0, 1) per sample, shape ``(batch, 1)``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        # Sigmoid squashes the single output into the 0..1 range.
        return self.fc3(hidden).sigmoid()

model = SentimentModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.BCELoss()

# 5. Training: the validation accuracy is recorded after every epoch
history_original = {"val_accuracy": []}

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        # squeeze(1) drops only the output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch dimension of a
        # one-sample batch, and BCELoss rejects mismatched input/target sizes.
        preds = model(xb).squeeze(1)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb).squeeze(1)
            predicted = (preds >= 0.5).float()  # threshold the sigmoid output at 0.5
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    acc = correct / total
    history_original["val_accuracy"].append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")


**Version of the model with lower capacity**

In [0]:
# 1. Split the data again (in case it is needed); with the same random_state
#    the split arguments are identical to the earlier cell's.
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.4, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=512)

# 2. Model with reduced capacity
class SmallerSentimentModel(nn.Module):
    """10000 inputs -> two 4-unit ReLU hidden layers -> one sigmoid output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10000, out_features=4)
        self.fc2 = nn.Linear(in_features=4, out_features=4)
        self.fc3 = nn.Linear(in_features=4, out_features=1)

    def forward(self, x):
        """Return one value in (0, 1) per sample, shape ``(batch, 1)``."""
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))
        output = self.fc3(x)
        return torch.sigmoid(output)

model = SmallerSentimentModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.BCELoss()

# 3. Training: the validation accuracy is recorded after every epoch
history_smaller_model = {"val_accuracy": []}

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        # squeeze(1) drops only the output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch dimension of a
        # one-sample batch, and BCELoss rejects mismatched input/target sizes.
        preds = model(xb).squeeze(1)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb).squeeze(1)
            predicted = (preds >= 0.5).float()  # threshold the sigmoid output at 0.5
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    acc = correct / total
    history_smaller_model["val_accuracy"].append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")


**Version of the model with higher capacity**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 1. Split the data into training (60%) and validation (40%) sets
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.4, random_state=42)

# Features and labels both become float32 tensors (the labels are passed to BCELoss as targets).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=512)

# 2. Model with higher capacity
class LargerSentimentModel(nn.Module):
    """10000 inputs -> two 512-unit ReLU hidden layers -> one sigmoid output."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10000, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=512)
        self.fc3 = nn.Linear(in_features=512, out_features=1)

    def forward(self, x):
        """Return one value in (0, 1) per sample, shape ``(batch, 1)``."""
        first_hidden = torch.relu(self.fc1(x))
        second_hidden = torch.relu(self.fc2(first_hidden))
        score = self.fc3(second_hidden)
        return torch.sigmoid(score)

model = LargerSentimentModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.BCELoss()

# 3. Training: the validation accuracy is recorded after every epoch
history_larger_model = {"val_accuracy": []}

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        # squeeze(1) drops only the output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch dimension of a
        # one-sample batch, and BCELoss rejects mismatched input/target sizes.
        preds = model(xb).squeeze(1)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb).squeeze(1)
            predicted = (preds >= 0.5).float()  # threshold the sigmoid output at 0.5
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    acc = correct / total
    history_larger_model["val_accuracy"].append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")


#### Adding weight regularization

**Adding L2 weight regularization to the model**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 1. Training/validation split (60% / 40%)
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.4, random_state=42)

# Features and labels both become float32 tensors (the labels are passed to BCELoss as targets).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=512)

# 2. Model with the original architecture
class L2RegularizedModel(nn.Module):
    """10000 inputs -> two 16-unit ReLU hidden layers -> one sigmoid output.

    The class itself applies no regularization; the L2 penalty comes from
    the ``weight_decay`` of the optimizer it is trained with.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10000, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=16)
        self.fc3 = nn.Linear(in_features=16, out_features=1)

    def forward(self, x):
        """Return one value in (0, 1) per sample, shape ``(batch, 1)``."""
        hidden = F.relu(self.fc2(F.relu(self.fc1(x))))
        return self.fc3(hidden).sigmoid()

model = L2RegularizedModel()

# 3. Optimizer with L2 regularization (weight_decay). Every parameter returned
#    by model.parameters() is handed to the optimizer, biases included.
optimizer = torch.optim.RMSprop(model.parameters(), weight_decay=0.002)
loss_fn = nn.BCELoss()

# 4. Training: the validation accuracy is recorded after every epoch
history_l2_reg = {"val_accuracy": []}

for epoch in range(20):
    model.train()
    for xb, yb in train_loader:
        # squeeze(1) drops only the output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch dimension of a
        # one-sample batch, and BCELoss rejects mismatched input/target sizes.
        preds = model(xb).squeeze(1)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb).squeeze(1)
            predicted = (preds >= 0.5).float()  # threshold the sigmoid output at 0.5
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    acc = correct / total
    history_l2_reg["val_accuracy"].append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")


**Different weight regularizers available in Keras (PyTorch equivalents)**

In [0]:
# L2 regularization: enabled through the optimizer's weight_decay argument.
optimizer = torch.optim.RMSprop(model.parameters(), weight_decay=0.001)

# L1 regularization: done by hand, adding the scaled sum of absolute
# parameter values to the data loss.
# NOTE(review): `preds` and `yb` are whatever an earlier cell left behind, so
# this only illustrates the formula; in real training, compute it per batch
# inside the training loop.
l1_lambda = 0.001
l1_loss = sum(torch.sum(torch.abs(param)) for param in model.parameters())

loss = loss_fn(preds, yb) + l1_lambda * l1_loss


#### Adding dropout

**Adding dropout to the IMDB model**

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# 1. Split into training (60%) and validation (40%) sets
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.4, random_state=42)

# Features and labels both become float32 tensors (the labels are passed to BCELoss as targets).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=512, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=512)

# 2. Model with dropout
class DropoutSentimentModel(nn.Module):
    """Sentiment classifier with 50% dropout after each hidden layer.

    Layout: 10000 inputs -> 16 (ReLU, dropout) -> 16 (ReLU, dropout) -> one
    sigmoid output.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10000, out_features=16)
        self.dropout1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=16, out_features=16)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(in_features=16, out_features=1)

    def forward(self, x):
        """Return one value in (0, 1) per sample, shape ``(batch, 1)``."""
        hidden = self.dropout1(F.relu(self.fc1(x)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        return torch.sigmoid(self.fc3(hidden))

model = DropoutSentimentModel()
optimizer = torch.optim.RMSprop(model.parameters())
loss_fn = nn.BCELoss()

# 3. Training: the validation accuracy is recorded after every epoch
history_dropout = {"val_accuracy": []}

for epoch in range(20):
    model.train()  # training mode, so the dropout layers are active
    for xb, yb in train_loader:
        # squeeze(1) drops only the output dimension: (batch, 1) -> (batch,).
        # A bare squeeze() would also drop the batch dimension of a
        # one-sample batch, and BCELoss rejects mismatched input/target sizes.
        preds = model(xb).squeeze(1)
        loss = loss_fn(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()  # evaluation mode, so the dropout layers are disabled
    correct, total = 0, 0
    with torch.no_grad():
        for xb, yb in val_loader:
            preds = model(xb).squeeze(1)
            predicted = (preds >= 0.5).float()  # threshold the sigmoid output at 0.5
            correct += (predicted == yb).sum().item()
            total += yb.size(0)

    acc = correct / total
    history_dropout["val_accuracy"].append(acc)
    print(f"Epoch {epoch+1}: val_acc = {acc:.4f}")


## Summary