# Regularization
## Weight decay

In [None]:
# Step 2: Define a simple neural network
class Net(nn.Module):
    """Small fully connected network for binary classification.

    Maps 10 input features through a 5-unit ReLU hidden layer to a single
    sigmoid-activated output in [0, 1].
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 5)  # input features -> hidden units
        self.fc2 = nn.Linear(5, 1)   # hidden units -> single output

    def forward(self, x):
        """Return the sigmoid output of the network for the input ``x``."""
        hidden = torch.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))

model = Net()

# Step 3: Set up loss and optimizer WITH weight decay
criterion = nn.BCELoss()  # Binary cross-entropy on probabilities (Net ends in a sigmoid)
# NOTE: optim.Adam applies `weight_decay` as an L2 penalty added to the
# gradient; optim.AdamW is the variant with decoupled weight decay.
optimizer = optim.Adam(model.parameters(), lr=0.01,
                       weight_decay=1e-4)  # Weight decay here!

# Step 4: Training loop
# NOTE(review): `dataloader` must already be defined when this cell runs and is
# assumed to yield (features, labels) batches with 10 features per sample - confirm.
num_epochs = 10
for epoch in range(num_epochs):
    for batch_X, batch_y in dataloader:
        optimizer.zero_grad()
        # Drop only the trailing size-1 output dimension: (batch, 1) -> (batch,).
        # A bare .squeeze() would also remove the batch dimension when a batch
        # holds a single sample, and BCELoss would reject the shape mismatch.
        outputs = model(batch_X).squeeze(-1)
        loss = criterion(outputs, batch_y.float())

        # Backpropagation
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch in the epoch, not the epoch average.
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

## Dataset augmentation

In [None]:
# In images

# Augmentation steps, applied in this order to every image as it is loaded.
augmentation_steps = [
    # Mirror the image left-right half of the time
    transforms.RandomHorizontalFlip(p=0.5),
    # Rotate by a random angle in [-15, +15] degrees
    transforms.RandomRotation(degrees=15),
    # Randomly perturb brightness, contrast and saturation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # Crop 80-100% of the image area, then resize the crop to 224x224
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    # Convert to a tensor (scales pixel values to [0, 1])
    transforms.ToTensor(),
    # Standardize each channel with the ImageNet mean / std
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
augmentation = transforms.Compose(augmentation_steps)

# The transform runs on the fly, so every epoch sees a differently augmented
# version of each training image.
dataset = datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=augmentation,
)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# NOTE(review): `model` must be a network that accepts image batches; which
# model is in scope here is not visible from this cell - confirm.
for images, labels in dataloader:

    # forward pass
    output = model(images)

In [None]:
# In text processing

import nlpaug.augmenter.word as naw

text = "The quick brown fox jumps over the lazy dog"

# Synonym replacement: swap some words for WordNet synonyms
aug = naw.SynonymAug(aug_src='wordnet')
augmented_text = aug.augment(text)
# e.g., "The fast brown fox leaps over the lazy dog"
print(augmented_text)

# Random word swap: exchange the positions of randomly chosen words
aug = naw.RandomWordAug(action="swap")
swapped_text = aug.augment(text)
# e.g., "The brown quick fox jumps lazy the over dog"
print(swapped_text)

In [None]:
# In time series

import numpy as np
from tsaug import TimeWarp, AddNoise, Drift

# Sample time-series (e.g., sensor data)
X = np.random.randn(100, 3)  # 100 timesteps, 3 features

# Build the augmentation pipeline; `+` chains augmenters so they run in sequence.
pipeline = (
    TimeWarp(n_speed_change=3)  # Random speed changes
    + AddNoise(scale=0.1)       # Add Gaussian noise
    + Drift(max_drift=0.3)      # Simulate sensor drift
)

# Apply augmentations.
# tsaug interprets a 2-D array as (n_series, n_timesteps), so passing X as-is
# would treat it as 100 series of length 3. Add a leading series axis to get
# (1, 100, 3) = (series, timesteps, channels), then drop that axis again so
# `augmented` has the same (100, 3) layout as X.
augmented = pipeline.augment(X[np.newaxis, ...])[0]

In [None]:
# In tabular data
from imblearn.over_sampling import SMOTE

# Sample tabular data
# SMOTE interpolates between a minority sample and its nearest minority
# neighbours, so the minority class needs at least k_neighbors + 1 samples.
X = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6],
     [0.7, 0.8], [0.9, 1.0], [0.2, 0.3]]  # Features
y = [0, 1, 0, 0, 0, 1]                    # Imbalanced labels (4 vs 2)

# Generate synthetic samples
# The default k_neighbors=5 would fail here because the minority class has only
# two samples; with k_neighbors=1 each sample interpolates towards the other.
smote = SMOTE(k_neighbors=1)
X_aug, y_aug = smote.fit_resample(X, y)
# e.g., the 6 original rows plus 2 synthetic minority rows lying on the segment
# between [0.3, 0.4] and [0.2, 0.3], such as [0.27, 0.37]
print(X_aug)

In [None]:
# In audio data
import librosa

# Load audio (`y` is the waveform, `sr` the sampling rate it was loaded at)
y, sr = librosa.load("audio.wav")

# Pitch shift
# `sr` is passed by keyword: recent librosa releases make every argument after
# `y` keyword-only, so the positional form raises a TypeError there.
y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=2)  # Shift up 2 semitones

# Time stretch
# rate > 1 speeds the signal up (shorter output); rate < 1 slows it down.
y_stretched = librosa.effects.time_stretch(y, rate=1.2)  # 20% faster

## Early stopping

In [None]:
class Classifier(nn.Module):
    """Binary classifier: 10 features -> 5 ReLU units -> 1 sigmoid output."""

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(10, 5),
            nn.ReLU(),
            nn.Linear(5, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one sigmoid output per sample, with the trailing size-1 axis removed.

        Only the last dimension is squeezed, so a batch of shape (batch, 10)
        always yields shape (batch,). A bare ``.squeeze()`` would collapse a
        single-sample batch to a 0-d tensor, which no longer matches a
        ``(1,)``-shaped target in ``nn.BCELoss``.
        """
        return self.layers(x).squeeze(-1)

# Binary cross-entropy on the classifier's sigmoid outputs
criterion = nn.BCELoss()
model = Classifier()
# Adam over all model parameters, without weight decay
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    An epoch counts as an improvement only if its loss is lower than the best
    loss seen so far by more than ``min_delta``. After ``patience``
    consecutive epochs without an improvement, ``should_stop`` returns True.
    """

    def __init__(self, patience=3, min_delta=0.0):
        self.patience = patience      # non-improving epochs tolerated in a row
        self.min_delta = min_delta    # margin a new loss must beat the best by
        self.counter = 0              # current run of non-improving epochs
        self.min_validation_loss = float('inf')  # best loss seen so far

    def should_stop(self, validation_loss):
        """Record ``validation_loss`` and return True if training should stop."""
        threshold = self.min_validation_loss - self.min_delta
        if validation_loss < threshold:
            # New best: remember it and restart the patience countdown.
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        self.counter += 1
        return self.counter >= self.patience

def validate(model, val_loader, criterion):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to evaluation mode (and left in it), and the
    forward passes run without gradient tracking. The result is the sum of
    the batch losses divided by the number of batches.
    """
    model.eval()
    with torch.no_grad():
        batch_losses = [
            criterion(model(inputs), targets).item()
            for inputs, targets in val_loader
        ]
    return sum(batch_losses, 0.0) / len(val_loader)

import copy  # needed to snapshot the best weights (see below)

early_stopper = EarlyStopper(patience=5, min_delta=0.01)
best_model_weights = None

# NOTE(review): `train_loader` and `val_loader` must be defined before this
# cell runs; they are not created anywhere in the visible notebook - confirm.
for epoch in range(100):  # Max epochs
    # Training
    model.train()
    train_loss = 0.0
    for X, y in train_loader:
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation (validate() switches the model to eval mode; model.train()
    # at the top of the loop switches it back for the next epoch)
    val_loss = validate(model, val_loader, criterion)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss/len(train_loader):.4f}, Val Loss = {val_loss:.4f}")

    # Early stopping check
    if early_stopper.should_stop(val_loss):
        print(f"Early stopping triggered at epoch {epoch+1}!")
        break

    # Save best model weights. should_stop() has just updated
    # min_validation_loss if this epoch improved, so equality means this epoch
    # is the new best. A deep copy is required: state_dict().copy() is shallow
    # and its tensors share storage with the live parameters, so the snapshot
    # would keep changing as training continues.
    if val_loss == early_stopper.min_validation_loss:
        best_model_weights = copy.deepcopy(model.state_dict())

# Restore best model (skipped if no epoch ever produced a snapshot)
if best_model_weights is not None:
    model.load_state_dict(best_model_weights)

## Dropout

In [None]:
class NetNoDropout(nn.Module):
    """Baseline MLP without dropout: 2 inputs -> 128 -> 128 -> 2 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 2)

    def forward(self, x):
        """Apply two ReLU hidden layers, then the final linear layer (no activation)."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)
    

class NetWithDropout(nn.Module):
    """Same MLP as the no-dropout baseline, with dropout after each hidden activation.

    ``dropout_rate`` is the drop probability passed to both ``nn.Dropout`` layers.
    """

    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.fc1 = nn.Linear(2, 128)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(128, 128)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(128, 2)

    def forward(self, x):
        """Run linear -> ReLU -> dropout for both hidden layers, then the output layer."""
        hidden_stages = (
            (self.fc1, self.dropout1),
            (self.fc2, self.dropout2),
        )
        for linear, dropout in hidden_stages:
            # Dropout is applied after the activation
            x = dropout(torch.relu(linear(x)))
        return self.fc3(x)
    

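# Dropout is only active in training mode:
#   model.train()  -> units are randomly dropped (use while training)
#   model.eval()   -> dropout is disabled (use for validation / inference)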

## Batch normalization

Batch Normalization is like a traffic cop for your neural network:

    - Problem: Without it, some neurons get "loud" (huge numbers) while others stay "quiet" (tiny decimals), making training chaotic.

    - Solution:

        - Normalize: For each feature, the values in the current mini-batch are shifted and scaled to have an average of 0 and a standard deviation of 1 (like grading on a curve).

        - Scale & Shift: Then, the network learns how much to "re-adjust" the values (like turning up/down the volume).

In [None]:
class NetNoBN(nn.Module):
    """Baseline MLP without batch normalization: 20 inputs -> 64 -> 64 -> 2 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(20, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Apply two ReLU hidden layers, then the final linear layer (no activation)."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)
    
class NetWithBN(nn.Module):
    """Same MLP as the no-batch-norm baseline, with BatchNorm1d before each hidden ReLU."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(20, 64)
        self.bn1 = nn.BatchNorm1d(64)  # normalizes the output of fc1
        self.fc2 = nn.Linear(64, 64)
        self.bn2 = nn.BatchNorm1d(64)  # normalizes the output of fc2
        self.fc3 = nn.Linear(64, 2)

    def forward(self, x):
        """Run linear -> batch norm -> ReLU for both hidden layers, then the output layer."""
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
        )
        for linear, batch_norm in hidden_stages:
            x = torch.relu(batch_norm(linear(x)))
        return self.fc3(x)
    
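# BatchNorm behaves differently in the two modes:
#   model.train()  -> normalizes with the current batch's statistics and updates the running averages
#   model.eval()   -> normalizes with the stored running averages (use for validation / inference)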

# Adversarial attack

In [None]:
import torch
import torch.nn as nn
from torch.optim import SGD

def fgsm_attack(model, x, y, epsilon, loss_fn, clip_min=0.0, clip_max=1.0):
    """Return FGSM adversarial examples for the batch ``x``.

    Each input is moved by ``epsilon`` in the direction of the sign of the
    loss gradient with respect to that input, then clipped to the valid range.

    Args:
        model: Module mapping ``x`` to the predictions expected by ``loss_fn``.
        x: Floating-point input batch. It is not modified.
        y: Targets passed to ``loss_fn``.
        epsilon: Step size of the perturbation.
        loss_fn: Callable ``loss_fn(outputs, y)`` returning a scalar loss.
        clip_min: Lower clipping bound (default 0.0, for image data in [0, 1]);
            ``None`` disables the lower bound.
        clip_max: Upper clipping bound (default 1.0); ``None`` disables the
            upper bound.

    Returns:
        A new tensor with the same shape as ``x``, detached from the autograd
        graph.
    """
    # Work on a detached copy so the caller's tensor is not switched to
    # requires_grad=True and does not accumulate a .grad buffer.
    x_adv = x.clone().detach().requires_grad_(True)
    loss = loss_fn(model(x_adv), y)
    # Differentiate with respect to the input only; the model's parameter
    # gradients are neither zeroed nor populated by the attack.
    (input_grad,) = torch.autograd.grad(loss, x_adv)
    # Detach so a later training backward pass does not flow through the attack.
    perturbed_x = x_adv.detach() + epsilon * input_grad.sign()
    if clip_min is None and clip_max is None:
        return perturbed_x
    return torch.clamp(perturbed_x, min=clip_min, max=clip_max)

def adversarial_train(model, train_loader, epsilon=0.01, epochs=10):
    """Train ``model`` in place on an equal mix of clean and FGSM-perturbed batches.

    Args:
        model: Classifier whose outputs are accepted by ``nn.CrossEntropyLoss``.
        train_loader: Iterable of ``(x, y)`` batches.
        epsilon: FGSM step size used to build the adversarial examples.
        epochs: Number of passes over ``train_loader``.

    The model is optimized with SGD (lr=0.01); nothing is returned.
    """
    optimizer = SGD(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        for x, y in train_loader:
            # Generate adversarial examples. Detach them so they act as
            # constant inputs and the training backward pass does not flow
            # back through the attack.
            x_adv = fgsm_attack(model, x, y, epsilon, loss_fn).detach()

            # Clear any gradients left over (e.g. from the attack's backward
            # pass) before computing the training loss.
            optimizer.zero_grad()

            # Combined loss (clean + adversarial), weighted equally. Each term
            # gets its own 0.5 factor; nesting the second factor inside the
            # parentheses would weight the adversarial term by only 0.25.
            loss_clean = loss_fn(model(x), y)
            loss_adv = loss_fn(model(x_adv), y)
            loss = 0.5 * loss_clean + 0.5 * loss_adv

            loss.backward()
            optimizer.step()