#### These are basic methods that can reduce the energy and resource consumption of ML models without significantly compromising performance. Implementation examples of each method follow:


In [6]:
# FOR DEMO PURPOSES, REPLACE WITH YOUR OWN MODEL CODE
# Dataset variant number; it is interpolated into the kryptonite-{n}-X / -y
# .npy file names that are loaded below.
n = 9
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim

# Read the feature matrix and the label vector for dataset variant `n`
X = np.load(f'../Datasets/kryptonite-{n}-X.npy')
y = np.load(f'../Datasets/kryptonite-{n}-y.npy')

# Two-stage split with a fixed seed: hold out 20% of the data for testing,
# then hold out 20% of what remains for validation.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_options)

# Custom Dataset class
class KryptoniteDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Args:
        X: Array-like of features; stored as a ``float32`` tensor in ``self.X``.
        y: Array-like of labels; stored as an ``int64`` tensor in ``self.y``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, X, y):
        # Explicit dtypes replace the legacy FloatTensor/LongTensor constructors.
        # as_tensor skips the copy when the input already has the target dtype.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.int64)

        # Fail here rather than with an IndexError in the middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]
    
# Model
class TransformerClassifier(nn.Module):
    """Transformer-encoder classifier for flat feature vectors.

    Each sample is linearly projected to the model width, treated as a
    sequence of length one, run through a stack of encoder layers, and mapped
    to two class logits.

    Args:
        input_dim: Number of input features per sample.
        num_heads: Attention heads in every encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Model width (used as ``d_model``); the feed-forward
            sub-layer inside each encoder layer is twice this size.
        dropout: Dropout probability used in the encoder and the output head.
    """

    def __init__(self, input_dim, num_heads=8, num_layers=6, dim_feedforward=256, dropout=0.1):
        super().__init__()
        width = dim_feedforward

        # Lift raw features to the width the encoder operates on
        self.input_proj = nn.Linear(input_dim, width)

        # Learned offset added to the single sequence position
        self.pos_encoder = nn.Parameter(torch.randn(1, 1, width))

        # Encoder stack
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=width,
                nhead=num_heads,
                dim_feedforward=2 * width,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=num_layers,
        )

        # Two-logit classification head (binary classification)
        head_layers = [
            nn.Linear(width, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 2),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits for a batch of feature vectors."""
        # Insert a length-1 sequence axis at position 1, project, add the offset
        tokens = self.input_proj(x[:, None]) + self.pos_encoder
        encoded = self.transformer_encoder(tokens)
        # Drop the sequence axis again before classifying
        return self.classifier(encoded.squeeze(1))

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Model (sized from the training features), loss function and optimizer
n_features = X_train.shape[1]
model = TransformerClassifier(input_dim=n_features).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Wrap each split in a Dataset
train_dataset, val_dataset, test_dataset = (
    KryptoniteDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# Only the training loader shuffles; the evaluation loaders keep stored order
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)


### 1. Early Stopping


Early stopping terminates training when the model's performance on a validation set stops improving. This avoids unnecessary epochs, reducing time and energy usage.


In [7]:
import torch

# Early stopping parameters
patience = 5  # consecutive epochs without improvement tolerated before stopping
best_val_loss = float("inf")
early_stop_counter = 0

# Training loop
epochs = 50
for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        # The model was moved to `device`; the batches must live there too,
        # otherwise the forward pass fails whenever `device` is a GPU.
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
    
    # ...
    
    # Validation loop (no gradients needed)
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            outputs = model(X_batch)
            # Summed over batches; the loader is the same every epoch, so
            # the sums are comparable between epochs.
            val_loss += criterion(outputs, y_batch).item()

    # Early stopping check: reset the counter on improvement, otherwise count
    # towards `patience` and stop once it is exhausted.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break


Early stopping at epoch 12


### 2. Dynamic Learning Rate (LR) Scheduling


By reducing the learning rate as training progresses, you can often reach convergence with fewer epochs. This technique helps avoid excess computation in later epochs and reduces energy consumption.

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Halve the learning rate after 3 epochs without a lower validation loss.
# The `verbose` argument is deprecated, so LR changes are reported manually below.
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

# During training
for epoch in range(epochs):
    # Train and validate as before
    # ...

    # Adjust learning rate based on validation loss
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Epoch {epoch+1}: learning rate reduced from {lr_before:.2e} to {lr_after:.2e}")


### 3. Mixed Precision Training


Mixed precision allows the model to use both 32-bit and 16-bit floating-point operations. This can significantly reduce memory usage and increase computational speed without much loss of accuracy.


In [None]:
from torch.amp import GradScaler, autocast

# Gradient scaling is only enabled on CUDA; with enabled=False the scaler
# calls below simply pass through to the plain loss / optimizer step.
use_grad_scaling = device.type == 'cuda'
scaler = GradScaler(device=device.type, enabled=use_grad_scaling)

for epoch in range(epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        # Keep the batch on the same device as the model
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        
        # Use autocast for mixed precision; torch.amp.autocast requires the
        # device type to be passed explicitly.
        with autocast(device_type=device.type):
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
        
        # Scale the loss and backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()


### 4. Pruning (Optional)


Model pruning reduces the size of a network by removing less important weights, thus reducing memory usage and computation. Techniques like structured pruning remove entire neurons or channels rather than individual weights, which is more efficient in practice.

In [None]:
# For ResNet
import torch.nn.utils.prune as prune

# Structured L1 pruning: zero the 20% of slices along dim 0 of conv1's weight
# that have the smallest L1 norm.
first_conv = model.resnet.conv1
prune.ln_structured(first_conv, name="weight", amount=0.2, n=1, dim=0)

# Drop the pruning reparameterization so the pruned weight becomes permanent
prune.remove(first_conv, 'weight')


In [None]:
# For basic NeuralNet

# Unstructured L1 pruning on the first fully connected layer (fc1):
# zero the 30% of individual weights with the smallest magnitude.
first_fc = model.fc1
prune.l1_unstructured(first_fc, name="weight", amount=0.3)

# Optionally drop the pruning reparameterization to make the result permanent
prune.remove(first_fc, 'weight')

In [None]:
# For a basic decision tree

# Pruning logic for tree models that expose torch-style named_parameters()
def prune_decision_tree(tree_model, importance_threshold):
    """Zero every weight parameter whose mean absolute value is below a threshold.

    Args:
        tree_model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs.
        importance_threshold: Parameters whose name contains ``'weight'`` and
            whose mean absolute value is strictly below this are zeroed in place.
    """
    for param_name, tensor in tree_model.named_parameters():
        # Only parameters with 'weight' in their name are candidates
        if 'weight' not in param_name:
            continue

        is_unimportant = tensor.abs().mean() < importance_threshold
        if is_unimportant:
            print(f"Pruning node: {param_name}")
            # Overwrite in place so this parameter no longer contributes
            tensor.data.fill_(0)

# Apply pruning
# NOTE(review): `tree_model` is not defined anywhere in this notebook; bind it
# to your own model before running this cell.
prune_decision_tree(tree_model, importance_threshold=0.1)
