In [1]:
import os
os.chdir('../')


$$ 
\text{DropConnect}(X, W, M) = \begin{bmatrix}
    \frac{1}{1-p}\begin{bmatrix} x^1{}_1 & x^1{}_2 & \cdots & x^1{}_d \end{bmatrix}
    \left(\begin{bmatrix}
        m^{11}{}_1 & m^{11}{}_2 & \cdots & m^{11}{}_l \\
        m^{12}{}_1 & m^{12}{}_2 & \cdots & m^{12}{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        m^{1d}{}_1 & m^{1d}{}_2 & \cdots & m^{1d}{}_l \\
    \end{bmatrix} \odot \begin{bmatrix}
        w^1{}_1 & w^1{}_2 & \cdots & w^1{}_l \\
        w^2{}_1 & w^2{}_2 & \cdots & w^2{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        w^d{}_1 & w^d{}_2 & \cdots & w^d{}_l \\
    \end{bmatrix}
    \right) \\
    \\
    \frac{1}{1-p}\begin{bmatrix}  x^2{}_1 & x^2{}_2 & \cdots & x^2{}_d \end{bmatrix}
    \left(\begin{bmatrix}
        m^{21}{}_1 & m^{21}{}_2 & \cdots & m^{21}{}_l \\
        m^{22}{}_1 & m^{22}{}_2 & \cdots & m^{22}{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        m^{2d}{}_1 & m^{2d}{}_2 & \cdots & m^{2d}{}_l \\
    \end{bmatrix} \odot \begin{bmatrix}
        w^1{}_1 & w^1{}_2 & \cdots & w^1{}_l \\
        w^2{}_1 & w^2{}_2 & \cdots & w^2{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        w^d{}_1 & w^d{}_2 & \cdots & w^d{}_l \\
    \end{bmatrix}
    \right) \\
    \\
    \vdots
    \\
    \\
    \frac{1}{1-p}\begin{bmatrix}  x^n{}_1 & x^n{}_2 & \cdots & x^n{}_d \end{bmatrix}
    \left(\begin{bmatrix}
        m^{n1}{}_1 & m^{n1}{}_2 & \cdots & m^{n1}{}_l \\
        m^{n2}{}_1 & m^{n2}{}_2 & \cdots & m^{n2}{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        m^{nd}{}_1 & m^{nd}{}_2 & \cdots & m^{nd}{}_l \\
    \end{bmatrix} \odot \begin{bmatrix}
        w^1{}_1 & w^1{}_2 & \cdots & w^1{}_l \\
        w^2{}_1 & w^2{}_2 & \cdots & w^2{}_l \\
        \vdots & \vdots & \ddots & \vdots \\
        w^d{}_1 & w^d{}_2 & \cdots & w^d{}_l \\
    \end{bmatrix}
    \right) \\
\end{bmatrix} 
$$

In [2]:
from torch import Tensor
from torcheval.metrics import Mean, MulticlassAccuracy
 
class Metrics:
    def __init__(self, device: str | None = None):
        self.loss = Mean(device=device)
        self.accuracy = MulticlassAccuracy(num_classes=10, device=device) 
        
    def update(self, batch: int, loss: Tensor, predictions: Tensor, targets: Tensor) -> None:
        self.loss.update(loss)
        self.accuracy.update(predictions, targets)
        if batch % 200 == 0:
            print(f"--- Batch {batch}: loss={loss.item()}")
        
    def compute(self) -> dict[str, Tensor]:
        return {
            'loss': self.loss.compute(),
            'accuracy': self.accuracy.compute()
        }
    
    def reset(self) -> None:
        self.loss.reset()
        self.accuracy.reset()

In [3]:
from torch import argmax
from torch.nn import Module, Flatten
from torch.optim import Optimizer
from torchsystem import Aggregate

class Classifier(Aggregate):
    def __init__(self, hash: str, model: Module, criterion: Module, optimizer: Optimizer, metrics: Metrics):
        super().__init__()
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.metrics = metrics
        self.flatten = Flatten()
        self.hash = hash
        self.epoch = 0

    @property
    def id(self):
        return self.hash

    def forward(self, input: Tensor) -> Tensor:
        return self.model(self.flatten(input))
    
    def loss(self, outputs: Tensor, targets: Tensor) -> Tensor:
        return self.criterion(outputs, targets)

    def fit(self, inputs: Tensor, targets: Tensor) -> tuple[Tensor, Tensor]:
        self.optimizer.zero_grad()
        outputs = self(inputs)
        loss = self.loss(outputs, targets)
        loss.backward()
        self.optimizer.step()
        return argmax(outputs, dim=1), loss
    
    def evaluate(self, inputs: Tensor, targets: Tensor) -> tuple[Tensor, Tensor]: 
        outputs = self(inputs)
        return argmax(outputs, dim=1), self.loss(outputs, targets)

In [4]:
from typing import Iterable 
from torchsystem.depends import Depends, Provider
from torchsystem.services import Service, Consumer, event
from mltracker import getallmodels
from mltracker.ports import Models

provider = Provider()
consumer = Consumer(provider=provider) 
service = Service(provider=provider)

def device() -> str:...

def models() -> Models:...

@service.handler
def train(model: Classifier, loader: Iterable[tuple[Tensor, Tensor]], device: str = Depends(device)):
    model.phase = 'train'
    for batch, (inputs, targets) in enumerate(loader, start=1): 
        inputs, targets = inputs.to(device), targets.to(device)  
        predictions, loss = model.fit(inputs, targets)
        model.metrics.update(batch, loss, predictions, targets)
    results = model.metrics.compute()
    consumer.consume(Trained(model, results))

@event
class Trained:
    model: Classifier 
    results: dict[str, Tensor]


@consumer.handler
def handle_epoch(event: Trained):
    event.model.epoch += 1

@consumer.handler
def handle_results(event: Trained, models: Models = Depends(models)):
    model = models.read(event.model.id)
    for name, metric in event.results.items():
        model.metrics.add(name, metric.item(), event.model.epoch, event.model.phase)

@consumer.handler
def print_metrics(event: Trained):
    print(f"-----------------------------------------------------------------")
    print(f"Epoch: {event.model.epoch}, Average loss: {event.results['loss'].item()}, Average accuracy: {event.results['accuracy'].item()}")
    print(f"-----------------------------------------------------------------")

@consumer.handler
def save_epoch(event: Trained, models: Models = Depends(models)):
    model = models.read(event.model.id) or models.create(event.model.id)
    model.epoch = event.model.epoch



In [5]:
from torchsystem import Depends
from torchsystem.compiler import Compiler, compile

compiler = Compiler[Classifier](provider=provider)

@compiler.step
def build_model(nn: Module, criterion: Module, optimizer: Module, metrics: Metrics, device: str = Depends(device)):
    print(f"Moving classifier to device {device}...")
    metrics.accuracy.to(device)
    metrics.loss.to(device)
    return Classifier('1', nn, criterion, optimizer, metrics).to(device)

@compiler.step
def compile_model(classifier: Classifier):
    print("Compiling model...")
    return compile(classifier)

@compiler.step
def bring_to_current_epoch(classifier: Classifier, models: Models = Depends(models)):
    print("Retrieving model from store...")
    model = models.read(classifier.id)
    if not model:
        print(f"model not found, creating one...")
        model = models.create(classifier.id, 'classifier')
    else:
        print(f"model found on epoch {model.epoch}")
    classifier.epoch = model.epoch
    return classifier

In [6]:
import math
from torch import Tensor, bernoulli, full_like, empty
from torch.nn import init, Module, Linear, Parameter, ReLU
from torch.nn.functional import linear

def dropconnect(features: Tensor, weight: Tensor, bias: Tensor | None = None, p: float = 0.5) -> Tensor:
    assert 0 <= p < 1, f"DropConnect probability p must be in [0, 1), got {p}"
    device = features.device
    # Shared mask across the batch
    mask = bernoulli(full_like(weight, 1 - p)).bool()
    masked_weight = weight.masked_fill(~mask, 0) / (1 - p)
    return linear(features, masked_weight, bias)

class Dropconnect(Module):
    __constants__ = ["in_features", "out_features", "p"]
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True, p: float = 0.5, device=None, dtype=None) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__()
        self.p = p
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(empty(out_features, **factory_kwargs))
        else:
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, features: Tensor) -> Tensor:
        if self.training:
            return dropconnect(features, self.weight, self.bias, self.p)
        else:
            return linear(features, self.weight, self.bias)

    def extra_repr(self) -> str:
        return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}, p={self.p}"

class DCP(Module):
    def __init__(self, input_features: int, hidden_features: int, output_features: int, p: float = 0.5):
        super().__init__()
        self.input_layer = Dropconnect(input_features, hidden_features, p=p)
        self.activation = ReLU()
        self.output_layer = Linear(hidden_features, output_features)

    def forward(self, features: Tensor) -> Tensor:
        features = self.input_layer(features)
        features = self.activation(features)
        return self.output_layer(features)

In [7]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Normalize, ToTensor
from datasets.digits import Digits

repository = getallmodels('dropconnect')
provider.override(device, lambda: 'cuda:0')
provider.override(models, lambda: repository) 

nn = DCP(784, 256, 10, p=0.2)
criterion = CrossEntropyLoss()
optimizer = Adam(nn.parameters(), lr=0.001)
metrics = Metrics()
classifier = compiler.compile(nn, criterion, optimizer, metrics)
datasets = {
    'train': Digits(train=True, transform=Compose([ToTensor(), Normalize(0.1307, 0.3081)])),
    'evaluation': Digits(train=True, transform=Compose([ToTensor(), Normalize(0.1307, 0.3081)])),
}
loaders = {
    'train': DataLoader(datasets['train'], batch_size=64, shuffle=True, pin_memory=True, pin_memory_device='cuda', num_workers=4),
    'evaluation': DataLoader(datasets['evaluation'], batch_size=64, shuffle=True, pin_memory=True, pin_memory_device='cuda', num_workers=4)
}

for epoch in range(10):
    train(classifier, loaders['train'])

Moving classifier to device cuda:0...
Compiling model...
Retrieving model from store...
model not found, creating one...
--- Batch 200: loss=0.2356145679950714
--- Batch 400: loss=0.24830473959445953
--- Batch 600: loss=0.09275729954242706
--- Batch 800: loss=0.11138495057821274
-----------------------------------------------------------------
Epoch: 1, Average loss: 0.23751319520699699, Average accuracy: 0.929183304309845
-----------------------------------------------------------------
--- Batch 200: loss=0.10199681669473648
--- Batch 400: loss=0.19998936355113983
--- Batch 600: loss=0.0863189697265625
--- Batch 800: loss=0.12480195611715317
-----------------------------------------------------------------
Epoch: 2, Average loss: 0.17145907957561568, Average accuracy: 0.9485833048820496
-----------------------------------------------------------------
--- Batch 200: loss=0.09888274222612381
--- Batch 400: loss=0.11287199705839157
--- Batch 600: loss=0.024831566959619522
--- Batch 800

In [8]:
from torch.nn import CrossEntropyLoss
from torch.nn import Dropout
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, Normalize, ToTensor
from datasets.digits import Digits

class MLP(Module):
    def __init__(self, input_features: int, hidden_features: int, output_features: int, p: float = 0.5):
        super().__init__()
        self.epoch = 0
        self.input_layer = Linear(input_features, hidden_features)
        self.dropout = Dropout(p)
        self.activation = ReLU()
        self.output_layer = Linear(hidden_features, output_features)

    def forward(self, features: Tensor) -> Tensor:
        features = self.input_layer(features)
        features = self.dropout(features)
        features = self.activation(features)
        return self.output_layer(features)

repository = getallmodels('dropconnect')
provider.override(device, lambda: 'cuda:0')
provider.override(models, lambda: repository) 

nn = MLP(784, 256, 10, 0.2)
criterion = CrossEntropyLoss()
optimizer = Adam(nn.parameters(), lr=0.001)
metrics = Metrics()
classifier = compiler.compile(nn, criterion, optimizer, metrics)
datasets = {
    'train': Digits(train=True, transform=Compose([ToTensor(), Normalize(0.1307, 0.3081)])),
    'evaluation': Digits(train=True, transform=Compose([ToTensor(), Normalize(0.1307, 0.3081)])),
}
loaders = {
    'train': DataLoader(datasets['train'], batch_size=64, shuffle=True, pin_memory=True, pin_memory_device='cuda', num_workers=4),
    'evaluation': DataLoader(datasets['evaluation'], batch_size=64, shuffle=True, pin_memory=True, pin_memory_device='cuda', num_workers=4)
}

for epoch in range(10):
    train(classifier, loaders['train'])

Moving classifier to device cuda:0...
Compiling model...
Retrieving model from store...
model not found, creating one...
--- Batch 200: loss=0.2838340997695923
--- Batch 400: loss=0.35911861062049866
--- Batch 600: loss=0.20519579946994781
--- Batch 800: loss=0.1655597984790802
-----------------------------------------------------------------
Epoch: 1, Average loss: 0.2550781427253125, Average accuracy: 0.9235333204269409
-----------------------------------------------------------------
--- Batch 200: loss=0.18443703651428223
--- Batch 400: loss=0.10197896510362625
--- Batch 600: loss=0.11046610027551651
--- Batch 800: loss=0.13306830823421478
-----------------------------------------------------------------
Epoch: 2, Average loss: 0.18731357665089116, Average accuracy: 0.9436833262443542
-----------------------------------------------------------------
--- Batch 200: loss=0.03697671741247177
--- Batch 400: loss=0.009128664620220661
--- Batch 600: loss=0.12625662982463837
--- Batch 800