# MMEngine Quickstart
Build a complete and configurable pipeline for both training and validation with a ~~ResNet-50~~ ConvNeXt model on CIFAR-10.
https://mmengine.readthedocs.io/en/latest/get_started/15_minutes.html

## Build a Model
Models in MMEngine should inherit from **`mmengine.model.BaseModel`**, a simple model interface that can optionally contain a data preprocessor as well.

In [None]:
import torch.nn.functional as F
import torchvision

from mmengine.model import BaseModel

In [None]:
class MMConvNeXtTiny(BaseModel):
    """ConvNeXt-Tiny classifier exposed through MMEngine's ``BaseModel``.

    The wrapped network is ``torchvision.models.convnext_tiny()`` built with
    torchvision's default arguments.
    """

    _MODES = ('loss', 'predict', 'tensor')

    def __init__(self):
        super().__init__()
        self.model = torchvision.models.convnext_tiny()

    def forward(self, images, targets=None, mode='tensor'):
        """Run the network and shape the result according to ``mode``.

        Args:
            images: Batch of input images, passed straight to the network.
            targets: Ground-truth labels. Required when ``mode='loss'``;
                returned untouched when ``mode='predict'``.
            mode (str): determines the model's forward path.
                TRAINING: mode='loss', returns a dict containing the key
                "loss".
                VALIDATION: mode='predict', returns tuple (preds, targets).
                mode='tensor', returns the raw network output.

        Returns:
            dict, tuple or tensor, depending on ``mode`` (see above).

        Raises:
            ValueError: If ``mode`` is not one of 'loss', 'predict' or
                'tensor', or if ``mode='loss'`` is requested without targets.
        """
        # Reject bad arguments before paying for a forward pass; previously an
        # unsupported mode fell through and silently returned None.
        if mode not in self._MODES:
            raise ValueError(
                f"Unsupported mode {mode!r}; expected one of {self._MODES}.")
        if mode == 'loss' and targets is None:
            raise ValueError("mode='loss' requires `targets`.")

        x = self.model(images)
        if mode == 'loss':
            return {'loss': F.cross_entropy(x, targets)}
        if mode == 'predict':
            return x, targets
        # mode == 'tensor'
        return x

### Download some data

In [None]:
import os
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Setup the pathing.
# `expanduser` uses $HOME when it is set and otherwise falls back to the
# platform's home-directory lookup, so HOME_ROOT is always a string
# (os.getenv('HOME') returns None when the variable is unset).
HOME_ROOT = os.path.expanduser('~')
DATA_ROOT = os.path.join(HOME_ROOT, '.Data')
data_path = DATA_ROOT
# Both splits share one on-disk location.
cifar10_root = os.path.join(data_path, 'cifar10')

# Build the dataloaders.
# Per-channel mean/std handed to `transforms.Normalize` for both splits.
norm_cfg = dict(mean=[0.491, 0.482, 0.447], std=[0.202, 0.199, 0.201])

# Training split: random crop + horizontal flip augmentation, then normalize.
train_dataloader = DataLoader(batch_size=32,
                              shuffle=True,
                              dataset=torchvision.datasets.CIFAR10(
                                  cifar10_root,
                                  train=True,
                                  download=True,
                                  transform=transforms.Compose([
                                      transforms.RandomCrop(32, padding=4),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize(**norm_cfg)
                                  ])))

# Validation split: no augmentation and no shuffling, only normalization.
val_dataloader = DataLoader(batch_size=32,
                            shuffle=False,
                            dataset=torchvision.datasets.CIFAR10(
                                cifar10_root,
                                train=False,
                                download=True,
                                transform=transforms.Compose([
                                    transforms.ToTensor(),
                                    transforms.Normalize(**norm_cfg)
                                ])))

Files already downloaded and verified
Files already downloaded and verified


## Evaluation `Metric`
```
Init signature:
BaseMetric(
    collect_device: str = 'cpu',
    prefix: Optional[str] = None,
    collect_dir: Optional[str] = None,
) -> None
Docstring:     
Base class for a metric.

The metric first processes each batch of data_samples and predictions,
and appends the processed results to the results list. Then it
collects all results together from all ranks if distributed training
is used. Finally, it computes the metrics of the entire dataset.

A subclass of class:`BaseMetric` should assign a meaningful value to the
class attribute `default_prefix`. See the argument `prefix` for details.

Args:
    collect_device (str): Device name used for collecting results from
        different ranks during distributed training. Must be 'cpu' or
        'gpu'. Defaults to 'cpu'.
    prefix (str, optional): The prefix that will be added in the metric
        names to disambiguate homonymous metrics of different evaluators.
        If prefix is not provided in the argument, self.default_prefix
        will be used instead. Default: None
    collect_dir: (str, optional): Synchronize directory for collecting data
        from different ranks. This argument should only be configured when
        ``collect_device`` is 'cpu'. Defaults to None.
        `New in version 0.7.3.`
File:           ~/.pyenv/versions/3.11.2/envs/3112/lib/python3.11/site-packages/mmengine/evaluator/metric.py
Type:           ABCMeta
Subclasses:     DumpResults
```

In [None]:
from mmengine.evaluator import BaseMetric

class Accuracy(BaseMetric):
    """Classification accuracy, reported as a percentage."""

    def process(self, data_batch, data_samples):
        """Store the batch size and number of correct predictions of a batch.

        Args:
            data_batch: The input batch; not used by this metric.
            data_samples: A ``(scores, labels)`` pair. The predicted class is
                taken as the argmax of ``scores`` along dimension 1
                (presumably the class dimension -- confirm against the model).
        """
        scores, labels = data_samples
        predicted = scores.argmax(dim=1)
        num_hits = (predicted == labels).sum().cpu()
        record = {'batch_size': len(labels), 'correct': num_hits}
        self.results.append(record)

    def compute_metrics(self, results):
        """Combine the per-batch records into one accuracy figure.

        Args:
            results: The records appended by ``process``.

        Returns:
            dict: The eval results, keyed by metric name (``'accuracy'``).
        """
        hits = 0
        seen = 0
        for record in results:
            hits = hits + record['correct']
            seen = seen + record['batch_size']
        return {'accuracy': 100 * hits / seen}

## Build a `Runner` and SEND IT 🤙🏻

In [None]:
from torch.optim import SGD
from mmengine.runner import Runner

# Pathing for experiments.
EXPERIMENTS_PATH = os.path.join(HOME_ROOT, 'Projects', 'Experiments', 'mmsandbox')
# `exist_ok=True` creates the directory tree when missing and is a no-op when
# it already exists, avoiding the race in a separate exists-then-create check.
os.makedirs(EXPERIMENTS_PATH, exist_ok=True)

runner = Runner(
    # The model used for training and validation.
    # Needs to meet specific interface requirements.
    model=MMConvNeXtTiny(),
    # Working directory for training logs and model artifacts.
    work_dir=EXPERIMENTS_PATH,
    # `train_dataloader` must adhere to PyTorch DataLoader protocol.
    train_dataloader=train_dataloader,
    # Optimizer wrapper for optimization with additional features
    # like AMP, gradient accumulation, etc.
    optim_wrapper=dict(
        optimizer=dict(type=SGD, lr=0.001, momentum=0.9),
    ),
    # Training configs for specifying training epochs,
    # validation intervals, etc.
    train_cfg=dict(
        by_epoch=True, max_epochs=1, val_interval=1,
    ),
    # Validation dataloader also needs to meet the PyTorch
    # DataLoader protocol.
    val_dataloader=val_dataloader,
    # Validation configs for specifying additional parameters
    # required for validation.
    val_cfg=dict(),
    # Validation evaluator: the custom `Accuracy` metric.
    val_evaluator=dict(type=Accuracy),
)

06/11 21:06:42 - mmengine - INFO - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.11.2 (main, Mar  1 2023, 02:01:39) [GCC 10.3.0]
    CUDA available: True
    numpy_random_seed: 1740429921
    GPU 0: NVIDIA GeForce RTX 3090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 12.1, V12.1.105
    GCC: gcc (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0
    PyTorch: 2.0.1+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arc

In [None]:
# TRAIN THIS PUPPY!
# Start training with the `runner` configured above.
runner.train()

06/11 21:06:42 - mmengine - INFO - Checkpoints will be saved to /home/evan/Projects/Experiments/mmsandbox.
06/11 21:06:43 - mmengine - INFO - Epoch(train) [1][  10/1563]  lr: 1.0000e-03  eta: 0:00:50  time: 0.0326  data_time: 0.0111  memory: 1053  loss: 6.4243
06/11 21:06:43 - mmengine - INFO - Epoch(train) [1][  20/1563]  lr: 1.0000e-03  eta: 0:00:49  time: 0.0310  data_time: 0.0113  memory: 1053  loss: 4.5569
06/11 21:06:43 - mmengine - INFO - Epoch(train) [1][  30/1563]  lr: 1.0000e-03  eta: 0:00:48  time: 0.0306  data_time: 0.0110  memory: 1053  loss: 3.0619
06/11 21:06:44 - mmengine - INFO - Epoch(train) [1][  40/1563]  lr: 1.0000e-03  eta: 0:00:47  time: 0.0304  data_time: 0.0111  memory: 1053  loss: 2.4836
06/11 21:06:44 - mmengine - INFO - Epoch(train) [1][  50/1563]  lr: 1.0000e-03  eta: 0:00:46  time: 0.0306  data_time: 0.0109  memory: 1053  loss: 2.2884
06/11 21:06:44 - mmengine - INFO

MMConvNeXtTiny(
  (data_preprocessor): BaseDataPreprocessor()
  (model): ConvNeXt(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
      )
      (1): Sequential(
        (0): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            (1): Permute()
            (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
            (3): Linear(in_features=96, out_features=384, bias=True)
            (4): GELU(approximate='none')
            (5): Linear(in_features=384, out_features=96, bias=True)
            (6): Permute()
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        )
        (1): CNBlock(
          (block): Sequential(
            (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
            