# Network in Network

In [1]:
import logging

# Notebook-wide logger; the training loop reports its progress through it.
logger = logging.getLogger(__name__)

# Show INFO-level (and more severe) records in the cell output.
logging.basicConfig(level=logging.INFO)

In [2]:
from mlflow.tracking import MlflowClient

# Address of the MLflow tracking server every run in this notebook reports to.
MLFLOW_TRACKING_SERVER_URI = "http://localhost:20000"

# Low-level client used to create the run and to log params, metrics and artifacts.
mlflow_client = MlflowClient(tracking_uri=MLFLOW_TRACKING_SERVER_URI)

In [3]:
from mlflow.entities import Experiment

import mlflow

# Point the fluent MLflow API at the same tracking server used by `mlflow_client`.
mlflow.set_tracking_uri(uri=MLFLOW_TRACKING_SERVER_URI)

# Activate the experiment that groups all runs of this notebook.
experiment: Experiment = mlflow.set_experiment("NiN 01")

In [4]:
# Open a fresh run inside the active experiment; `run.info.run_id` identifies it in
# every later log_param / log_metric / log_artifact call.
run = mlflow_client.create_run(experiment.experiment_id)

In [5]:
from recognizer.utils.constants import ROOT_DIR, TARGET_TO_ENCODING
from recognizer.utils.utils import get_metadata_from_filename

# Directory holding the video files used in this notebook (the "all-10percent" dataset).
DATASET_DIR_POSTA = ROOT_DIR.joinpath("data", "all-10percent")

In [14]:
# Training hyper-parameters.
BATCH_SIZE = 24
NUM_CLASSES = 64
EPOCHS = 1
NUM_FRAMES = 8

LR = 0.005

# Record every hyper-parameter on the run, including the learning rate, which was
# previously defined but never logged.
# NOTE: MLflow params are write-once per run. Re-running this cell with a changed value
# raises "Changing param values is not allowed", so re-run the cell that creates `run`
# first whenever a value is edited.
for param_name, param_value in {
    "BATCH_SIZE": BATCH_SIZE,
    "NUM_CLASSES": NUM_CLASSES,
    "EPOCHS": EPOCHS,
    "NUM_FRAMES": NUM_FRAMES,
    "LR": LR,
}.items():
    mlflow_client.log_param(run_id=run.info.run_id, key=param_name, value=param_value)

MlflowException: INVALID_PARAMETER_VALUE: Changing param values is not allowed. Param with key='EPOCHS' was already logged with value='10' for run ID='8e3c448a4fb94fb0af916b1fbe782424'. Attempted logging new value '1'.

The cause of this error is typically due to repeated calls
to an individual run_id event logging.

Incorrect Example:
---------------------------------------
with mlflow.start_run():
    mlflow.log_param("depth", 3)
    mlflow.log_param("depth", 5)
---------------------------------------

Which will throw an MlflowException for overwriting a
logged parameter.

Correct Example:
---------------------------------------
with mlflow.start_run():
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 3)
    with mlflow.start_run(nested=True):
        mlflow.log_param("depth", 5)
---------------------------------------

Which will create a new nested run for each individual
model and prevent parameter key collisions within the
tracking store.

## Load Data

In [None]:
import os

import pandas as pd

# One list per metadata column, filled while walking the dataset directory.
metadata_columns = {
    "target": [],
    "subject": [],
    "repetition": [],
    "file": [],
}

for file in os.listdir(DATASET_DIR_POSTA):
    # The file name yields the target, the subject and the repetition number.
    target, subject, repetition = get_metadata_from_filename(file)
    absolute_path = (DATASET_DIR_POSTA / file).resolve()

    metadata_columns["target"].append(target)
    metadata_columns["subject"].append(subject)
    metadata_columns["repetition"].append(repetition)
    metadata_columns["file"].append(str(absolute_path))

metadata = pd.DataFrame(data=metadata_columns)

# Translate each target into the class index defined by TARGET_TO_ENCODING.
metadata["target_encoding"] = metadata["target"].map(TARGET_TO_ENCODING)

metadata.head()

Unnamed: 0,target,subject,repetition,file,target_encoding
0,8,10,3,/Users/facundopalavecino/Documents/DEV/ecd-tra...,7
1,23,1,3,/Users/facundopalavecino/Documents/DEV/ecd-tra...,22
2,39,3,5,/Users/facundopalavecino/Documents/DEV/ecd-tra...,38
3,51,5,2,/Users/facundopalavecino/Documents/DEV/ecd-tra...,50
4,10,7,1,/Users/facundopalavecino/Documents/DEV/ecd-tra...,9


### Train/Test split

In [None]:
import numpy as np

# Seed for the hold-out sampling below; change it to draw a different split.
TEST_SPLIT_SEED = 42

# Hold out exactly one randomly chosen row per (target, subject) pair for testing.
# The fixed random_state makes the split identical after every kernel restart, so
# metrics logged by different runs stay comparable. GroupBy.sample keeps the original
# row index, which the training-set filter below relies on.
testing_set = metadata.groupby(["target", "subject"]).sample(
    n=1,
    replace=False,
    random_state=TEST_SPLIT_SEED,
)

# Every row that was not held out is used for training.
training_set = metadata.loc[~metadata.index.isin(testing_set.index), :]

### Pre-processing functions

In [None]:
def transform(x):
    """Reorder a video tensor to the channels-first layout and cast it to float.

    The input is laid out as (T<frames>, H<height>, W<width>, C<channels>); the result
    is laid out as (C<channels>, T<frames>, H<height>, W<width>).
    """
    # Move the last axis (channels) to the front, keeping the other axes in order.
    channels_first = x.permute(3, 0, 1, 2)
    return channels_first.float()


### Datasets

In [None]:
from recognizer.dataset import SampledVideoDataset

# Options shared by both splits: frames sampled per video and the per-sample transform.
shared_dataset_options = {
    "num_frames": NUM_FRAMES,
    "transform": transform,
}

training_dataset = SampledVideoDataset(
    video_filenames=training_set["file"].values,
    labels=training_set["target_encoding"].values,
    **shared_dataset_options,
)

testing_dataset = SampledVideoDataset(
    video_filenames=testing_set["file"].values,
    labels=testing_set["target_encoding"].values,
    **shared_dataset_options,
)

# Quick sanity check of the split sizes.
print(f"Training/testing set: ({len(training_dataset)}, {len(testing_dataset)})")

Training/testing set: (2560, 640)


### Model, Data Loaders, Loss and Optimizer

In [15]:
import torch 

from torch import nn

from recognizer.models.nin import NiNVideoClassifier

# Per the original author's note, the video frames are 384x216 (width x height) and each
# video contributes NUM_FRAMES (8) frames.
# Each input element is therefore 3x8x216x384 (C<channels> x T<frames> x H<height> x W<width>),
# i.e. 3 * 8 * 216 * 384 = 1,990,656 values per sample.

model = NiNVideoClassifier(
    num_classes=NUM_CLASSES,
    num_frames=NUM_FRAMES,
    batch_size=BATCH_SIZE,
)

# Shuffle the training samples every epoch so batches are not always made of the same
# videos in the same order; evaluation order does not matter, so it stays fixed.
train_loader = torch.utils.data.DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_loader = torch.utils.data.DataLoader(testing_dataset, batch_size=BATCH_SIZE, shuffle=False)

loss_function = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Use CUDA when it is available and fall back to the CPU otherwise.
# MPS is skipped on purpose: "RuntimeError: Conv3D is not supported on MPS".
device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)


NiNVideoClassifier(
  (nin_block1): Sequential(
    (0): Conv3d(3, 192, kernel_size=(3, 5, 5), stride=(1, 1, 1), padding=(2, 2, 2))
    (1): ReLU(inplace=True)
    (2): Conv3d(192, 192, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (3): ReLU(inplace=True)
    (4): Conv3d(192, 192, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (5): ReLU(inplace=True)
  )
  (nin_block2): Sequential(
    (0): Conv3d(192, 160, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (1): ReLU(inplace=True)
    (2): Conv3d(160, 160, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (3): ReLU(inplace=True)
    (4): Conv3d(160, 160, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (5): ReLU(inplace=True)
  )
  (nin_block3): Sequential(
    (0): Conv3d(160, 96, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (1): ReLU(inplace=True)
    (2): Conv3d(96, 96, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (3): ReLU(inplace=True)
    (4): Conv3d(96, 96, kernel_size=(1, 1, 1), stride=(1, 1, 1))
    (5): ReLU(inplace=True)
  )
  (maxpool): MaxP

## Training

In [16]:
import subprocess

# AppleScript run through `osascript`: argv item 1 is the notification title, item 2 its text.
CMD = '''
on run argv
  display notification (item 2 of argv) with title (item 1 of argv) sound name "Glass"
end run
'''

def notify(title, text):
    """Show a desktop notification through macOS `osascript` (best effort).

    Args:
        title: Title of the notification.
        text: Body text of the notification.

    The notification is only a convenience. When `osascript` cannot be launched
    (for example on Linux or Windows) a warning is logged instead of raising, so a
    finished training cell is not turned into a failed one.
    """
    try:
        subprocess.call(['osascript', '-e', CMD, title, text])
    except OSError as error:
        logging.getLogger(__name__).warning(
            "Desktop notification skipped (%s): %s - %s", error, title, text
        )

In [17]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix
)

# Per-epoch history of every tracked metric: one list per metric, one entry per epoch.
metrics = {
    metric_name: []
    for metric_name in (
        "training_loss",
        "testing_loss",
        "accuracy",
        "precision",
        "recall",
        "f1",
    )
}

# Confusion matrix of the last epoch; filled in by the training loop.
cm = None

In [18]:
logger.info(f"""
Beginning model training with parameters:
- Epochs: {EPOCHS}
- Batch Size: {BATCH_SIZE}
""")
for epoch in range(EPOCHS):

    # ---- Training pass -------------------------------------------------------------
    logger.info(f" --- Epoch {epoch + 1} - Training ---")

    model.train()

    running_loss = 0.0

    for data in train_loader:

        batch, labels = data[0].float(), data[1]
        batch = batch.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        logits = model(batch)

        loss = loss_function(logits, labels)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean loss per training batch. It is stored in the shared `metrics` history
    # (the previous `training_losses` list was never defined and raised NameError).
    average_training_loss = running_loss / len(train_loader)
    metrics["training_loss"].append(average_training_loss)

    logger.info(f"AVG Training Loss: {average_training_loss:.2f}")

    # ---- Evaluation pass -----------------------------------------------------------
    logger.info(f" --- Epoch {epoch + 1} - Evaluation ---")

    model.eval()

    all_preds = []
    all_targets = []

    running_loss = 0.0

    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].float(), data[1]
            inputs = inputs.to(device)
            labels = labels.to(device)

            logits = model(inputs)

            # The predicted class is the index of the largest logit.
            _, preds = torch.max(logits, 1)

            # Collect plain Python ints rather than 0-d tensors.
            all_preds.extend(preds.cpu().tolist())
            all_targets.extend(labels.cpu().tolist())

            loss = loss_function(logits, labels)
            running_loss += loss.item()

    # Average over the number of *test* batches (this used to divide by the number of
    # training batches, which under-reported the evaluation loss).
    average_testing_loss = running_loss / len(test_loader)
    metrics["testing_loss"].append(average_testing_loss)

    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)

    # Calculate metrics (macro average: every class weighs the same).
    epoch_scores = {
        "accuracy": accuracy_score(all_targets, all_preds),
        "precision": precision_score(all_targets, all_preds, average="macro"),
        "recall": recall_score(all_targets, all_preds, average="macro"),
        "f1": f1_score(all_targets, all_preds, average="macro"),
    }
    for score_name, score_value in epoch_scores.items():
        metrics[score_name].append(score_value)

    # Push the latest value of every tracked metric to MLflow for this epoch.
    for metric in metrics.keys():
        mlflow_client.log_metric(
            run_id=run.info.run_id,
            key=metric,
            value=metrics[metric][-1],
            step=epoch,
        )

    # Keep the confusion matrix of the final epoch only.
    if epoch == EPOCHS - 1:
        cm = confusion_matrix(all_targets, all_preds)

    logger.info(f"""
    Epoch {epoch + 1}:
    - Accuracy  : {epoch_scores['accuracy']}
    - Precision : {epoch_scores['precision']}
    - Recall    : {epoch_scores['recall']}
    - F1        : {epoch_scores['f1']}
    """)

notify("Jupyterlab", "Entrenamiento del modelo terminado.")

INFO:__main__:
Beginning model training with parameters:
- Epochs: 1
- Batch Size: 24

INFO:__main__: --- Epoch 1 - Training ---


: 

: 

## Evaluation

### Confusion Matrix

In [None]:
import seaborn as sns

# `cm` is the confusion matrix of the last epoch. scikit-learn's confusion_matrix puts
# the true classes on the rows and the predicted classes on the columns.
ax = sns.heatmap(data=cm)
ax.set(xlabel="Predicted class", ylabel="True class", title="Confusion matrix (last epoch)");

### Accuracy/Precision/Recall/F1

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(8, 4))

# One line per classification score, in the order they appear in the legend.
for metric_key, metric_label in (
    ("accuracy", "Accuracy"),
    ("precision", "Precision"),
    ("recall", "Recall"),
    ("f1", "F1"),
):
    ax.plot(metrics[metric_key], label=metric_label, marker=".")

# Derive the x axis from the recorded history instead of the `epoch` variable leaked by
# the training loop, so the cell does not depend on that loop's last iteration.
num_recorded_epochs = len(metrics["accuracy"])
score_ticks = np.arange(0, 1.1, 0.1)

ax.set_xticks(range(num_recorded_epochs))
ax.set_xticklabels(range(1, num_recorded_epochs + 1))
ax.set_yticks(score_ticks)
# Format the labels explicitly; raw floats render as e.g. 0.30000000000000004.
ax.set_yticklabels([f"{tick:.1f}" for tick in score_ticks])
ax.set_xlabel("Epoch")
ax.set_ylabel("Score")
ax.set_title("Evaluation scores per epoch")
ax.legend()
ax.grid(alpha=0.1)
plt.show()

### Loss

In [None]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1, figsize=(8, 4))

ax.plot(metrics["training_loss"], label="Training Loss", marker=".", color="steelblue")
ax.plot(metrics["testing_loss"], label="Evaluation Loss", marker=".", color="orange")

# Derive the x axis from the recorded history instead of the `epoch` variable leaked by
# the training loop, so the cell does not depend on that loop's last iteration.
num_recorded_epochs = max(len(metrics["training_loss"]), len(metrics["testing_loss"]))

ax.set_xticks(range(num_recorded_epochs))
ax.set_xticklabels(range(1, num_recorded_epochs + 1))
ax.set_xlabel("Epoch")
ax.set_ylabel("Average loss per batch")
ax.set_title("Training vs. evaluation loss")
ax.legend()
ax.grid(alpha=0.1)
plt.show()

## Save model

In [None]:
from recognizer.utils.constants import ROOT_DIR

MODELS_DIR = ROOT_DIR / "models"

MODEL_PATH = MODELS_DIR / "nin.pth"

# Make sure the target directory exists, then actually write the model to disk: the
# next cell uploads MODEL_PATH to MLflow and fails when the file is missing.
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): this pickles the whole module, as the original (commented-out) call did.
# Saving `model.state_dict()` is the more portable option; confirm what consumers of
# "nin.pth" expect before switching.
torch.save(model, f=MODEL_PATH)

In [None]:
# `artifact_path` is the *directory* inside the run's artifact store, not a file name:
# the file keeps its local name, so this uploads to "model/nin.pth" (the previous value
# created a directory literally called "model.pth").
mlflow_client.log_artifact(
    run_id=run.info.run_id,
    local_path=str(MODEL_PATH),
    artifact_path="model",
)

# Runs created through MlflowClient.create_run stay in RUNNING state until they are
# explicitly closed; mark this one as finished now that everything has been logged.
mlflow_client.set_terminated(run_id=run.info.run_id)