# Proyecto Control Óptimo: Deep Learning como problema de Control Óptimo

## Comparación entre modelos clásicos de clasificación y la ResNet

*Universidad de Chile*  
*Facultad de Ciencias Físicas y Matemáticas*  
*Departamento de Ingeniería Matemática*

**MA4703-1 Control Óptimo: Teoría y Laboratorio**  
**Profesor:** Héctor Ramírez C.   
**Auxiliares:** Joaquín Márquez & Sebastián P. Pincheira   
**Ayudante:** Fraick Reyes

**Integrantes:** David Felipe, Alonso Urbina

### Importar liberías y código necesario
___

In [16]:
import sys, os
import torch
import numpy as np
import pandas as pd
from IPython.display import display

sys.path.append(os.path.abspath("../src"))

from data import *
from activations import *
from model import ResNetEuler
from train import *
from svm import *
from decision_tree import *
from random_forest import *
from mlp import *

### Generar datasets para almacenar resultados
___

In [18]:
metric_cols = ["model", "accuracy", "f1", "roc_auc", "mse", "train_time_s"]

table_spiral = pd.DataFrame(columns=metric_cols)  # X_tr_sp, y_tr_sp, X_va_sp, y_va_sp
table_donut = pd.DataFrame(columns=metric_cols)  # X_tr_dn, y_tr_dn, X_va_dn, y_va_dn
table_linear = pd.DataFrame(columns=metric_cols)  # X_tr_ln, y_tr_ln, X_va_ln, y_va_ln
table_sphere = pd.DataFrame(columns=metric_cols)
table_dhelix = pd.DataFrame(columns=metric_cols)  # X_tr_dh, y_tr_dh, X_va_dh, y_va_dh


def to_numpy(x):
    if hasattr(x, "detach"):
        return x.detach().cpu().numpy()
    return np.asarray(x)


def clean_table(df: pd.DataFrame):
    if df.empty:
        return pd.DataFrame({"info": ["Sin modelos evaluados aún"]})

    df2 = df.copy().reset_index(drop=True)

    for c in metric_cols:
        if c != "model" and c in df2.columns:
            if np.issubdtype(df2[c].dtype, np.number):
                df2[c] = df2[c].astype(float).round(4)

    return df2

### Crear datasets sintéticos
___

#### Espirales

In [20]:
device = "cpu"
torch.manual_seed(0)
X, y = make_spiral(n_per_class=1000, turns=1.75, noise=0.05, device=device)
n = X.size(0)
n_tr = int(0.8 * n)
X_tr_sp, y_tr_sp = X[:n_tr], y[:n_tr]
X_va_sp, y_va_sp = X[n_tr:], y[n_tr:]

#### Donut

In [21]:
device = "cpu"
torch.manual_seed(0)
X, y = make_donut(n_per_class=1000, r_inner=1.0, r_outer=3.0, noise=0.1, device=device)
n = X.size(0)
n_tr = int(0.8 * n)
X_tr_dn, y_tr_dn = X[:n_tr], y[:n_tr]
X_va_dn, y_va_dn = X[n_tr:], y[n_tr:]

#### Lineal

In [6]:
device = "cpu"
torch.manual_seed(0)
X, y = make_linear_dataset(n_per_class=1000, device=device)
n = X.size(0)
n_tr = int(0.8 * n)
X_tr_ln, y_tr_ln = X[:n_tr], y[:n_tr]
X_va_ln, y_va_ln = X[n_tr:], y[n_tr:]

#### Esferas anidadas

In [7]:
device = "cpu"
torch.manual_seed(0)
X, y = make_3d_nested_spheres(n_per_class=1500, device=device)
perm = torch.randperm(X.size(0), device=device)
X = X[perm]
y = y[perm]
n = X.size(0)
n_tr = int(0.8 * n)
X_tr_sph, y_tr_sph = X[:n_tr], y[:n_tr]
X_va_sph, y_va_sph = X[n_tr:], y[n_tr:]

#### Doble hélice

In [8]:
device = "cpu"
torch.manual_seed(0)
X, y = make_3d_double_helix(
    n_per_class=1000,
    turns=3.0,
    noise=0.08,
    device=device,
)
perm = torch.randperm(X.size(0), device=device)
X = X[perm]
y = y[perm]
n = X.size(0)
n_tr = int(0.8 * n)
X_tr_dh, y_tr_dh = X[:n_tr], y[:n_tr]
X_va_dh, y_va_dh = X[n_tr:], y[n_tr:]

### Entrenar modelos en los datasets creados
___

#### SVM

In [22]:
table_spiral.loc[len(table_spiral)] = train_eval_svm_rbf_cv(X_tr_sp, y_tr_sp)
table_donut.loc[len(table_donut)] = train_eval_svm_rbf_cv(X_tr_dn, y_tr_dn)
table_linear.loc[len(table_linear)] = train_eval_svm_rbf_cv(X_tr_ln, y_tr_ln)
table_sphere.loc[len(table_sphere)] = train_eval_svm_rbf_cv(X_tr_sph, y_tr_sph)
table_dhelix.loc[len(table_dhelix)] = train_eval_svm_rbf_cv(X_tr_dh, y_tr_dh)

#### Decision Tree

In [23]:
table_spiral.loc[len(table_spiral)] = train_eval_decision_tree_cv(X_tr_sp, y_tr_sp)
table_donut.loc[len(table_donut)] = train_eval_decision_tree_cv(X_tr_dn, y_tr_dn)
table_linear.loc[len(table_linear)] = train_eval_decision_tree_cv(X_tr_ln, y_tr_ln)
table_sphere.loc[len(table_sphere)] = train_eval_decision_tree_cv(X_tr_sph, y_tr_sph)
table_dhelix.loc[len(table_dhelix)] = train_eval_decision_tree_cv(X_tr_dh, y_tr_dh)

#### Random Forest

In [24]:
table_spiral.loc[len(table_spiral)] = train_eval_random_forest_cv(X_tr_sp, y_tr_sp)
table_donut.loc[len(table_donut)] = train_eval_random_forest_cv(X_tr_dn, y_tr_dn)
table_linear.loc[len(table_linear)] = train_eval_random_forest_cv(X_tr_ln, y_tr_ln)
table_sphere.loc[len(table_sphere)] = train_eval_random_forest_cv(X_tr_sph, y_tr_sph)
table_dhelix.loc[len(table_dhelix)] = train_eval_random_forest_cv(X_tr_dh, y_tr_dh)

#### MLP

In [25]:
table_spiral.loc[len(table_spiral)] = train_eval_mlp_cv(X_tr_sp, y_tr_sp)
table_donut.loc[len(table_donut)] = train_eval_mlp_cv(X_tr_dn, y_tr_dn)
table_linear.loc[len(table_linear)] = train_eval_mlp_cv(X_tr_ln, y_tr_ln)
table_sphere.loc[len(table_sphere)] = train_eval_mlp_cv(X_tr_sph, y_tr_sph)
table_dhelix.loc[len(table_dhelix)] = train_eval_mlp_cv(X_tr_dh, y_tr_dh)

#### ResNet

In [26]:
table_spiral.loc[len(table_spiral)] = train_eval_resnet_cv(
    X_tr_sp, y_tr_sp, model_name="ResNet", num_layers=50, epochs=800, delta_t=0.1
)
table_donut.loc[len(table_donut)] = train_eval_resnet_cv(
    X_tr_dn, y_tr_dn, model_name="ResNet", num_layers=50, epochs=800, delta_t=0.1
)
table_linear.loc[len(table_linear)] = train_eval_resnet_cv(
    X_tr_ln, y_tr_ln, model_name="ResNet", num_layers=50, epochs=800, delta_t=0.1
)
table_sphere.loc[len(table_sphere)] = train_eval_resnet_cv(
    X_tr_sph, y_tr_sph, model_name="ResNet", num_layers=80, epochs=3000, delta_t=0.1
)
table_dhelix.loc[len(table_dhelix)] = train_eval_resnet_cv(
    X_tr_dh, y_tr_dh, model_name="ResNet", num_layers=80, epochs=3000, delta_t=0.1
)

Iniciando entrenamiento con 800 épocas...
[0001] loss=0.124673 | acc_tr=60.23% | L=0.0625 | acc_val=60.62%
[0800] loss=0.022690 | acc_tr=95.63% | L=1 | acc_val=94.69%
Iniciando entrenamiento con 800 épocas...
[0001] loss=0.120795 | acc_tr=61.72% | L=0.01562 | acc_val=56.56%
[0800] loss=0.012189 | acc_tr=96.33% | L=2 | acc_val=96.56%
Iniciando entrenamiento con 800 épocas...
[0001] loss=0.124756 | acc_tr=59.06% | L=0.0625 | acc_val=56.56%
[0800] loss=0.017372 | acc_tr=95.31% | L=2 | acc_val=92.50%
Iniciando entrenamiento con 800 épocas...
[0001] loss=0.122423 | acc_tr=57.66% | L=0.03125 | acc_val=61.87%
[0800] loss=0.012271 | acc_tr=96.80% | L=0.5 | acc_val=97.81%
Iniciando entrenamiento con 800 épocas...
[0001] loss=0.125115 | acc_tr=60.70% | L=0.0625 | acc_val=60.31%
[0800] loss=0.017253 | acc_tr=96.09% | L=0.5 | acc_val=94.38%
Iniciando entrenamiento con 800 épocas...
[0001] loss=0.135480 | acc_tr=71.41% | L=0.0625 | acc_val=68.12%
Early stopping at epoch 15, loss=0.000073
Iniciando 

### Tabla final
___

In [27]:
print("Espiral")
display(clean_table(table_spiral))

print("Donut")
display(clean_table(table_donut))

print("Linear")
display(clean_table(table_linear))

print("Esferas anidadas")
display(clean_table(table_sphere))

print("Doble hélice")
display(clean_table(table_dhelix))

Espiral


Unnamed: 0,model,accuracy,f1,roc_auc,mse,train_time_s
0,"SVM_RBF(C=3.0, gamma=scale)",0.8425 ± 0.0149,0.8437 ± 0.0160,0.9195 ± 0.0152,0.1142 ± 0.0112,0.1525 ± 0.0090
1,DecisionTree(max_depth=None),0.9406 ± 0.0074,0.9411 ± 0.0068,0.9406 ± 0.0074,0.0594 ± 0.0074,0.0031 ± 0.0003
2,"RandomForest(n=200, depth=None)",0.9581 ± 0.0042,0.9584 ± 0.0040,0.9875 ± 0.0066,0.0345 ± 0.0047,0.4661 ± 0.0694
3,"MLP(64, 64)",0.8762 ± 0.1306,0.8862 ± 0.1120,0.9137 ± 0.1415,0.0916 ± 0.0727,0.9111 ± 0.4282
4,ResNet,0.9519 ± 0.0184,0.9525 ± 0.0179,0.9812 ± 0.0115,0.0420 ± 0.0155,121.3316 ± 1.9383


Donut


Unnamed: 0,model,accuracy,f1,roc_auc,mse,train_time_s
0,"SVM_RBF(C=3.0, gamma=scale)",1.0000 ± 0.0000,1.0000 ± 0.0000,1.0000 ± 0.0000,0.0000 ± 0.0000,0.0088 ± 0.0003
1,DecisionTree(max_depth=None),0.9975 ± 0.0023,0.9975 ± 0.0023,0.9975 ± 0.0023,0.0025 ± 0.0023,0.0017 ± 0.0001
2,"RandomForest(n=200, depth=None)",0.9975 ± 0.0036,0.9975 ± 0.0036,1.0000 ± 0.0000,0.0015 ± 0.0014,0.6596 ± 0.1893
3,"MLP(64, 64)",1.0000 ± 0.0000,1.0000 ± 0.0000,1.0000 ± 0.0000,0.2104 ± 0.0010,0.2112 ± 0.0452
4,ResNet,0.9806 ± 0.0118,0.9801 ± 0.0123,0.9789 ± 0.0176,0.0184 ± 0.0115,3.2252 ± 0.8582


Linear


Unnamed: 0,model,accuracy,f1,roc_auc,mse,train_time_s
0,"SVM_RBF(C=3.0, gamma=scale)",0.9981 ± 0.0025,0.9981 ± 0.0025,1.0000 ± 0.0000,0.0013 ± 0.0014,0.0053 ± 0.0006
1,DecisionTree(max_depth=None),0.9975 ± 0.0050,0.9975 ± 0.0049,0.9975 ± 0.0050,0.0025 ± 0.0050,0.0013 ± 0.0003
2,"RandomForest(n=200, depth=None)",0.9988 ± 0.0025,0.9988 ± 0.0025,1.0000 ± 0.0000,0.0011 ± 0.0010,0.9707 ± 0.1058
3,"MLP(64, 64)",0.9963 ± 0.0031,0.9962 ± 0.0031,1.0000 ± 0.0000,0.0599 ± 0.0357,0.3012 ± 0.1146
4,ResNet,0.9975 ± 0.0031,0.9975 ± 0.0030,1.0000 ± 0.0000,0.0016 ± 0.0017,0.0869 ± 0.0137


Esferas anidadas


Unnamed: 0,model,accuracy,f1,roc_auc,mse,train_time_s
0,"SVM_RBF(C=3.0, gamma=scale)",1.0000 ± 0.0000,1.0000 ± 0.0000,1.0000 ± 0.0000,0.0000 ± 0.0000,0.0287 ± 0.0011
1,DecisionTree(max_depth=None),0.9942 ± 0.0036,0.9941 ± 0.0036,0.9942 ± 0.0036,0.0058 ± 0.0036,0.0035 ± 0.0002
2,"RandomForest(n=200, depth=None)",0.9971 ± 0.0031,0.9971 ± 0.0031,1.0000 ± 0.0000,0.0025 ± 0.0012,0.5465 ± 0.0678
3,"MLP(64, 64)",1.0000 ± 0.0000,1.0000 ± 0.0000,1.0000 ± 0.0000,0.0949 ± 0.0025,0.4109 ± 0.0718
4,ResNet,0.9850 ± 0.0066,0.9846 ± 0.0070,0.9882 ± 0.0056,0.0131 ± 0.0053,7.1849 ± 0.8957


Doble hélice


Unnamed: 0,model,accuracy,f1,roc_auc,mse,train_time_s
0,"SVM_RBF(C=3.0, gamma=scale)",0.6769 ± 0.0149,0.7040 ± 0.0243,0.6838 ± 0.0089,0.2162 ± 0.0040,0.1563 ± 0.0023
1,DecisionTree(max_depth=None),0.9350 ± 0.0129,0.9358 ± 0.0129,0.9351 ± 0.0129,0.0650 ± 0.0129,0.0058 ± 0.0016
2,"RandomForest(n=200, depth=None)",0.9875 ± 0.0044,0.9877 ± 0.0044,0.9990 ± 0.0009,0.0471 ± 0.0012,0.5677 ± 0.0801
3,"MLP(64, 64)",0.7544 ± 0.1771,0.7673 ± 0.1671,0.7795 ± 0.1787,0.1630 ± 0.0872,0.8053 ± 0.5389
4,ResNet,0.9725 ± 0.0167,0.9732 ± 0.0162,0.9925 ± 0.0057,0.0223 ± 0.0134,94.7659 ± 16.7045


___