# Fully Connected Experiments

This notebook contains the experiments with fully connected neural networks on the following datasets:
- [Iris](https://www.kaggle.com/datasets/uciml/iris)
- [Heart Disease](https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset)
- [Naitzat](https://github.com/topnn/topnn_framework) (circles, rings and spheres)

## 1. Setup

In [1]:
import torch
import torch.nn as nn
import semitorch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import gc
import numpy as np
from IPython import display
from tqdm.notebook import tqdm, trange
import pandas as pd
import os
from collections import OrderedDict
from torch.utils.data import Dataset, DataLoader
from semitorch import MultiLRScheduler, MultiOptimizer
from itertools import chain
from typing import Union, Tuple

# Locate the data directory: use ./data when it exists relative to the current
# working directory, otherwise fall back to ../data.
if os.path.isdir("./data"):
    _data_dir = "./data"
else:
    _data_dir = "../data"
data_path = os.path.abspath(_data_dir)

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"data path = {data_path}\ndevice = {device}")


def split_dataset(
    dataset: torch.utils.data.Dataset,
    train_fraction: float = 0.8,
    generator: Union[torch.Generator, None] = None,
):
    """Randomly split ``dataset`` into a train subset and a test subset.

    Args:
        dataset: Any sized dataset (must support ``len``).
        train_fraction: Fraction of the samples assigned to the train subset.
            The train size is ``int(train_fraction * len(dataset))``; the
            remaining samples form the test subset. Defaults to 0.8.
        generator: Optional ``torch.Generator`` used to draw the permutation.
            Pass a seeded generator to make the split reproducible. When
            ``None`` the global PyTorch RNG is used.

    Returns:
        The ``[train_subset, test_subset]`` list produced by
        ``torch.utils.data.random_split``.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            f"train_fraction must be within [0, 1], got {train_fraction}"
        )

    train_size = int(train_fraction * len(dataset))
    test_size = len(dataset) - train_size
    lengths = [train_size, test_size]

    # Only forward the generator when one was supplied, so the default call
    # keeps using random_split's own default (the global RNG).
    if generator is None:
        return torch.utils.data.random_split(dataset, lengths)
    return torch.utils.data.random_split(dataset, lengths, generator=generator)

def count_parameters(model):
    """Return the total number of elements over all trainable parameters.

    Parameters whose ``requires_grad`` flag is false are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def resetmodel(model: nn.Module) -> None:
    """Re-initialize every submodule of ``model`` that can reset itself.

    Walks the module tree with ``nn.Module.apply`` and calls
    ``reset_parameters()`` on each module that exposes a callable attribute
    of that name. Modules without one are left untouched.
    """

    def _reinitialize(module: nn.Module) -> None:
        if not callable(getattr(module, "reset_parameters", None)):
            return
        # Re-initialization must not be recorded by autograd.
        with torch.no_grad():
            module.reset_parameters()

    model.apply(_reinitialize)

[Taichi] version 1.6.0, llvm 15.0.1, commit f1c6fbbd, win, python 3.11.4
[Taichi] Starting on arch=cuda
data path = c:\Users\Bart Smets\Documents\semitorch\data
device = cuda


#### Load iris data
[https://www.kaggle.com/datasets/uciml/iris](https://www.kaggle.com/datasets/uciml/iris)

In [2]:
iris_df = pd.read_csv(
    os.path.join(data_path, "iris.csv"), index_col=0, dtype={"Species": "string"}
)
# With the first CSV column used as the index, columns 0-3 are the inputs.
iris_x = torch.tensor(
    iris_df.iloc[:, [0, 1, 2, 3]].to_numpy(), dtype=torch.float32, device=device
)

# Encode the species names as integer class indices.
iris_species_to_class = {
    "Iris-setosa": 0,
    "Iris-versicolor": 1,
    "Iris-virginica": 2,
}
iris_labels = iris_df["Species"].map(iris_species_to_class)
# A species name missing from the mapping becomes NaN; casting NaN to int64
# would silently yield a meaningless class index, so fail loudly instead.
if iris_labels.isna().any():
    unmapped = sorted(
        iris_df["Species"][iris_labels.isna().to_numpy()].dropna().unique().tolist()
    )
    raise ValueError(
        f"iris.csv contains missing or unmapped Species values: {unmapped}"
    )
iris_y = torch.tensor(
    iris_labels.to_numpy(dtype="int64"), dtype=torch.int64, device=device
)
print(
    f"Iris dataset: input features = {iris_x.shape[1]}, classes = {torch.unique(iris_y).shape[0]}, samples = {len(iris_y)}"
)

# normalize: scale each feature column to unit L2 norm across all samples
iris_x = torch.nn.functional.normalize(iris_x, dim=0)

iris_train, iris_test = split_dataset(torch.utils.data.TensorDataset(iris_x, iris_y))

Iris dataset: input features = 4, classes = 3, samples = 150


#### Load heart disease data

[https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset](https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset)

In [3]:
heart_df = pd.read_csv(os.path.join(data_path, "heart.csv"))

# The first 13 columns are the input features; the last column is the label.
heart_features = heart_df.iloc[:, range(13)].to_numpy()
heart_labels = heart_df.iloc[:, -1].to_numpy()
heart_x = torch.Tensor(heart_features).to(dtype=torch.float32, device=device)
heart_y = torch.Tensor(heart_labels).to(dtype=torch.int64, device=device)

print(
    f"Heart disease dataset: input features = {heart_x.shape[1]}, classes = {torch.unique(heart_y).shape[0]}, samples = {len(heart_y)}"
)

# normalize: scale each feature column to unit L2 norm across all samples
heart_x = torch.nn.functional.normalize(heart_x, dim=0)

heart_dataset = torch.utils.data.TensorDataset(heart_x, heart_y)
heart_train, heart_test = split_dataset(heart_dataset)

Heart disease dataset: input features = 13, classes = 2, samples = 303


#### Load Naitzat data

[https://github.com/topnn/topnn_framework](https://github.com/topnn/topnn_framework)

In [4]:
def _load_naitzat(name: str, filename: str):
    """Load one Naitzat dataset file, split it, and print a one-line summary.

    Args:
        name: Display name used in the printed summary (e.g. ``"Circles"``).
        filename: File name inside ``<data_path>/naitzat``; the file is
            expected to unpack into an ``(x, y)`` pair.

    Returns:
        ``(x, y, train_subset, test_subset)``.
    """
    # NOTE(review): torch.load unpickles its input unless weights_only=True,
    # so only load files obtained from a trusted source.
    x, y = torch.load(os.path.join(data_path, "naitzat", filename))
    train, test = split_dataset(torch.utils.data.TensorDataset(x, y))
    print(
        f"{name} dataset: input features = {x.shape[1]}, classes = {torch.unique(y).shape[0]}, samples = {len(y)}"
    )
    return x, y, train, test


circles_x, circles_y, circles_train, circles_test = _load_naitzat(
    "Circles", "circles_type_8.pt"
)
rings_x, rings_y, rings_train, rings_test = _load_naitzat("Rings", "rings_9.pt")
spheres_x, spheres_y, spheres_train, spheres_test = _load_naitzat(
    "Spheres", "spheres_9.pt"
)

Circles dataset: input features = 2, classes = 2, samples = 15950
Rings dataset: input features = 3, classes = 2, samples = 45000
Spheres dataset: input features = 3, classes = 2, samples = 37800


## 2. Models

### 2.1. Baseline linear-ReLU network

In [5]:

class LinearReLU(nn.Module):
    """Baseline fully connected classifier built from Linear and ReLU layers.

    Architecture (all linear layers are bias-free):
        stem   : Linear(num_features -> width)
        layer1 : Linear(width -> width) followed by ReLU, added residually
        layer2 : Linear(width -> width) followed by ReLU, added residually
        head   : Linear(width -> num_classes)
    """

    def __init__(self, num_features: int, num_classes: int, width: int = 16):
        super().__init__()

        def residual_branch() -> nn.Sequential:
            # One residual branch: a square linear map followed by ReLU.
            return nn.Sequential(nn.Linear(width, width, bias=False), nn.ReLU())

        self.stem = nn.Linear(num_features, width, bias=False)
        self.layer1 = residual_branch()
        self.layer2 = residual_branch()
        self.head = nn.Linear(width, num_classes, bias=False)

    def forward(self, x):
        """Map inputs to class scores through the stem, two residual branches and the head."""
        hidden = self.stem(x)
        for branch in (self.layer1, self.layer2):
            hidden = hidden + branch(hidden)
        return self.head(hidden)

    def get_optimizer_and_scheduler(self, epochs: int, steps_per_epoch: int):
        """Create the AdamW optimizer and its OneCycleLR schedule.

        Args:
            epochs: Number of training epochs the schedule spans.
            steps_per_epoch: Number of scheduler steps per epoch.

        Returns:
            ``(optimizer, scheduler)`` covering all parameters of this model.
        """
        peak_lr = 2e-2
        optimizer = torch.optim.AdamW(
            self.parameters(), lr=peak_lr, weight_decay=0.01
        )
        one_cycle_settings = {
            "max_lr": peak_lr,
            "anneal_strategy": "linear",
            "pct_start": 0.3,
            "three_phase": True,
            "final_div_factor": 1000.0,
            "div_factor": 10.0,
            "steps_per_epoch": steps_per_epoch,
            "epochs": epochs,
        }
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer, **one_cycle_settings
        )
        return optimizer, scheduler

### 2.2. Tropical networks

In [8]:
class LinearMaxPlus(nn.Module):
    """Fully connected classifier whose residual branches end in a ``semitorch.MaxPlus`` layer.

    Architecture (all layers are constructed with ``bias=False``):
        stem   : Linear(num_features -> width)
        layer1 : Linear(width -> width // 2), then MaxPlus(width // 2, width), added residually
        layer2 : same structure as layer1, added residually
        head   : Linear(width -> num_classes)

    NOTE(review): the MaxPlus arguments are presumably (in_features,
    out_features) -- confirm against the semitorch documentation.
    """

    def __init__(self, num_features: int, num_classes: int, width: int = 16):
        super().__init__()
        bottleneck = width // 2

        def maxplus_branch() -> nn.Sequential:
            # Linear projection down to the bottleneck, then MaxPlus back to width.
            return nn.Sequential(
                nn.Linear(width, bottleneck, bias=False),
                semitorch.MaxPlus(bottleneck, width, bias=False),
            )

        self.stem = nn.Linear(num_features, width, bias=False)
        self.layer1 = maxplus_branch()
        self.layer2 = maxplus_branch()
        self.head = nn.Linear(width, num_classes, bias=False)

    def forward(self, x):
        """Map inputs to class scores through the stem, two residual branches and the head."""
        hidden = self.stem(x)
        for branch in (self.layer1, self.layer2):
            hidden = hidden + branch(hidden)
        return self.head(hidden)

    def get_optimizer_and_scheduler(self, epochs: int, steps_per_epoch: int):
        """Return the ``(optimizer, scheduler)`` pair built by ``tropcial_optimizer_and_scheduler``."""
        return tropcial_optimizer_and_scheduler(
            self, epochs=epochs, steps_per_epoch=steps_per_epoch
        )


class LinearMinPlus(nn.Module):
    """Fully connected classifier whose residual branches end in a ``semitorch.MinPlus`` layer.

    Architecture (all layers are constructed with ``bias=False``):
        stem   : Linear(num_features -> width)
        layer1 : Linear(width -> width // 2), then MinPlus(width // 2, width), added residually
        layer2 : same structure as layer1, added residually
        head   : Linear(width -> num_classes)

    NOTE(review): the MinPlus arguments are presumably (in_features,
    out_features) -- confirm against the semitorch documentation.
    """

    def __init__(self, num_features: int, num_classes: int, width: int = 16):
        super().__init__()
        bottleneck = width // 2

        def minplus_branch() -> nn.Sequential:
            # Linear projection down to the bottleneck, then MinPlus back to width.
            return nn.Sequential(
                nn.Linear(width, bottleneck, bias=False),
                semitorch.MinPlus(bottleneck, width, bias=False),
            )

        self.stem = nn.Linear(num_features, width, bias=False)
        self.layer1 = minplus_branch()
        self.layer2 = minplus_branch()
        self.head = nn.Linear(width, num_classes, bias=False)

    def forward(self, x):
        """Map inputs to class scores through the stem, two residual branches and the head."""
        hidden = self.stem(x)
        for branch in (self.layer1, self.layer2):
            hidden = hidden + branch(hidden)
        return self.head(hidden)

    def get_optimizer_and_scheduler(self, epochs: int, steps_per_epoch: int):
        """Return the ``(optimizer, scheduler)`` pair built by ``tropcial_optimizer_and_scheduler``."""
        return tropcial_optimizer_and_scheduler(
            self, epochs=epochs, steps_per_epoch=steps_per_epoch
        )


def _one_cycle_adamw(params, max_lr: float, epochs: int, steps_per_epoch: int):
    """Build an AdamW optimizer over ``params`` with a three-phase linear OneCycleLR schedule peaking at ``max_lr``."""
    optimizer = torch.optim.AdamW(params, lr=max_lr, weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=max_lr,
        anneal_strategy="linear",
        pct_start=0.3,
        three_phase=True,
        final_div_factor=1000.0,
        div_factor=10.0,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
    )
    return optimizer, scheduler


def tropical_optimizer_and_scheduler(
    model: Union[LinearMaxPlus, LinearMinPlus], epochs: int, steps_per_epoch: int
):
    """Create the optimizer/scheduler pair for a tropical network.

    The parameters are split into two groups that are optimized separately:
    the ``nn.Linear`` layers (stem, first layer of each residual branch, head)
    with a peak learning rate of 2e-2, and the second layer of each residual
    branch (the MaxPlus/MinPlus layer) with a peak learning rate of 2e-3.

    Args:
        model: A model exposing ``stem``, ``layer1``, ``layer2`` and ``head``,
            where ``layer1``/``layer2`` are indexable two-element sequences.
        epochs: Number of training epochs the schedules span.
        steps_per_epoch: Number of scheduler steps per epoch.

    Returns:
        ``(optimizer, scheduler)`` where the two AdamW optimizers are wrapped
        in a semitorch ``MultiOptimizer`` and the two OneCycleLR schedulers in
        a semitorch ``MultiLRScheduler``.
    """
    linear_params = chain(
        model.stem.parameters(),
        model.layer1[0].parameters(),
        model.layer2[0].parameters(),
        model.head.parameters(),
    )
    tropical_params = chain(model.layer1[1].parameters(), model.layer2[1].parameters())

    opt1, sch1 = _one_cycle_adamw(linear_params, 2e-2, epochs, steps_per_epoch)
    opt2, sch2 = _one_cycle_adamw(tropical_params, 2e-3, epochs, steps_per_epoch)

    optimizer = MultiOptimizer(opt1, opt2)
    scheduler = MultiLRScheduler(sch1, sch2)
    return optimizer, scheduler


# Backward-compatible alias: existing callers use the original misspelled name.
tropcial_optimizer_and_scheduler = tropical_optimizer_and_scheduler

In [9]:
# Compare the trainable-parameter counts of a tropical model and the baseline
# model built for the same input/output sizes (8 features, 3 classes).
m = LinearMinPlus(8, 3)
m2 = LinearReLU(8, 3)
for net in (m, m2):
    print(count_parameters(net))

688
688
