In [33]:
import numpy as np
import pandas as pd
import pooch

import torch
import torch.nn as nn
import torch.optim as optim

import dotenv
from dotenv import load_dotenv; load_dotenv(dotenv_path="../.env")

import sys
sys.path.insert(0, "..")  # Project root (BIOS) so 'src' package can be found
import src.utils as utils

In [5]:
# Use GPU when available: MPS (Mac Apple Silicon), CUDA (Colab/cloud), else CPU
def get_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")

device = get_device()
print(f"Using device: {device}")

Using device: mps


In [7]:
# Quick check: run in Cursor to confirm GPU is used
t = torch.randn(2, 2, device=device)
print(f"Tensor on {t.device} — GPU ready.")

Tensor on mps:0 — GPU ready.


In [12]:
class myNN(nn.Module):
    def __init__(self, in_dim, hidden, out_dim):
        super().__init__()
        # Create a list of layer dimensions: [in_dim, hidden..., out_dim]
        layer_dims = [in_dim] + hidden + [out_dim]
        # Create nn.Linear layers for each adjacent pair in layer_dims
        self.layers = nn.ModuleList([
            nn.Linear(layer_dims[i], layer_dims[i+1]) for i in range(len(layer_dims) - 1)
        ])
        self.relu = nn.ReLU()

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # Apply ReLU to all layers except the last
            if i < len(self.layers) - 1:
                x = self.relu(x)
        return x

In [13]:
import torch

# Create some dummy data
# Let's say our network expects input dimension = 10, output dimension = 2
input_dim = 10
hidden_dims = [16, 8]
output_dim = 2

# Number of samples
num_samples = 100

# Dummy inputs: random floats
X = torch.randn(num_samples, input_dim, device=device)

# Dummy targets: random classes for multiclass classification
num_classes = 5  # Set your desired number of classes for multiclass
y = torch.randint(0, num_classes, (num_samples,), device=device)
y

tensor([2, 1, 2, 0, 2, 2, 3, 4, 1, 0, 2, 4, 0, 1, 2, 2, 3, 2, 3, 4, 3, 1, 2, 0,
        1, 2, 4, 0, 2, 3, 0, 0, 2, 3, 3, 0, 0, 4, 2, 2, 1, 1, 3, 3, 1, 4, 3, 0,
        4, 0, 3, 4, 1, 0, 1, 1, 4, 0, 0, 0, 3, 0, 4, 2, 0, 4, 3, 3, 4, 4, 0, 0,
        0, 4, 3, 1, 2, 1, 1, 2, 4, 4, 0, 3, 2, 0, 2, 0, 2, 2, 4, 2, 1, 1, 4, 2,
        1, 2, 2, 3], device='mps:0')

In [14]:
print(X.shape)
print(y.shape)

torch.Size([100, 10])
torch.Size([100])


In [17]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    train_size=.8, 
    test_size=.2, 
    random_state=42
)

In [23]:
model = myNN(in_dim=X.shape[1], hidden=[1], out_dim=5)
model.to(device)

loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=.001)

In [26]:
epochs = 1000

for e in range(epochs):
    predictions = model.forward(X_train)
    CEL = loss(predictions, y_train)

    predicted_prob = torch.softmax(predictions, dim=1)
    predicted_labels = torch.argmax(predictions, dim=1)

    CEL.backward()
    optimizer.step()
    optimizer.zero_grad()

    if (e+1) % 100 ==0:
        print(f"Epoch {e+1}/{epochs}. Loss: {CEL.item()}")

Epoch 100/1000. Loss: 1.4589228630065918
Epoch 200/1000. Loss: 1.4567832946777344
Epoch 300/1000. Loss: 1.4552819728851318
Epoch 400/1000. Loss: 1.4542059898376465
Epoch 500/1000. Loss: 1.4534366130828857
Epoch 600/1000. Loss: 1.4528130292892456
Epoch 700/1000. Loss: 1.452270269393921
Epoch 800/1000. Loss: 1.4518377780914307
Epoch 900/1000. Loss: 1.451453447341919
Epoch 1000/1000. Loss: 1.451120376586914


In [28]:
predicted_labels

tensor([0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, 2, 0, 0, 0, 0, 4, 0,
        0, 4, 0, 0, 4, 0, 2, 0, 0, 0, 0, 3, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4, 4,
        0, 3, 0, 0, 0, 0, 0, 0, 4, 2, 3, 0, 0, 4, 3, 4, 0, 0, 0, 0, 0, 2, 4, 2,
        0, 0, 4, 4, 0, 0, 4, 0], device='mps:0')