In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

  cpu = _conversion_method_template(device=torch.device("cpu"))


### Architecture
Define a simple feed-forward network: one hidden layer with a ReLU activation, followed by a linear output layer.

In [5]:
class FeedForwardNet(nn.Module):
    """Two-layer fully connected network with a ReLU hidden activation.

    Args:
        input_size: Number of features in each input sample.
        hidden_size: Width of the hidden layer.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names double as state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map ``x`` through fc1 + ReLU and then fc2; no activation on the output."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)
    

### Testing
Run a forward pass through `FeedForwardNet` on random data and check the output shape.

In [6]:
# Build a network mapping 784 input features to 10 outputs via 128 hidden units
model = FeedForwardNet(784, 128, 10)

# Push a random batch through it: 32 samples of 784 values each
x = torch.randn((32, 784))
output = model(x)

# One row of 10 outputs per sample is expected -> torch.Size([32, 10])
print(output.size())

torch.Size([32, 10])


### Regression Network Training

#### Architecture

In [None]:
class Net(nn.Module):
    """Small regression MLP: 10 input features -> 20 hidden units (ReLU) -> 1 output."""

    def __init__(self):
        super().__init__()
        # Modules are created in the order they are applied; their positions
        # (0, 1, 2) inside the Sequential determine the state_dict keys.
        hidden = nn.Linear(10, 20)
        activation = nn.ReLU()
        head = nn.Linear(20, 1)
        self.layers = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        out = self.layers(x)
        return out

#### Define model

In [10]:
# Fresh regression network plus the loss/optimizer pair used to fit it
model = Net()
criterion = nn.MSELoss()  # mean squared error between predictions and targets
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)  # Adam over this model's parameters

#### Define training subroutine

In [9]:
def train(model, X_train, y_train, epochs=100, criterion=None, optimizer=None):
    """Fit ``model`` to ``(X_train, y_train)`` with one full-batch step per epoch.

    Args:
        model: The ``nn.Module`` to train; its parameters are updated in place.
        X_train: Input tensor passed to ``model`` as a single batch.
        y_train: Target tensor compared against the model's predictions.
        epochs: Number of optimisation steps to run.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.
            Defaults to ``nn.MSELoss()``.
        optimizer: Optimizer that updates ``model``'s parameters. Defaults to
            ``optim.Adam(model.parameters(), lr=0.001)``.

    Returns:
        The same ``model`` instance after training.
    """
    # Build the loss and optimizer from the model that was actually passed in
    # instead of reading notebook-level globals: other cells rebind `model`,
    # `criterion` and `optimizer`, and a stale global optimizer would step the
    # parameters of a different network (or raise NameError on a fresh kernel).
    if criterion is None:
        criterion = nn.MSELoss()
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(epochs):
        # STEP 1: Forward pass - make predictions and measure the error
        predictions = model(X_train)
        loss = criterion(predictions, y_train)

        # STEP 2: Backward pass - gradients accumulate in PyTorch, so clear
        # the previous step's values before backpropagating
        optimizer.zero_grad()
        loss.backward()

        # STEP 3: Update weights using the freshly computed gradients
        optimizer.step()

        # Report progress every 20 epochs (epochs 0, 20, 40, ...)
        if epoch % 20 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

    return model


#### Run training on random data

In [11]:
# Synthetic regression set: 100 samples with 10 features and one target value each
X = torch.randn((100, 10))
y = torch.randn((100, 1))

# Fit the model on that data for 100 epochs and keep the returned model
trained_model = train(model, X_train=X, y_train=y, epochs=100)


Epoch 0, Loss: 1.1158
Epoch 20, Loss: 1.0120
Epoch 40, Loss: 0.9589
Epoch 60, Loss: 0.9203
Epoch 80, Loss: 0.8846


### Deeper Networks

#### Architecture

In [13]:
class DeepNet(nn.Module):
    """Four-layer MLP (784 -> 512 -> 256 -> 128 -> 10) with dropout after each hidden layer."""

    def __init__(self):
        super().__init__()
        # Three hidden layers followed by the output projection
        self.layer1 = nn.Linear(784, 512)
        self.layer2 = nn.Linear(512, 256)
        self.layer3 = nn.Linear(256, 128)
        self.layer4 = nn.Linear(128, 10)
        # A single dropout module (p = 0.2) shared by all hidden layers
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Apply linear -> ReLU -> dropout for each hidden layer, then the output layer."""
        hidden_layers = (self.layer1, self.layer2, self.layer3)
        for layer in hidden_layers:
            x = self.dropout(torch.relu(layer(x)))

        # Output layer: 128 -> 10, with no activation and no dropout
        return self.layer4(x)

#### Create Model

In [14]:
# Instantiate DeepNet; note this rebinds the notebook-level name `model`
model = DeepNet()

### Perceptron

#### Replicate the `satlin` (saturating linear) activation function

In [2]:
class SatLin(nn.Module):
    """Saturating linear activation: values are limited to the range [0, 1]."""

    def forward(self, x):
        """Return ``x`` with elements below 0 set to 0 and elements above 1 set to 1."""
        return x.clamp(min=0, max=1)

#### Create perceptron model

In [3]:
class Perceptron(nn.Module):
    """Single-unit perceptron: a 3-input linear layer followed by SatLin."""

    def __init__(self):
        super().__init__()
        # Linear part: 3 input features -> 1 output
        self.layer = nn.Linear(in_features=3, out_features=1)
        # The activation module is instantiated once here and reused in forward
        self.activation = SatLin()

    def forward(self, x):
        """Apply the linear layer, then the activation, and return the result."""
        return self.activation(self.layer(x))

In [13]:
# --- 2. Prepare the data (the two patterns) ---
# Each input has 3 features.
# Pattern A: [ 1, -1, 1] -> target output 1
# Pattern B: [-1, -1, 1] -> target output 0
X_train = torch.tensor([[1.0, -1.0, 1.0], [-1.0, -1.0, 1.0]])

# Matching targets (labels), one row per pattern
y_train = torch.tensor([[1.0], [0.0]])

# --- 3. Training setup ---
model = Perceptron()
criterion = nn.MSELoss()  # error function: distance between prediction and target
optimizer = optim.Adam(params=model.parameters(), lr=0.1)  # lr sets how large each update step is

# --- 4. Training loop ---
epochs = 100  # how many times the data is shown to the model

print("Inizio addestramento...")

for epoch in range(epochs):
    # A. Reset the gradients (PyTorch accumulates them otherwise)
    optimizer.zero_grad()

    # B + C. Forward pass, then measure the error of the prediction
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # D + E. Backpropagate, then actually update the weights
    loss.backward()
    optimizer.step()

    # Report progress only every 20 epochs
    if (epoch+1) % 20 != 0:
        continue
    print(f'Epoca [{epoch+1}/{epochs}], Errore (Loss): {loss.item():.4f}')

# --- 5. Final check ---
print("\n--- Test Finale ---")
with torch.no_grad():  # gradient tracking is not needed for evaluation
    test_output = model(X_train)
    print(f"Pattern A  (Target 1) -> Predizione: {test_output[0].item():.4f}")
    print(f"Pattern B  (Target 0) -> Predizione: {test_output[1].item():.4f}")

Inizio addestramento...
Epoca [20/100], Errore (Loss): 0.0000
Epoca [40/100], Errore (Loss): 0.0000
Epoca [60/100], Errore (Loss): 0.0000
Epoca [80/100], Errore (Loss): 0.0000
Epoca [100/100], Errore (Loss): 0.0000

--- Test Finale ---
Pattern A  (Target 1) -> Predizione: 1.0000
Pattern B  (Target 0) -> Predizione: 0.0000
