# Neural Networks: A Hands-on Tutorial

In this notebook, we'll learn about neural networks by building and training them interactively. We'll use a simple yet practical example: predicting house prices based on various features.

### Learning Objectives:
1. Understand neural network architecture
2. Visualize how neural networks learn
3. Experiment with different hyperparameters
4. See the impact of different network architectures

In [1]:
# Install the third-party packages this notebook imports (-q asks pip for quiet output).
%pip install -q torch pandas numpy matplotlib seaborn scikit-learn kagglehub ipywidgets

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import required libraries
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import kagglehub
import ipywidgets as widgets
from IPython.display import display, clear_output

# Set random seeds for reproducibility.
# Order matters for the cell output: torch.manual_seed returns a Generator
# object, so the None-returning NumPy call is kept last to stop the notebook
# from echoing that object as the cell's result.
torch.manual_seed(42)
np.random.seed(42)

In [3]:
# Load and preprocess California housing dataset
dataset_dir = kagglehub.dataset_download("camnugent/california-housing-prices")
df = pd.read_csv(dataset_dir + '/housing.csv')

# Select features and target
features = ['median_income', 'housing_median_age', 'total_rooms',
           'total_bedrooms', 'population', 'households', 'latitude', 'longitude']
target = 'median_house_value'

# Drop rows with a missing feature or target value. StandardScaler keeps NaNs
# in its output, and a single NaN row turns the full-batch MSE loss (and, after
# one optimizer step, every weight) into NaN, so training would silently fail.
# NOTE(review): in the Kaggle copy of this dataset the gaps are in
# `total_bedrooms` -- confirm with df[features].isna().sum().
df_complete = df.dropna(subset=features + [target])
X = df_complete[features]
y = df_complete[target]

# Split BEFORE scaling so that test-set statistics never leak into the scalers
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X.values, y.values.reshape(-1, 1), test_size=0.2, random_state=42)

# Scale features and target using statistics of the training split only
X_scaler = StandardScaler()
y_scaler = StandardScaler()

X_train = X_scaler.fit_transform(X_train_raw)
y_train = y_scaler.fit_transform(y_train_raw)
X_test = X_scaler.transform(X_test_raw)
y_test = y_scaler.transform(y_test_raw)

# Scaled copies of the full cleaned dataset, kept so that any later cell that
# still refers to X_scaled / y_scaled continues to work
X_scaled = X_scaler.transform(X.values)
y_scaled = y_scaler.transform(y.values.reshape(-1, 1))

# Sanity check: nothing non-finite may reach the network
assert np.isfinite(X_train).all() and np.isfinite(y_train).all()
assert np.isfinite(X_test).all() and np.isfinite(y_test).all()

# Convert to float32 PyTorch tensors (the dtype nn.Linear uses by default)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

Downloading from https://www.kaggle.com/api/v1/datasets/download/camnugent/california-housing-prices?dataset_version_number=1...


100%|██████████| 400k/400k [00:00<00:00, 19.9MB/s]

Extracting files...





## Part 1: Understanding Neural Network Architecture

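Let's create a visual representation of a neural network: neurons are drawn as dots and the connections between consecutive layers as lines. Use the sliders to change the number of neurons in the two hidden layers.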

In [4]:
class NeuralNetworkVisualizer:
    """Draw a fully connected network as columns of neurons joined by lines.

    Args:
        input_size: Number of neurons in the input layer.
        hidden_sizes: Sequence with the neuron count of each hidden layer.
        output_size: Number of neurons in the output layer.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

    @staticmethod
    def _neuron_y_positions(size):
        """Return evenly spaced y coordinates in [0, 1] for ``size`` neurons.

        A lone neuron is centred at 0.5: ``np.linspace(0, 1, 1)`` would put it
        at 0, i.e. at the bottom edge of the diagram instead of the middle.
        """
        if size == 1:
            return np.array([0.5])
        return np.linspace(0, 1, size)

    def plot_architecture(self):
        """Render the network diagram with matplotlib and show it."""
        fig, ax = plt.subplots(figsize=(12, 8))

        # Calculate layer positions (columns are spread evenly over [0, 1])
        layer_sizes = [self.input_size] + list(self.hidden_sizes) + [self.output_size]
        layer_positions = np.linspace(0, 1, len(layer_sizes))
        last_layer = len(layer_sizes) - 1

        # Plot neurons and connections
        for i, (size, x_pos) in enumerate(zip(layer_sizes, layer_positions)):
            y_positions = self._neuron_y_positions(size)

            # Input layer is blue, output layer green, hidden layers red
            if i == 0:
                color = 'blue'
            elif i == last_layer:
                color = 'green'
            else:
                color = 'red'

            # zorder keeps the neurons drawn on top of the connection lines
            ax.scatter([x_pos] * size, y_positions, s=100, c=color, zorder=3)

            # Plot connections to next layer (every neuron to every neuron)
            if i < last_layer:
                next_y_positions = self._neuron_y_positions(layer_sizes[i + 1])
                next_x_pos = layer_positions[i + 1]

                for y1 in y_positions:
                    for y2 in next_y_positions:
                        ax.plot([x_pos, next_x_pos], [y1, y2], 'gray', alpha=0.1)

        ax.set_title('Neural Network Architecture')
        ax.axis('off')
        plt.show()

# Slider callback: redraw the diagram for the currently selected layer sizes
def visualize_architecture(hidden_layer1, hidden_layer2):
    """Plot a network with 8 inputs, two hidden layers of the given sizes, and 1 output."""
    hidden_sizes = [hidden_layer1, hidden_layer2]
    NeuralNetworkVisualizer(8, hidden_sizes, 1).plot_architecture()

# One slider per hidden layer, each allowing 1 to 20 neurons
layer1_slider, layer2_slider = (
    widgets.IntSlider(min=1, max=20, value=start, description=label)
    for label, start in (('Hidden Layer 1:', 10), ('Hidden Layer 2:', 5))
)

# Kept as the final expression so the notebook renders the interactive widget
widgets.interactive(
    visualize_architecture,
    hidden_layer1=layer1_slider,
    hidden_layer2=layer2_slider,
)

interactive(children=(IntSlider(value=10, description='Hidden Layer 1:', max=20, min=1), IntSlider(value=5, de…

## Part 2: Building and Training Neural Networks

Now let's create an interactive neural network trainer where you can experiment with different architectures and parameters.

In [5]:
# First, make sure to import all necessary libraries
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import ipywidgets as widgets
from IPython.display import display, clear_output

class SimpleNeuralNetTrainer:
    """Train a two-hidden-layer regression network with live progress plots.

    Training is full-batch: every epoch runs one Adam step on the whole
    training set and then evaluates the MSE on the test set.

    Args:
        X_train, y_train: Training inputs and targets (tensors).
        X_test, y_test: Held-out inputs and targets (tensors).
    """

    def __init__(self, X_train, y_train, X_test, y_test):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

    def _build_model(self, hidden_size1, hidden_size2):
        """Return a fresh Linear-ReLU-Linear-ReLU-Linear network.

        The input width is read from ``X_train`` instead of being hard-coded,
        so the trainer keeps working if the feature list changes.
        """
        n_inputs = self.X_train.shape[-1]
        return nn.Sequential(
            nn.Linear(n_inputs, hidden_size1),
            nn.ReLU(),
            nn.Linear(hidden_size1, hidden_size2),
            nn.ReLU(),
            nn.Linear(hidden_size2, 1)
        )

    def _plot_progress(self, train_losses, test_losses, test_outputs):
        """Draw the loss curves and a predicted-vs-actual scatter plot.

        A new figure is created on every call: ``plt.show()`` hands the
        current figure to the front end, so a figure created once before the
        training loop would only carry its size into the first refresh.
        """
        fig, (loss_ax, pred_ax) = plt.subplots(1, 2, figsize=(12, 5))

        # Plot losses
        loss_ax.plot(train_losses, label='Training Loss')
        loss_ax.plot(test_losses, label='Test Loss')
        loss_ax.set_title('Training Progress')
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('Loss')
        loss_ax.legend()

        # Plot predictions; the dashed red diagonal marks a perfect prediction
        actual = self.y_test.numpy()
        predicted = test_outputs.numpy()
        pred_ax.scatter(actual, predicted, alpha=0.5)
        lowest, highest = actual.min(), actual.max()
        pred_ax.plot([lowest, highest], [lowest, highest], 'r--')
        pred_ax.set_title('Predictions vs Actual')
        pred_ax.set_xlabel('Actual Values')
        pred_ax.set_ylabel('Predicted Values')

        fig.tight_layout()
        plt.show()
        plt.close(fig)  # release the figure on backends that keep it open

    def train(self, hidden_size1=20, hidden_size2=10, learning_rate=0.01, epochs=50):
        """Train a new model and return it.

        Args:
            hidden_size1: Neurons in the first hidden layer.
            hidden_size2: Neurons in the second hidden layer.
            learning_rate: Learning rate passed to Adam.
            epochs: Number of full-batch training steps.

        Returns:
            The trained ``nn.Sequential`` model. It is also stored in the
            module-level name ``trained_model``, which later cells read.
        """
        # Clear previous output
        clear_output(wait=True)

        model = self._build_model(hidden_size1, hidden_size2)

        # Loss and optimizer
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Lists to store losses
        train_losses = []
        test_losses = []

        # Training loop
        for epoch in range(epochs):
            # Train
            model.train()
            optimizer.zero_grad()
            outputs = model(self.X_train)
            loss = criterion(outputs, self.y_train)
            loss.backward()
            optimizer.step()

            # Test
            model.eval()
            with torch.no_grad():
                test_outputs = model(self.X_test)
                test_loss = criterion(test_outputs, self.y_test)

            train_losses.append(loss.item())
            test_losses.append(test_loss.item())

            # Refresh every 5 epochs, and always after the final epoch so the
            # plot left on screen shows the model that is actually returned
            if epoch % 5 == 0 or epoch == epochs - 1:
                clear_output(wait=True)
                # test_outputs was computed under no_grad just above, so it is
                # reused rather than running the model a second time
                self._plot_progress(train_losses, test_losses, test_outputs)
                # 1-based epoch count so the last line reads [epochs/epochs]
                print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

        # Published as a global because the Part 3 cell reads `trained_model`
        global trained_model
        trained_model = model
        return model

def create_simple_trainer_widget():
    """Build and display the interactive training panel.

    The panel has sliders for both hidden-layer sizes, the learning rate and
    the epoch count, plus a 'Train Model' button. Clicking the button trains a
    new model on the module-level train/test tensors and renders the progress
    plots inside the panel's output area.
    """
    # Create trainer instance
    trainer = SimpleNeuralNetTrainer(X_train, y_train, X_test, y_test)

    # Let each description take the width it needs so longer labels such as
    # 'Hidden Layer 1:' are not cut off by the default fixed label width
    label_style = {'description_width': 'initial'}

    # Create widgets
    hidden1 = widgets.IntSlider(value=20, min=5, max=50, description='Hidden Layer 1:',
                                continuous_update=False, style=label_style)
    hidden2 = widgets.IntSlider(value=10, min=5, max=50, description='Hidden Layer 2:',
                                continuous_update=False, style=label_style)
    # The slider moves in steps of 0.0001, but FloatSlider shows two decimals
    # by default, so small rates would all read "0.00"; show four decimals.
    lr = widgets.FloatSlider(value=0.01, min=0.0001, max=1, step=0.0001,
                             readout_format='.4f', description='Learning Rate:',
                             continuous_update=False, style=label_style)
    epochs = widgets.IntSlider(value=50, min=10, max=200, description='Epochs:',
                               continuous_update=False, style=label_style)

    # Create train button
    train_button = widgets.Button(description='Train Model')
    output = widgets.Output()

    # Define button click handler
    def on_button_clicked(b):
        # Everything printed or plotted during training lands in `output`
        with output:
            trainer.train(
                hidden_size1=hidden1.value,
                hidden_size2=hidden2.value,
                learning_rate=lr.value,
                epochs=epochs.value
            )

    train_button.on_click(on_button_clicked)

    # Display widgets
    print("Neural Network Trainer")
    print("Adjust parameters and click 'Train Model' to start training")
    display(widgets.VBox([hidden1, hidden2, lr, epochs, train_button, output]))

# Create and display the widget
create_simple_trainer_widget()

Neural Network Trainer
Adjust parameters and click 'Train Model' to start training


VBox(children=(IntSlider(value=20, continuous_update=False, description='Hidden Layer 1:', max=50, min=5), Int…

## Part 3: Understanding Network Behavior

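Let's create some visualizations to understand how the network makes predictions. This section uses the model trained in Part 2, so click 'Train Model' there before running the next cell. The sliders set each input feature in standardized units (standard deviations from that feature's mean), because that is the scale the network was trained on.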

In [7]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

def collect_layer_activations(model, input_data):
    """Run ``input_data`` through ``model`` one layer at a time.

    Args:
        model: An iterable of layers (e.g. ``nn.Sequential``).
        input_data: Input tensor for the first layer.

    Returns:
        A list ``[input, output of layer 0, output of layer 1, ...]``; the
        last entry is the model's prediction. Gradients are not tracked.
    """
    activations = [input_data]
    with torch.no_grad():
        for layer in model:
            # ReLU modules are ordinary entries of the model, so calling each
            # layer in turn already applies them; no extra relu is needed
            input_data = layer(input_data)
            activations.append(input_data)
    return activations


def visualize_network(model, feature_names, y_scaler):
    """Show an interactive view of the activations inside ``model``.

    One slider is created per feature. Whenever a slider moves, the network is
    redrawn with one column per layer output, each neuron coloured by its
    activation, and the prediction is printed in original price units.

    Args:
        model: Trained network that can be iterated layer by layer.
        feature_names: Feature names, in the column order the model expects.
        y_scaler: Fitted scaler whose ``inverse_transform`` maps the model
            output back to the original target scale.
    """

    def update_network(**features):
        # Build the input once, in feature_names order (not keyword order), so
        # the plot and the prediction are guaranteed to use the same values
        input_data = torch.tensor([[features[name] for name in feature_names]], dtype=torch.float32)

        # Single forward pass; reused for both the drawing and the prediction
        activations = collect_layer_activations(model, input_data)

        widest = max(a.shape[1] for a in activations)
        # Spread the columns out in proportion to the tallest layer so the
        # diagram keeps a sensible shape once the aspect ratio is equal
        layer_gap = max(1.0, widest / 4)

        # Plot the network
        fig, ax = plt.subplots(figsize=(10, 5))
        for i, layer_activations in enumerate(activations):
            layer_size = layer_activations.shape[1]
            for j, activation in enumerate(layer_activations[0]):
                # The colormap clips to [0, 1], so activations outside
                # [-0.5, 0.5] saturate at the ends of the colour scale
                color = plt.cm.viridis(activation.item() + 0.5)
                # Each column is centred vertically around y = 0
                ax.add_patch(plt.Circle((i * layer_gap, j - (layer_size - 1) / 2), 0.3, color=color))

        # Limits follow the actual layer sizes, so wide layers are never
        # clipped; equal aspect keeps the neurons circular
        ax.set_xlim(-1, (len(activations) - 1) * layer_gap + 1)
        ax.set_ylim(-widest / 2 - 0.5, widest / 2 + 0.5)
        ax.set_aspect('equal')
        ax.axis('off')
        plt.show()

        # The last activation is the model output; undo the target scaling
        pred_unscaled = y_scaler.inverse_transform(activations[-1].numpy())

        print(f"Predicted House Price: ${pred_unscaled[0, 0]:,.2f}")

    # Create sliders for each input feature
    sliders = {name: widgets.FloatSlider(min=-3, max=3, step=0.1, description=name) for name in feature_names}

    # Create interactive widget
    interactive_widget = widgets.interactive(update_network, **sliders)

    # Display the interactive widget and prediction label
    display(widgets.VBox([interactive_widget]))

# `trained_model` is only created once 'Train Model' has been clicked in
# Part 2. On a fresh kernel (Restart & Run All) it does not exist yet, so show
# a hint instead of failing with a NameError.
if 'trained_model' in globals():
    visualize_network(trained_model, features, y_scaler)
else:
    print("No trained model yet: click 'Train Model' in Part 2, then re-run this cell.")

VBox(children=(interactive(children=(FloatSlider(value=0.0, description='median_income', max=3.0, min=-3.0), F…

## Part 4: Interactive Experiments

Let's create some interactive experiments to understand how neural networks learn.

In [8]:

class InteractiveExperiments:
    """Small training experiments that each vary one aspect of the setup.

    Every experiment trains one or more networks with full-batch gradient
    steps and plots the loss curves. All runs are seeded identically before
    the model is built, so curves within an experiment differ because of the
    setting under study rather than a lucky or unlucky random initialization,
    and results do not depend on the order in which experiments are run.

    Args:
        X_train, y_train: Training inputs and targets (tensors).
        X_test, y_test: Held-out inputs and targets (tensors).
        feature_names: Names of the input features (stored, not used here).
    """

    SEED = 42     # seed applied before every model is initialized
    EPOCHS = 50   # full-batch training steps per run

    def __init__(self, X_train, y_train, X_test, y_test, feature_names):
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.feature_names = feature_names

    def _build_model(self, hidden_sizes=(20, 10)):
        """Return a freshly initialized MLP with the given hidden-layer sizes.

        The input width is taken from ``X_train`` and the output width is 1.
        The global torch seed is reset first so repeated calls with the same
        sizes produce identical initial weights.
        """
        torch.manual_seed(self.SEED)
        layers = []
        prev_size = self.X_train.shape[-1]
        for size in hidden_sizes:
            layers.extend([nn.Linear(prev_size, size), nn.ReLU()])
            prev_size = size
        layers.append(nn.Linear(prev_size, 1))
        return nn.Sequential(*layers)

    def _fit(self, model, optimizer, epochs=None, penalty=None):
        """Train ``model`` and return ``(train_losses, test_losses)``.

        Args:
            model: Network to train in place.
            optimizer: Optimizer already bound to ``model.parameters()``.
            epochs: Number of steps; defaults to ``EPOCHS``.
            penalty: Optional callable ``model -> tensor`` added to the
                training objective (e.g. an L1 term).

        Both returned lists contain the plain MSE, one value per epoch. The
        penalty is optimized but deliberately left out of the recorded loss,
        so curves from regularized and unregularized runs are comparable.
        """
        if epochs is None:
            epochs = self.EPOCHS
        criterion = nn.MSELoss()
        train_losses = []
        test_losses = []

        for _ in range(epochs):
            # Training step
            model.train()
            optimizer.zero_grad()
            data_loss = criterion(model(self.X_train), self.y_train)
            objective = data_loss if penalty is None else data_loss + penalty(model)
            objective.backward()
            optimizer.step()
            train_losses.append(data_loss.item())

            # Evaluation - never includes any regularization term
            model.eval()
            with torch.no_grad():
                test_loss = criterion(model(self.X_test), self.y_test)
            test_losses.append(test_loss.item())

        return train_losses, test_losses

    @staticmethod
    def _finish_plot(title):
        """Add the shared axis labels and legend to the current figure and show it."""
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

    def experiment_learning_rates(self):
        """Compare Adam training curves for several learning rates."""
        learning_rates = [0.0001, 0.001, 0.01, 0.1]
        plt.figure(figsize=(12, 4))

        for lr in learning_rates:
            # Same architecture and initial weights for every learning rate
            model = self._build_model()
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            losses, _ = self._fit(model, optimizer)
            plt.plot(losses, label=f'LR = {lr}')

        self._finish_plot('Impact of Learning Rate on Training')

    def experiment_network_depth(self):
        """Compare training curves for one, two and three hidden layers."""
        architectures = [
            ([20], 'Single Hidden Layer'),
            ([20, 10], 'Two Hidden Layers'),
            ([20, 10, 5], 'Three Hidden Layers')
        ]

        plt.figure(figsize=(12, 4))

        for hidden_sizes, name in architectures:
            model = self._build_model(hidden_sizes)
            # Adam with its default learning rate
            optimizer = torch.optim.Adam(model.parameters())
            losses, _ = self._fit(model, optimizer)
            plt.plot(losses, label=name)

        self._finish_plot('Impact of Network Depth on Training')

    def experiment_optimizers(self):
        """Compare SGD (with momentum), Adam and RMSprop at the same learning rate."""
        optimizers = {
            'SGD': torch.optim.SGD,
            'Adam': torch.optim.Adam,
            'RMSprop': torch.optim.RMSprop
        }

        plt.figure(figsize=(12, 4))

        for name, OptClass in optimizers.items():
            # Same architecture and initial weights for every optimizer
            model = self._build_model()

            # Add momentum for SGD to make the comparison more fair
            kwargs = {'lr': 0.01}
            if name == 'SGD':
                kwargs['momentum'] = 0.9

            optimizer = OptClass(model.parameters(), **kwargs)
            losses, _ = self._fit(model, optimizer)
            plt.plot(losses, label=name)

        self._finish_plot('Comparing Different Optimizers')

    def experiment_regularization(self):
        """Compare no regularization, an L1 penalty and L2 weight decay.

        Train (solid) and test (dashed) MSE are plotted for each method.
        """
        strength = 0.01

        def l1_penalty(model):
            # L1 has no optimizer switch, so it is added to the objective by hand
            return strength * sum(p.abs().sum() for p in model.parameters())

        # (label, Adam weight_decay, extra penalty added to the objective)
        regularization_methods = [
            ('None', 0, None),
            ('L1 (Lasso)', 0, l1_penalty),
            # For L2, we can use the optimizer's weight_decay parameter
            ('L2 (Ridge)', strength, None),
        ]

        plt.figure(figsize=(12, 4))

        for name, weight_decay, penalty in regularization_methods:
            # Same architecture and initial weights for every method
            model = self._build_model()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=weight_decay)
            train_losses, test_losses = self._fit(model, optimizer, penalty=penalty)

            plt.plot(train_losses, label=f'{name} (Train)')
            plt.plot(test_losses, linestyle='--', label=f'{name} (Test)')

        self._finish_plot('Effect of Regularization')

# Instantiate the experiment runner on the prepared train/test tensors
exp = InteractiveExperiments(X_train, y_train, X_test, y_test, features)

# One button per experiment method, keyed by method name; the button label is
# the method name with underscores replaced by spaces and title-cased
experiment_buttons = {
    method_name: widgets.Button(description=method_name.replace('_', ' ').title())
    for method_name in ('experiment_learning_rates', 'experiment_network_depth',
                        'experiment_optimizers', 'experiment_regularization')
}

# Shared area into which whichever experiment is clicked renders its plot
output = widgets.Output()

def create_button_handler(experiment_name):
    """Return a click callback that runs the named method of ``exp``.

    The callback clears the shared ``output`` widget and then renders the
    experiment's result inside it. A factory is used so that each callback
    captures its own ``experiment_name``.
    """
    def handler(b):
        with output:
            clear_output(wait=True)
            run_experiment = getattr(exp, experiment_name)
            run_experiment()

    return handler

# Attach to every button the handler that runs its experiment
for experiment_name in experiment_buttons:
    button = experiment_buttons[experiment_name]
    button.on_click(create_button_handler(experiment_name))

# Show the row of buttons followed by the shared output area
display(widgets.HBox([*experiment_buttons.values()]), output)

HBox(children=(Button(description='Experiment Learning Rates', style=ButtonStyle()), Button(description='Exper…

Output()