Here is the code used to produce the TensorBoard logs. There were issues importing other project files into the Jupyter notebook, so there is also an analysis.py file in the notebooks folder that runs reliably. Some of the analysis is included in this file; that analysis and more can be found in the report.md file.

**All screenshots are in the report.md file along with extra analysis**

In [None]:
import importlib
from datetime import datetime


class TensorboardWriter():
    """
    Wrapper around a TensorBoard ``SummaryWriter`` that adds the current step
    (and, for most calls, the current mode) to every ``add_*`` call.

    When visualization is disabled, or no backend can be imported, every
    ``add_*`` call is a silent no-op so calling code needs no guards.
    """

    def __init__(self, log_dir, logger, enabled):
        """
        :param log_dir: directory handed to ``SummaryWriter`` (converted with ``str``).
        :param logger: object with a ``warning(message)`` method; only used when
            ``enabled`` is true and no backend could be imported.
        :param enabled: when falsy, no writer is created.
        """
        self.writer = None
        self.selected_module = ""

        if enabled:
            log_dir = str(log_dir)

            # Retrieve vizualization writer.
            succeeded = False
            for module in ["torch.utils.tensorboard", "tensorboardX"]:
                try:
                    self.writer = importlib.import_module(module).SummaryWriter(log_dir)
                except ImportError:
                    continue
                # Record the backend that actually produced the writer; this has
                # to happen before leaving the loop or it is never stored.
                self.selected_module = module
                succeeded = True
                break

            if not succeeded:
                message = "Warning: visualization (Tensorboard) is configured to use, but currently not installed on " \
                    "this machine. Please install TensorboardX with 'pip install tensorboardx', upgrade PyTorch to " \
                    "version >= 1.1 to use 'torch.utils.tensorboard' or turn off the option in the 'config.json' file."
                logger.warning(message)

        self.step = 0
        self.mode = ''

        # Writer methods that are proxied through __getattr__.
        self.tb_writer_ftns = {
            'add_scalar', 'add_scalars', 'add_image', 'add_images', 'add_audio',
            'add_text', 'add_histogram', 'add_pr_curve', 'add_embedding','add_graph'
        }
        # Proxied methods whose tag is passed through without the '/<mode>' suffix.
        self.tag_mode_exceptions = {'add_graph','add_histogram', 'add_embedding'}
        self.timer = datetime.now()

    def set_step(self, step, mode='train'):
        """
        Set the step and mode used by subsequent ``add_*`` calls.

        For any non-zero step the time elapsed since the previous call is
        logged as the ``steps_per_sec`` scalar; step 0 only resets the timer.
        """
        self.mode = mode
        self.step = step
        if step != 0:
            elapsed = (datetime.now() - self.timer).total_seconds()
            # Two calls can land on the same clock tick; skip the sample
            # instead of dividing by zero.
            if elapsed > 0:
                self.add_scalar('steps_per_sec', 1 / elapsed)
        self.timer = datetime.now()

    def add_graph(self, model, input_to_model=None, *args, **kwargs):
        """
        Forward to the writer's ``add_graph`` without injecting the step.

        The generic wrapper built by ``__getattr__`` passes ``self.step`` as the
        third positional argument; for ``add_graph`` that slot is not a step,
        so this method is defined explicitly. Does nothing when there is no
        writer.
        """
        add_graph = getattr(self.writer, 'add_graph', None)
        if add_graph is not None:
            add_graph(model, input_to_model, *args, **kwargs)

    def __getattr__(self, name):
        """
        If visualization is configured to use:
            return add_data() methods of tensorboard with additional information (step, tag) added.
        Otherwise:
            return a blank function handle that does nothing
        """
        # __getattr__ only runs after normal lookup has failed. Read instance
        # state through __dict__ so that an instance whose __init__ has not run
        # (copy, unpickle, __new__) raises AttributeError instead of recursing.
        state = self.__dict__
        if name in state.get('tb_writer_ftns', ()):
            # Loads the actual function from tensorboard; None when there is no writer.
            add_data = getattr(state.get('writer'), name, None)

            def wrapper(tag, data, *args, **kwargs):
                if add_data is not None:
                    # add mode(train/valid) tag
                    if name not in self.tag_mode_exceptions:
                        tag = '{}/{}'.format(tag, self.mode)
                    add_data(tag, data, self.step, *args, **kwargs)
            return wrapper

        # Anything else is genuinely missing.
        raise AttributeError(
            "type object '{}' has no attribute '{}'".format(state.get('selected_module', ''), name)
        )


In [None]:
import torch
class Metrics:
    """Loss, tolerance-based accuracy and console logging for the experiments."""

    def __init__(self, tolerance=0.05):
        """Store the absolute-error threshold used by ``accuracy``."""
        self.tolerance = tolerance

    def mse_loss(self, predictions, targets):
        """Return the mean of the squared element-wise differences."""
        residual = predictions - targets
        return (residual ** 2).mean()

    def accuracy(self, predictions, targets):
        """Return the fraction of elements whose absolute error is below the tolerance."""
        abs_error = (predictions - targets).abs()
        within_tolerance = abs_error < self.tolerance
        return within_tolerance.float().mean()

    def log(self, epoch, train_loss, val_loss, train_acc=None, val_acc=None):
        """
        Print one summary line for the epoch.

        Losses are always printed; the accuracy pair is appended only when
        both ``train_acc`` and ``val_acc`` are given.
        """
        line = f"Epoch {epoch} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}"
        accuracies_missing = train_acc is None or val_acc is None
        if not accuracies_missing:
            line = line + f" | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}"
        print(line)

In [None]:
# import necessary libraries
import time
import json
import torch
import sys
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

sys.path.append('/Users/sydneymarder/Desktop/homework #8')

from model.metric import Metrics
from model.dynamic_model import DenseModel
from data_loader.function_dataset import FunctionDataset

In [None]:
# function to load configurations
def load_config(config_file):
    """
    Read a JSON configuration file and return the parsed object.

    :param config_file: path of the JSON file to read.
    :return: whatever ``json.load`` produces for the file's content.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    with open(config_file, 'r', encoding='utf-8') as f:
        return json.load(f)

# Load the four experiment configurations, keyed by their display name.
configs = {
    experiment: load_config(path)
    for experiment, path in (
        ("Basic Configuration", "configs/config.json"),
        ("Optimal Configuration", "configs/optimal.json"),
        ("Overfit Configuration", "configs/overfit.json"),
        ("Underfit Configuration", "configs/underfit.json"),
    )
}

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
 # run the experiments
# Train one model per configuration and log its curves to a separate
# TensorBoard run directory.
for config_name, config in configs.items():
    print(f"Running experiment: {config_name}")

    # Initialize TensorBoard writer (one run directory per experiment,
    # spaces in the name replaced by underscores).
    log_dir = f'runs/{config_name.replace(" ", "_")}'
    writer = SummaryWriter(log_dir)

    # try/finally so the writer is closed even when building or training
    # the model raises part-way through the run.
    try:
        # Initialize model
        model = DenseModel(
            hidden_layers=config["hidden_layers"],
            neurons_per_layer=config["neurons_per_layer"],
            activation_hidden=config["activation_hidden"],
            activation_output=config["activation_output"],
        ).to(device)

        # Initialize dataset and DataLoader
        train_dataset = FunctionDataset(n_samples=1000, function="linear")
        val_dataset = FunctionDataset(n_samples=200, function="linear")
        train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

        # Log model graph
        print(model)
        # NOTE(review): a (1, 1) input presumably matches DenseModel's single
        # input feature; confirm against the model definition.
        dummy_input = torch.ones(1, 1).to(device)
        writer.add_graph(model, dummy_input)

        # Initialize metrics and optimizer
        metrics = Metrics(tolerance=0.05)
        optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

        # Training loop
        for epoch in range(config["num_epochs"]):
            start_time = time.time()

            # Training phase
            model.train()
            total_train_loss, total_train_accuracy = 0.0, 0.0

            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                predictions = model(x)
                loss = metrics.mse_loss(predictions, y)
                loss.backward()
                optimizer.step()

                total_train_loss += loss.item()
                total_train_accuracy += metrics.accuracy(predictions, y).item()

            # NOTE: these are means of per-batch values, so a smaller final
            # batch carries the same weight as a full one.
            avg_train_loss = total_train_loss / len(train_loader)
            avg_train_accuracy = total_train_accuracy / len(train_loader)

            # Log training metrics
            writer.add_scalar('Loss/train', avg_train_loss, epoch)
            writer.add_scalar('Accuracy/train', avg_train_accuracy, epoch)

            # Validation phase (no gradient tracking, no optimizer step)
            model.eval()
            total_val_loss, total_val_accuracy = 0.0, 0.0

            with torch.no_grad():
                for x, y in val_loader:
                    x, y = x.to(device), y.to(device)
                    predictions = model(x)
                    loss = metrics.mse_loss(predictions, y)
                    total_val_loss += loss.item()
                    total_val_accuracy += metrics.accuracy(predictions, y).item()

            avg_val_loss = total_val_loss / len(val_loader)
            avg_val_accuracy = total_val_accuracy / len(val_loader)

            # Log validation metrics; the epoch time covers both phases.
            writer.add_scalar('Loss/val', avg_val_loss, epoch)
            writer.add_scalar('Accuracy/val', avg_val_accuracy, epoch)
            writer.add_scalar('Time/epoch', time.time() - start_time, epoch)

            # Print log
            metrics.log(epoch, avg_train_loss, avg_val_loss, avg_train_accuracy, avg_val_accuracy)
    finally:
        # Close TensorBoard writer
        writer.close()


### Training Time (Epochs to Convergence)

- **Overfit Configuration**:  
  - Converges the slowest due to the high model complexity and excessive parameters.  
  - Training loss continues to decrease while validation loss starts increasing, indicating memorization rather than learning.  
  - The loss curve suggests prolonged optimization with little improvement in generalization.  
  - While the model achieves high accuracy on the training set, its performance on validation data deteriorates.  

- **Optimal Configuration**:  
  - Balances training speed and generalization, converging efficiently within a reasonable number of epochs.  
  - The model avoids unnecessary parameter tuning and instead focuses on meaningful feature extraction.  
  - Training and validation losses stabilize at a good point, preventing both overfitting and underfitting.  
  - This configuration is ideal for deployment as it maximizes accuracy while maintaining computational efficiency.  

- **Underfit Configuration**:  
  - Converges very quickly but lacks the capacity to learn complex patterns.  
  - The model reaches a plateau early on, failing to improve due to limited parameters or insufficient training.  
  - Training and validation losses remain high, indicating that the model is unable to capture the underlying structure of the data.  
  - This configuration results in poor accuracy and should not be used in practice.  

- **Basic Configuration**:  
  - Shows moderate convergence speed, neither as fast as the underfit model nor as slow as the overfit one.  
  - Its effectiveness depends on hyperparameter tuning, as it may require adjustments to optimize learning.  
  - Performs reasonably well but does not outperform the optimal configuration. 

### Training Loss Curves

- **Overfit Configuration**:  
  - The loss curve sharply drops early, followed by continuous fine-tuning, indicating memorization rather than real learning.  
  - Training loss remains low, but the validation loss starts diverging, proving the model is overfitting.  

- **Optimal Configuration**:  
  - Training loss decreases steadily and stabilizes, preventing overfitting while still achieving good accuracy.  
  - Validation loss does not diverge significantly, showing that the model generalizes well to unseen data.  

- **Underfit Configuration**:  
  - The loss curve flattens almost immediately, suggesting the model stops learning early.  
  - The inability to reduce loss further signals insufficient model complexity or poor feature extraction.  

- **Basic Configuration**:  
  - The loss curve shows decent improvement, but further hyperparameter tuning is needed to match the performance of the optimal configuration.  

### Validation Loss Curves

- **Overfit Configuration**:  
  - Validation loss initially decreases but then starts increasing due to overfitting.  
  - The model fails to generalize, leading to poor validation performance despite low training loss.  

- **Optimal Configuration**:  
  - Validation loss decreases and remains stable, showing that the model does not overfit.  
  - This behavior suggests the model learns meaningful patterns that generalize well to unseen data.  

- **Underfit Configuration**:  
  - Validation loss remains high, confirming the model's inability to extract useful information from the data.  
  - This model is not suitable for real-world use as it does not perform well even on training data.  

- **Basic Configuration**:  
  - The validation loss curve is more stable than in the overfitting case but requires further tuning to reach optimal generalization.  


### Model Architectures using `add_graph`

- **Overfit Configuration**:
  - A highly complex model with excessive parameters, leading to overfitting.  
  - Deeper layers and a large number of neurons allow it to memorize data rather than generalize.  

- **Optimal Configuration**:
  - A well-balanced architecture with enough capacity to learn meaningful patterns without overfitting.  
  - Features sufficient depth and width to capture complex representations while avoiding excessive complexity.  

- **Underfit Configuration**:
  - A shallow network with insufficient parameters, making it incapable of learning useful patterns.  
  - The model lacks the capacity to represent the data adequately, leading to high bias.  

- **Basic Configuration**:
  - A moderately complex architecture, but not optimized for the dataset.  
  - Can be adjusted through hyperparameter tuning to improve performance.  
