In [None]:
import matplotlib.pyplot as plt
import torch
from torch import nn
import experiment


In [None]:
# Goal: classify which of two concentric circles a coordinate in the XY plane belongs to.
# scikit-learn has a helper that generates the circles for us.
from sklearn.datasets import make_circles

n_samples = 1000  # number of sample points in the dataset (not the number of circles)

# Create the actual dataset. make_circles hands back a ready-made
# (X=features, y=labels) pair; the exact format of both is inspected
# in the next cell.
X, y = make_circles(n_samples=n_samples, noise=0.03, random_state=42)  # noise makes the circles imperfect

In [None]:
# Inspect the data so the format is known before moving forward.
print(f'X shape: {X.shape}, X type: {type(X)}')
print(f'y shape: {y.shape}, y type: {type(y)}')
print(f'first vals of X \n {X[:4]}') # rows appear to be coordinates in the XY plane (i.e. there are two features)
print(f'first vals of y \n {y[:4]}') # labels appear to say which circle the coordinate belongs to (0 or 1)

# Plot the data. Colouring each point by its label makes it possible to check
# visually that the two labels really correspond to the two concentric circles;
# axis labels and a title let the figure stand on its own.
plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.xlabel('x')
plt.ylabel('y')
plt.title('make_circles samples coloured by label')
plt.show()

In [None]:
# The aim is a model that can tell which circle a point belongs to.
# Before the train/test split and the model itself, the data has to be turned into tensors.

# Convert from NDArray to Tensor with dtype torch.float.
# torch.as_tensor accepts ndarrays as well as tensors, so this cell can be
# re-run safely. torch.from_numpy only accepts ndarrays and would raise on a
# second run, because X and y have already been rebound to tensors by then.
X = torch.as_tensor(X, dtype=torch.float)
y = torch.as_tensor(y, dtype=torch.float)

print(f'X type: {type(X)}, X dtype: {X.dtype}, X dims: {X.shape}')
print(f'y type: {type(y)}, y dtype: {y.dtype}, y dims: {y.shape}')

In [None]:
# Split the data into training and testing sets using scikit-learn's helper.
from sklearn.model_selection import train_test_split

# test_size is the fraction of the full dataset held out for testing (0.2);
# the training set is the complement. random_state is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Inspect the split data: one output line per tensor (type, dtype and dims).
print(
    f'X_train type: {type(X_train)}, X_train dtype: {X_train.dtype}, X_train dims: {X_train.shape}',
    f'X_test type: {type(X_test)}, X_test dtype: {X_test.dtype}, X_test dims: {X_test.shape}',
    f'y_train type: {type(y_train)}, y_train dtype: {y_train.dtype}, y_train dims: {y_train.shape}',
    f'y_test type: {type(y_test)}, y_test dtype: {y_test.dtype}, y_test dims: {y_test.shape}',
    sep='\n',
)

In [None]:
# The model: a simple 2 layer NN.
class CircleModel_V0(nn.Module):
    """Two stacked linear layers mapping 2 input features to 1 output (2 -> 5 -> 1)."""

    def __init__(self) -> None:
        super().__init__()
        # One hidden layer of 5 neurons followed by a single output neuron.
        # Both are plain LINEAR layers with no activation in between, and the
        # bias term of each layer is left at its default (enabled).
        hidden_layer = nn.Linear(2, 5)
        output_layer = nn.Linear(5, 1)
        self.network = nn.Sequential(hidden_layer, output_layer)

    def forward(self, x):
        """Feed x through the sequential stack and return its raw output."""
        # nn.Sequential already composes the layers in order, so the forward
        # pass is just a call to it.
        raw_output = self.network(x)
        return raw_output


In [None]:
# Now that the model has been defined, create it and check out its properties.
# The layer weights are initialised randomly, so seed torch first. The dataset
# and the train/test split are already pinned with random_state=42; seeding
# here as well makes the printed predictions and metrics repeat on a fresh run.
torch.manual_seed(42)
model = CircleModel_V0()
print(model)

In [None]:
# Look at the output predictions on the test data for the untrained model.
untrained_preds = model(X_test)
print(f'untrained_preds shape: {untrained_preds.shape}')
print(f'label shape: {y_test.shape}')
# Slice with [:10] so that ten values are actually shown, as the messages
# state (the previous [:9] only showed nine).
print(f'first ten predictions: {untrained_preds[:10]}') # Seems like the predictions need a squeeze to match the label dims
print(f'first ten labels: {y_test[:10]}')

In [None]:
# Loss and optimizer setup.
# MAE is not useful for classification (it is designed for real valued outputs);
# classification problems call for cross-entropy loss (log loss), which in this
# two-class case is the binary variant.
# Note: nn.BCELoss expects probabilities as its input (e.g. sigmoid outputs),
# not raw logits.
optimizer = torch.optim.SGD(model.parameters(), lr=0.2)
loss_fn = nn.BCELoss()

In [None]:
# The train/test loss per epoch is one way to tell how the model is doing, as in
# the regression model. Loss measures how WRONG the model is; another view is how
# RIGHT it is, which is what an accuracy function measures.

def model_accuracy(model_preds: torch.Tensor, labels: torch.Tensor) -> float:
    """Return the fraction of predictions that exactly equal their labels.

    Args:
        model_preds: Predicted label values, one per sample.
        labels: Ground-truth label values, one per sample.

    Returns:
        Decimal value of accuracy (correct predictions / number of predictions),
        or 0.0 when there are no predictions to score.
    """
    # Nothing to score: return 0.0 instead of dividing by zero.
    if model_preds.numel() == 0:
        return 0.0

    # A (N, 1) column of predictions compared against (N,) labels would be
    # broadcast by torch.eq into an (N, N) comparison and inflate the count.
    # When both hold the same number of elements, compare them element by element.
    if model_preds.shape != labels.shape and model_preds.numel() == labels.numel():
        model_preds = model_preds.reshape(-1)
        labels = labels.reshape(-1)

    # Count every position where prediction and label agree, then divide by
    # the number of predictions.
    correct = torch.eq(model_preds, labels).sum().item()
    return correct / len(model_preds)

In [None]:
# The model could be trained now, but first dig a little deeper into what is
# actually returned when data is passed to the model object.
untrained_preds = model(X_test)
# [:10] so that ten values are shown, matching the message (the previous [:9] showed nine).
print(f'first ten predictions: {untrained_preds[:10]}') # raw values, can be negative, and are hard to interpret on their own

# These raw outputs are called logits. To give them meaning (or to compare outputs
# from two different runs) they are passed through an activation function that
# turns them into probabilities.
# Sigmoid is the usual choice for binary problems; softmax is used for
# multi-class problems (more than two classes in the output).
# Here a probability <= 0.5 is read as label 0, anything above as label 1.
untrained_preds = torch.sigmoid(untrained_preds) # Note: BCEWithLogitsLoss has this sigmoid built in.
print(f'New prediction outputs using sigmoid: {untrained_preds[:10]}') # sigmoid squashes values into the range between 0 and 1

In [None]:
# Helper that turns a probability into a label value, so the result can be
# fed to the accuracy function described earlier.

def probs_to_labels(pred_probs: torch.Tensor) -> torch.Tensor:
    """Convert predicted probabilities into label values (0 or 1) by rounding.

    Args:
        pred_probs: Model outputs expressed as probabilities.

    Returns:
        Tensor of the same shape holding the rounded values.
    """
    rounded = pred_probs.round()
    return rounded

In [None]:
# Try the helpers out and check the accuracy of the untrained model.
untrained_preds = model(X_test).squeeze().sigmoid() # output vals to probabilities
untrained_labels = probs_to_labels(untrained_preds) # probabilities to label values
print(f'model accuracy: {model_accuracy(untrained_labels, y_test)}')

# With its initial random weights the model is expected to sort points into
# the correct circle roughly 50% of the time, i.e. about as well as random
# guessing. Print both tensors side by side to inspect them explicitly.
print(f'untrained preds in label form: {untrained_labels[:9]}')
print(f'labels: {y_test[:9]}')

In [None]:
# Functions used by the training loop that trains the model, written in a
# functional style.

### forward pass functions ###
def get_logits(x_train: torch.Tensor, model: torch.nn.Module) -> torch.Tensor:
    """
    Returns logits from a pass through the model as a 1d tensor.

    Args:
        x_train: Input features to feed to the model.
        model: The module to call on the features.

    Returns:
        The model output with size-1 dimensions removed. A single-sample
        batch still comes back as a 1d tensor with one element.
    """
    logits = model(x_train).squeeze()
    # A bare squeeze() collapses a single-sample output such as (1, 1) all the
    # way down to a 0-d scalar, which no longer lines up with a (1,) label
    # tensor. Restore the batch dimension in that case.
    if logits.dim() == 0:
        logits = logits.reshape(1)
    return logits

def logits_to_labels(y_logits: torch.Tensor, activation_fn) -> torch.Tensor:
    """
    Turns logits into labels in two steps: logits -> probabilities -> labels.

    Args:
        y_logits: Raw model outputs.
        activation_fn: Callable applied to the logits to obtain probabilities.

    Returns:
        The label values produced by probs_to_labels.
    """
    # Step 1: the activation function maps logits to probabilities.
    probabilities = activation_fn(y_logits)
    # Step 2: probabilities are converted to label values.
    return probs_to_labels(probabilities)

def forward_pass(x_train: torch.Tensor, model: torch.nn.Module, activation_fn = torch.sigmoid) -> torch.Tensor:
    """
    Runs one pass through the model and returns the resulting labels.

    Args:
        x_train: Input features to feed to the model.
        model: The module to run.
        activation_fn: Callable that maps logits to probabilities
            (torch.sigmoid unless overridden).

    Returns:
        Label values for the given inputs.
    """
    logits = get_logits(x_train, model)
    labels = logits_to_labels(logits, activation_fn)
    return labels


### training metrics functions ###

def get_loss(y_preds: torch.Tensor, y_train: torch.Tensor, loss_fn: torch.nn.Module) -> torch.Tensor:
    """
    Returns the loss between predictions and labels.

    Args:
        y_preds: Model predictions, in whatever form loss_fn expects.
        y_train: Target labels.
        loss_fn: Loss callable, invoked as loss_fn(y_preds, y_train).

    Returns:
        Whatever loss_fn returns (a tensor for torch loss modules). When the
        predictions are attached to the autograd graph, so is the loss, which
        is what lets the caller run .backward() on it.
    """
    return loss_fn(y_preds, y_train)

# model_accuracy() defined above

def pass_metrics(y_preds: torch.Tensor, y_train: torch.Tensor, loss: torch.Tensor) -> dict:
    """
    Returns the pass metrics as a dictionary.

    Args:
        y_preds: Predicted label values for the pass.
        y_train: Target labels for the pass.
        loss: Loss value for the pass; .item() is called on it to obtain a
            plain Python number.

    Returns:
        Dict with keys "loss" (Python number) and "accuracy" (result of
        model_accuracy on the predictions and labels).
    """
    return {"loss": loss.item(), "accuracy": model_accuracy(y_preds, y_train)}

def display_metrics (train_metrics: dict, test_metrics: dict, epoch: int) -> None:
    """
    Prints a four-line report for one epoch: the epoch number, the training
    metrics, the test metrics and a separator line. Nothing is returned; the
    function exists only for its printed output.

    Args:
        train_metrics: Dict with "loss" and "accuracy" entries for training.
        test_metrics: Dict with "loss" and "accuracy" entries for testing.
        epoch: Epoch number shown in the report.
    """
    print(
        f'Epoch: {epoch}',
        f'Train Loss: {train_metrics["loss"]} | Train Accuracy: {train_metrics["accuracy"]}',
        f'Test Loss: {test_metrics["loss"]} | Test Accuracy: {test_metrics["accuracy"]}',
        '----------',
        sep='\n',
    )

In [None]:
# Define the training/testing loop
n = 70
for epoch in range(n):
    # Set the model in training mode
    model.train()

    # Forward pass: logits -> probabilities.
    # The loss has to be computed on the probabilities, NOT on the rounded
    # labels: rounding has zero gradient, so a loss built on hard labels
    # backpropagates nothing and the weights would never change. loss_fn is
    # nn.BCELoss, which takes probabilities as its input.
    train_probs = torch.sigmoid(get_logits(X_train, model))
    train_loss = get_loss(train_probs, y_train, loss_fn)

    # Hard labels are only needed for the accuracy metric.
    y_preds = probs_to_labels(train_probs)
    train_metrics = pass_metrics(y_preds, y_train, train_loss)

    # Perform backprop steps
    # Since these operations are purely side-effects, no functions are defined for them
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Now set the model in inference mode and do a test pass
    model.eval()
    with torch.inference_mode():
        # Same pipeline on the test data: loss on probabilities, accuracy on labels
        test_probs = torch.sigmoid(get_logits(X_test, model))
        test_loss = get_loss(test_probs, y_test, loss_fn)
        test_preds = probs_to_labels(test_probs)
        test_metrics = pass_metrics(test_preds, y_test, test_loss)

    # Display metrics every 10 epochs
    if epoch % 10 == 0:
        display_metrics(train_metrics, test_metrics, epoch)

