In [1]:
from sklearn import datasets, model_selection
from matplotlib import pyplot as plt
import pandas as pd
import torch
from torch import nn, optim

In [7]:
# Creating 1000 circles
n_samples = 1000
SEED = 42
X, y = datasets.make_circles(n_samples=n_samples, noise=.05, random_state=SEED)

if len(set(y)) == 2:
    print("Binary Classification")
elif len(set(y)) > 2:
    print("Multi-Class Classification")

# 2 classes in 'y', viewed as tabular data. 
df = pd.DataFrame({'x1': X[:, 0], 'x2': X[:, 1], 'y': y})
df.head()

# Plotting the data, perhaps clustering is a good idea?
# plt.scatter(X[:, 0], X[:, 1], c=y)

# Tensoring
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).float()

# Splitting the data
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.2, random_state=SEED)

# Build a non-linear model (has activation fn)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class ClassificationModel(torch.nn.Module):
    def __init__(self, input_features, output_features, hidden_neurons=8) -> None:
        super().__init__()

        # 2 Layers
        self.linear1 = nn.Linear(input_features, hidden_neurons)
        self.linear2 = nn.Linear(hidden_neurons, hidden_neurons)
        self.linear3 = nn.Linear(hidden_neurons, output_features)
        # Activation fn
        self.activation = nn.Sigmoid()

        # OR
        self.stack = nn.Sequential(
            nn.Linear(input_features, hidden_neurons),
            nn.Sigmoid(),
            nn.Linear(hidden_neurons, hidden_neurons),
            nn.Sigmoid(),
            nn.Linear(hidden_neurons, output_features),
            nn.Sigmoid()
        )

    # F-prop
    def forward(self, x):
        # x -> linear/activation folds
        import random
        if random.random() < 0.5:
            return self.activation(self.linear3(self.activation(self.linear2(self.activation(self.linear1(x))))))
        else:
            return self.stack(x)

model = ClassificationModel(2,1,8).to(device=device)

# Sanity check
def accuracy(y_pred, y_true):
    return ((y_pred > 0.5) == y_true).sum() / len(y_true)

# Training
torch.cuda.manual_seed(SEED)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 5_000

'''
Put data into gpu
'''
X_train, X_test, y_train, y_test = X_train.to(device=device), X_test.to(device=device), y_train.to(device=device), y_test.to(device=device)

for epoch in range(epochs+1):
    model.train()

    # F-prop - turning into a single array too.
    y_pred = model(X_train).squeeze()

    # # Loss
    loss = criterion(y_pred, y_train)
    acc = accuracy(y_pred, y_train)

    # # B-prop
    optimizer.zero_grad()
    loss.backward()

    # # Optimizer step
    optimizer.step()

    model.eval()
    with torch.inference_mode():

        # Use if using BCE w/ Logit : 
        # Prediction -> Output activation fn -> Prediction labels
        # y_pred = torch.round(torch.sigmoid(model(X_test)))
        y_pred = model(X_test).squeeze()

        # Loss
        loss = criterion(y_pred, y_test)
        acc = accuracy(y_pred, y_test)

    if epoch % 2500 == 0:
        print(f"Epoch: {epoch}, Loss: {loss}, Accuracy: {acc}")

# Improving the model (Hyperparams)
# 1. More layers
# 2. More neurons (weights/biases)
# 3. More data
# 4. More epochs/learning rate
# 5. Better optimizer
# 6. Better loss function
# 7. Better activation function
# 8. Better initialization
# 9. Better regularization
# 10. Better architecture

# Could look at metrics to check recall/precision


Binary Classification
Epoch: 0, Loss: 0.6936346292495728, Accuracy: 0.5
Epoch: 2500, Loss: 0.605886697769165, Accuracy: 0.8050000071525574
Epoch: 5000, Loss: 0.18751147389411926, Accuracy: 0.9399999976158142
