# In [None]:
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt

import seaborn as sns
import torch 

# Build a 100-sample toy dataset with scikit-learn's make_moons (noise 0.1,
# random_state fixed to 0) and plot it, coloured by label.
points, labels = make_moons(
    n_samples=100,
    shuffle=True,
    noise=0.1,
    random_state=0,
)

# Columns 0 and 1 of `points` are used as the plot axes.
data_ax = sns.scatterplot(
    x=points[:, 0],
    y=points[:, 1],
    hue=labels,
    marker="o",
    s=25,
    edgecolor="k",
    legend=True,
)
data_ax.set_title("Data")
plt.show()

# Hyperparameters consumed by the hand-written gradient-descent loop.
learning_rate = 0.01
n_epochs = 1000

# Turn the NumPy arrays into float32 tensors (binary_cross_entropy needs a
# floating-point target).
X = torch.from_numpy(points).float()
y = torch.from_numpy(labels).float()

# Earlier, smaller variant kept for reference (one hidden layer of 2 units):
# W1 = torch.randn((2, X.shape[1]))
# b1 = torch.randn((2, 1))
#
# W2 = torch.randn((1, W1.shape[0]))
# b2 = torch.randn((1, 1))

# Current network: n_features -> 3 -> 5 -> 1.
# Weights are stored as (out_units, in_units) and biases as (out_units, 1)
# columns; weights are drawn from a standard normal, biases start at zero.
n_features = X.shape[1]

W1 = torch.randn(3, n_features)
b1 = torch.zeros(3, 1)

W2 = torch.randn(5, W1.shape[0])
b2 = torch.zeros(5, 1)

W3 = torch.randn(1, W2.shape[0])
b3 = torch.zeros(1, 1)

parameters = [W1, b1, W2, b2, W3, b3]

# Ask autograd to track every trainable tensor.
for p in parameters:
    p.requires_grad_(True)

# Full-batch gradient descent on the hand-written three-layer network.
# `loss` collects the scalar training loss of every epoch for plotting.
loss = []

for i in range(n_epochs):
    # Forward pass. The input is transposed so samples are columns, which
    # lets each (units, 1) bias broadcast across the whole batch.
    a1 = (W1 @ X.T + b1).relu()
    a2 = (W2 @ a1 + b2).relu()
    a3 = (W3 @ a2 + b3).sigmoid()

    # Drop the leading size-1 dimension so the output lines up with `y`.
    Y = a3.squeeze(0)

    L = torch.nn.functional.binary_cross_entropy(Y, y)
    epoch_loss = L.item()
    loss.append(epoch_loss)
    print(f'Epoch {i}: loss={epoch_loss}')

    # backward() ADDS to any existing .grad, so the gradients must be cleared
    # every epoch; otherwise each step would apply the running sum of all
    # previous gradients instead of the gradient of the current loss.
    for p in parameters:
        p.grad = None
    L.backward()

    # Plain gradient-descent step. Running it under no_grad() keeps the
    # in-place update out of the autograd graph (preferred over `.data`).
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

# Loss curve: one value per training epoch.
plt.plot(loss)
plt.show()

# `Y` holds the sigmoid outputs of the last forward pass executed by the
# training loop (computed before that epoch's parameter update).
# Threshold at 0.5 to obtain hard 0/1 class predictions.
probabilities = Y.detach().numpy()
predictions = (probabilities > 0.5).astype(int)

# Same scatter plot as for the raw data, coloured by predicted class.
pred_ax = sns.scatterplot(
    x=points[:, 0],
    y=points[:, 1],
    hue=predictions,
    marker="o",
    s=25,
    edgecolor="k",
    legend=True,
)
pred_ax.set_title("Predictions")
plt.show()
