# Toy Data

In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
import torch
import torch.nn
import matplotlib.pyplot as plt

# Fix every random number generator used in this notebook (PyTorch CPU,
# all CUDA devices, and NumPy) to the same seed.
seed = 1337
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)

ModuleNotFoundError: No module named 'seaborn'

### Describing a Normal Distribution

In [None]:
# Draw 1000 samples from a normal distribution with mean 3 (default scale of 1).
x = np.random.normal(3, size=1000)
# Plot a kernel density estimate of the samples (requires seaborn).
sns.displot(x, kind="kde")
# Last expression of the cell: displays the raw samples.
x

In [None]:
# Draw a (1000, 2) array; `loc` broadcasts over the last axis, so column 0 is
# centred on -2 and column 1 on 3.
x = np.random.normal(loc=(-2, 3), size=(1000, 2))
# Plot kernel density estimates of the samples (requires seaborn).
sns.displot(x, kind="kde")
# Last expression of the cell: displays the raw samples.
x

In [None]:
# Axis coordinates: 6 evenly spaced points on [0, 5] and 10 on [0, 9].
x = np.linspace(0, 5, num=6)
y = np.linspace(0, 9, num=10)

# Expand both axes into 2-D coordinate grids. With Cartesian ("xy") indexing
# each grid has shape (len(y), len(x)).
xx, yy = np.meshgrid(x, y, indexing="xy")

# Show the two grids separated by one blank line.
print(xx, yy, sep="\n\n")

In [None]:
# Flatten both coordinate grids and pair them up: one (x, y) row per grid point.
xy = np.vstack([xx.ravel(), yy.ravel()]).T
# Last expression of the cell: displays the coordinate pairs.
xy

In [None]:
# Sum the element-wise sine over each row of `xy`.
# NOTE(review): assumes `xy` holds one (x, y) pair per row, giving
# sin(x) + sin(y) per grid point -- confirm against the cell that builds `xy`.
z = np.sum(np.sin(xy), axis=1)
# Last expression of the cell: displays the values.
z

In [None]:
# Contour plot of z, reshaped from a flat vector to the shape of the grid.
plt.contour(xx, yy, z.reshape(xx.shape))

In [None]:
# Create a 6x6 figure with a single 3-D axes.
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')

# Draw z as a surface over the grid, reshaped to the shape of the grid.
ax.plot_surface(xx, yy, z.reshape(xx.shape))

In [None]:
# Default means of the two clusters: label 1 ("top") and label 0 ("bottom").
TOP_CENTER = (3, 3)
BOTTOM_CENTER = (3, -2)

def get_toy_data(batch_size, top_center=TOP_CENTER, bottom_center=BOTTOM_CENTER):
    """Sample a labelled batch from two unit-variance Gaussian clusters.

    Each sample is drawn, with probability 0.5, from a normal distribution
    centred on ``top_center`` (label 1) and otherwise from one centred on
    ``bottom_center`` (label 0).

    Args:
        batch_size: Number of samples to draw.
        top_center: Mean of the label-1 cluster.
        bottom_center: Mean of the label-0 cluster.

    Returns:
        A tuple ``(x_data, y_targets)`` of float32 tensors with shapes
        ``(batch_size, *np.shape(top_center))`` and ``(batch_size,)``.
    """
    # Preallocate one ndarray instead of collecting a Python list of arrays:
    # torch.tensor() on a list of ndarrays is slow (PyTorch warns about it),
    # and an empty list would lose the feature dimension.
    x_data = np.empty((batch_size, *np.shape(top_center)), dtype=np.float64)
    y_targets = np.zeros(batch_size)
    for batch_i in range(batch_size):
        # Fair coin flip picks the cluster; the draw order (coin, then sample)
        # is kept so seeded runs produce the same data as before.
        if np.random.random() > 0.5:
            x_data[batch_i] = np.random.normal(loc=top_center)
            y_targets[batch_i] = 1
        else:
            x_data[batch_i] = np.random.normal(loc=bottom_center)
    return (torch.tensor(x_data, dtype=torch.float),
            torch.tensor(y_targets, dtype=torch.float))

# Draw a small batch of 10 samples to inspect the generator's output.
x_data, y_truth = get_toy_data(10)
# Last expression of the cell: displays the points and their labels.
x_data, y_truth

In [None]:
def plot_toy_data(x_data, y_truth, perceptron=None):
    """Scatter-plot the toy samples and, optionally, a model's decision boundary.

    Correctly classified samples are drawn as blue stars (label 1) or orange
    circles (label 0). When ``perceptron`` is given, samples whose thresholded
    prediction disagrees with ``y_truth`` are drawn in black using the marker
    of their true class, and the 0.4 / 0.5 / 0.6 contours of the model output
    are overlaid.

    Args:
        x_data: Tensor of points, one row per sample; columns 0 and 1 are
            used as the plot coordinates.
        y_truth: Tensor of labels, one per sample; 1 marks the "star" class.
        perceptron: Optional callable mapping a float tensor of points to one
            output per point. ``None`` plots the ground truth only.
    """
    # Explicit None check: container modules define __len__, so an empty one
    # would be falsy under a plain truthiness test.
    has_model = perceptron is not None
    if has_model:
        # reshape(-1) instead of squeeze() keeps a batch of one as a vector.
        y_pred = (perceptron(x_data).detach().reshape(-1) > 0.5).float()
    else:
        y_pred = y_truth

    points = np.asarray(x_data)
    labels = np.asarray(y_truth)
    is_positive = labels == 1.0
    is_wrong = labels != np.asarray(y_pred)

    # (mask, marker, colour) per group, in drawing order.
    groups = [
        (is_positive & ~is_wrong, "*", "tab:blue"),
        (~is_positive & ~is_wrong, "o", "tab:orange"),
    ]
    if has_model:
        groups.append((is_positive & is_wrong, "*", "black"))
        groups.append((~is_positive & is_wrong, "o", "black"))

    for mask, marker, color in groups:
        if mask.any():
            plt.scatter(points[mask, 0], points[mask, 1], marker=marker, c=color, s=100)

    if has_model:
        # hyperplane: evaluate the model on a 30x30 grid covering the data
        # (padded by 0.5 on every side) and draw its level lines.
        xx = np.linspace(float(points[:, 0].min()) - 0.5, float(points[:, 0].max()) + 0.5, 30)
        yy = np.linspace(float(points[:, 1].min()) - 0.5, float(points[:, 1].max()) + 0.5, 30)
        xv, yv = np.meshgrid(xx, yy)
        xy = np.vstack([xv.ravel(), yv.ravel()]).T
        z = perceptron(torch.tensor(xy, dtype=torch.float)).detach().numpy().reshape(yv.shape)

        plt.contour(xx, yy, z, colors='k', linestyles=["--", "-", "--"], levels=[0.4, 0.5, 0.6])
    plt.show()

# Plot 1024 freshly sampled points coloured by their true label (no model).
plot_toy_data(*get_toy_data(1024))
# plot_toy_data(*get_toy_data(1024), perceptron)


In [None]:
class Perceptron(torch.nn.Module):
    """Single linear unit followed by a sigmoid."""

    def __init__(self, input_dim):
        """Build the model.

        Args:
            input_dim: Number of features in each input row.
        """
        super().__init__()
        self.fc1 = torch.nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x_in):
        """Return the sigmoid of the linear layer's output for ``x_in``."""
        logits = self.fc1(x_in)
        return logits.sigmoid()

    def reset_parameters(self):
        """Re-initialise every direct child that offers ``reset_parameters``."""
        for child in self.children():
            if not hasattr(child, "reset_parameters"):
                continue
            child.reset_parameters()

# Smoke-test the model on five random scalars, shaped (5, 1) to match input_dim=1.
x = torch.randn(5).view(5, 1).detach().clone()
print(x)
perceptron = Perceptron(input_dim=1)
# Forward pass: one sigmoid output per input row.
y_prob = perceptron(x)
print(y_prob)
# Threshold at 0.5 to turn the outputs into hard 0/1 predictions.
y_hat = (y_prob > 0.5).float()
print(y_hat)

In [None]:
# Optimizer learning rate and number of features per toy sample.
lr = 0.01
input_dim = 2

# Samples per generated batch, number of epochs, and batches drawn per epoch.
batch_size = 1000
n_epochs = 12
n_batches = 5

# Fresh model, Adam optimizer over its parameters, and binary cross-entropy
# loss (applied to the model's sigmoid outputs).
perceptron = Perceptron(input_dim=input_dim)
optimizer = torch.optim.Adam(params=perceptron.parameters(), lr=lr)
bce_loss = torch.nn.BCELoss()

# Per-batch loss history, filled in by the training cell.
losses = []

# One fixed batch reused for every plot, so successive plots are comparable.
x_data_static, y_truth_static = get_toy_data(batch_size)
plot_toy_data(x_data_static, y_truth_static)

In [None]:
# Convergence bookkeeping. `change` and `last` are updated after every batch,
# but together with `epsilon` they are only read by the commented-out `while`
# condition below.
change = 1.0
last = 10.0
epsilon = 1e-3
epoch = 0

#while change > epsilon or epoch < n_epochs or last > 0.3:
for epoch in range(n_epochs):
    for _ in range(n_batches):

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # Every step trains on a freshly sampled batch.
        x_data, y_target = get_toy_data(batch_size)
        # Drop the trailing output dimension so predictions line up with targets.
        y_pred = perceptron(x_data).squeeze()

        loss = bce_loss(y_pred, y_target)
        loss.backward()
        optimizer.step()

        # Record the scalar loss for this batch.
        loss_value = loss.item()
        losses.append(loss_value)

        # Absolute change in loss relative to the previous batch.
        change = abs(last - loss_value)
        last = loss_value

    # Report the loss of the epoch's last batch and redraw the decision
    # boundary on the fixed evaluation batch.
    print(f"epoch: {epoch} loss: {loss:0.3}")
    plot_toy_data(x_data_static, y_truth_static, perceptron)
    
    
        


In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
class ToyDataset(Dataset):
    """Map-style dataset pairing each input in ``x_data`` with its target."""

    def __init__(self, x_data, y_truth):
        """Store the inputs as ``self.x`` and the targets as ``self.y``."""
        self.x, self.y = x_data, y_truth

    def __len__(self):
        """Return the number of stored inputs."""
        n_samples = len(self.x)
        return n_samples

    def __getitem__(self, index):
        """Return the ``(input, target)`` tuple selected by ``index``.

        ``index`` is applied to both containers unchanged, so anything they
        accept (an integer, a slice, ...) works here.
        """
        features = self.x[index]
        target = self.y[index]
        return features, target

# Wrap the fixed evaluation batch in a dataset.
toydataset = ToyDataset(x_data_static, y_truth_static)
# Integer indexing returns one (input, target) pair ...
print(toydataset[0])
# ... and a slice is forwarded to both stored containers, here the last three items.
print(toydataset[-3:])
# Last expression of the cell: displays the number of samples.
len(toydataset)


In [None]:
# Shuffled mini-batches of 12 samples; drop_last discards a final batch that
# would contain fewer than 12.
toy_loader = DataLoader(dataset=toydataset, batch_size=12, shuffle=True, drop_last=True)

# Peek at the first mini-batch only: the loop breaks after one iteration.
count = 0
for minibatch in toy_loader:
    print(minibatch)
    count += 1
    break
# Last expression of the cell: displays how many batches were consumed.
count