In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Shared, seeded random generator used by every data generator in this cell.
rng = np.random.default_rng(seed=42)
class Example2D:
    """A labelled point in the plane.

    Attributes:
        x: horizontal coordinate.
        y: vertical coordinate.
        label: class label attached to the point (may be None).
    """

    def __init__(self, x, y, label):
        self.x, self.y, self.label = x, y, label

def classify_two_gauss_data(num_samples, noise):
    """Draw two Gaussian blobs, one per class.

    Args:
        num_samples: total number of points requested; each blob receives
            ``num_samples // 2`` points.
        noise: noise level; it is mapped linearly to the spread of both blobs.

    Returns:
        A list of Example2D: first the label-1 blob centred at (2, 2), then
        the label-0 blob centred at (-2, -2).
    """
    # Linear map from noise to spread: 0.5 at noise=0, 4.0 at noise=0.5.
    # The value is handed to rng.normal as its scale argument.
    spread = 0.5 + 3.5 * (np.asarray(noise) / 0.5)
    per_class = num_samples // 2

    points = []
    for (cx, cy), label in (((2, 2), 1), ((-2, -2), 0)):
        for _ in range(per_class):
            x = rng.normal(cx, spread)
            y = rng.normal(cy, spread)
            points.append(Example2D(x, y, label))
    return points

def classify_spiral_data(num_samples, noise):
    """Generate two interleaved spiral arms, one per class.

    Args:
        num_samples: total number of points requested; each arm receives
            ``num_samples // 2`` points.
        noise: scale applied to the uniform(-1, 1) jitter added to each
            coordinate.

    Returns:
        A list of Example2D: the label-1 arm first, then the label-0 arm,
        which starts half a turn (pi) later.
    """
    n = num_samples // 2
    points = []

    # (angular offset, label) for each arm.
    for delta_t, label in ((0, 1), (np.pi, 0)):
        for i in range(n):
            r = i / n * 5  # radius grows from 0 towards 5 along the arm
            t = 1.75 * i / n * 2 * np.pi + delta_t
            x = r * np.sin(t) + rng.uniform(-1, 1) * noise
            y = r * np.cos(t) + rng.uniform(-1, 1) * noise
            points.append(Example2D(x, y, label))
    return points

def classify_xor_data(num_samples, noise):
    """Generate XOR-style data: label 1 when both coordinates share a sign.

    Args:
        num_samples: number of points to generate.
        noise: scale of the uniform(-5, 5) perturbation that is applied to the
            coordinates only for the purpose of computing the label.

    Returns:
        A list of Example2D holding the unperturbed coordinates and a label
        derived from the perturbed ones.
    """
    padding = 0.3

    def draw_coordinate():
        # Push the value away from the axis so points keep a margin from zero.
        value = rng.uniform(-5, 5)
        if value > 0:
            return value + padding
        return value - padding

    points = []
    for _ in range(num_samples):
        x = draw_coordinate()
        y = draw_coordinate()
        noise_x = rng.uniform(-5, 5) * noise
        noise_y = rng.uniform(-5, 5) * noise
        # The label comes from the noisy position; the stored point is clean.
        same_sign = (x + noise_x) * (y + noise_y) >= 0
        points.append(Example2D(x, y, 1 if same_sign else 0))
    return points

def classify_circle_data(num_samples, noise):
    """Generate an inner disc and an outer ring of points around the origin.

    Args:
        num_samples: total number of points requested; each radial band
            receives ``num_samples // 2`` points.
        noise: scale of the uniform(-radius, radius) perturbation applied to
            the coordinates only for the purpose of computing the label.

    Returns:
        A list of Example2D: points sampled with radius in [0, 2.5) first,
        then points with radius in [3.5, 5). A point is labelled 1 when its
        perturbed position lies within half the radius of the origin, else 0.
    """
    radius = 5
    inner_limit = radius * 0.5
    # (minimum radius, maximum radius) of the two sampled bands.
    radial_bands = ((0, inner_limit), (radius * 0.7, radius))

    points = []
    for r_min, r_max in radial_bands:
        for _ in range(num_samples // 2):
            r = rng.uniform(r_min, r_max)
            angle = rng.uniform(0, 2 * np.pi)
            x = r * np.sin(angle)
            y = r * np.cos(angle)
            noise_x = rng.uniform(-radius, radius) * noise
            noise_y = rng.uniform(-radius, radius) * noise
            # Distance of the noisy position from the origin decides the label.
            distance = np.linalg.norm([x + noise_x, y + noise_y])
            points.append(Example2D(x, y, 1 if distance < inner_limit else 0))
    return points

class DataGeneratorFactory:
    """Registry mapping a data-type name to the function that builds that data."""

    def __init__(self):
        self._factory_methods = {}

    def register(self, data_type, factory_method):
        """Associate ``data_type`` with ``factory_method``, replacing any previous entry."""
        self._factory_methods[data_type] = factory_method

    def create_generator(self, data_type, num_samples, noise):
        """Call the function registered for ``data_type`` and return its result.

        Args:
            data_type: key previously passed to ``register``.
            num_samples: forwarded as the first positional argument.
            noise: forwarded as the second positional argument.

        Raises:
            ValueError: if nothing is registered under ``data_type``.
        """
        builder = self._factory_methods.get(data_type)
        if builder is not None:
            return builder(num_samples, noise)
        raise ValueError(f"No factory method registered for data type: {data_type}")

# Module-level factory instance; each key below selects one of the generator
# functions defined in this cell.
# NOTE(review): `df` is the conventional name for a pandas DataFrame, which may
# confuse readers; consider a more descriptive name if nothing else depends on it.
df = DataGeneratorFactory()
df.register("gauss", classify_two_gauss_data)
df.register("circle", classify_circle_data)
df.register("xor", classify_xor_data)
df.register("spiral", classify_spiral_data)


In [None]:
import torch
from sklearn.metrics import confusion_matrix
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import random_split
# NOTE(review): this install runs after the torch imports earlier in this cell,
# so on an environment without torch the imports fail before it is reached.
# The packages are also unpinned; consider a dedicated first cell using `%pip`.
!pip install torch torchvision torchaudio -f https://download.pytorch.org/whl/torch_stable.html
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_cura

In [None]:
class MLP(nn.Module):
    """Multi-layer perceptron whose output passes through a Sigmoid.

    The network is a stack of ``Linear -> activation`` pairs (one per entry of
    ``hidden_sizes``) followed by a final ``Linear -> Sigmoid``.

    Args:
        input_size: number of input features.
        hidden_sizes: sequence with the width of each hidden layer. It may be
            empty, in which case the network is a single ``Linear -> Sigmoid``.
        output_size: number of output units.
        activation: name of the hidden activation: 'sigmoid', 'tanh' or 'relu'.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    # Supported hidden activations, keyed by the name accepted in __init__.
    _ACTIVATIONS = {
        'sigmoid': nn.Sigmoid,
        'tanh': nn.Tanh,
        'relu': nn.ReLU,
    }

    def __init__(self, input_size, hidden_sizes, output_size, activation = 'relu'):
        super(MLP, self).__init__()

        # Resolve the activation name; anything that is not a known string is rejected.
        activation_cls = self._ACTIVATIONS.get(activation) if isinstance(activation, str) else None
        if activation_cls is None:
            raise ValueError("Неизвестная функция активации")

        # Consecutive pairs of this list give the (in, out) sizes of the hidden layers.
        layer_sizes = [input_size, *hidden_sizes]

        layers = []
        for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
            layers.append(nn.Linear(in_features, out_features))
            # A fresh activation module per layer instead of one shared instance.
            layers.append(activation_cls())

        # Output layer followed by a Sigmoid so outputs lie in (0, 1).
        layers.append(nn.Linear(layer_sizes[-1], output_size))
        layers.append(nn.Sigmoid())

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.layers(x)

In [None]:
def train(model, train_loader, optimizer, loss_function, epochs = 1000, verbose = False, test_data = None):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        model: module to optimise; its output's trailing singleton dimension is
            squeezed before the loss is computed.
        train_loader: iterable yielding ``(inputs, labels)`` batches.
        optimizer: optimiser already bound to ``model``'s parameters.
        loss_function: callable ``loss_function(outputs, labels)`` returning a
            scalar tensor.
        epochs: number of passes over ``train_loader``.
        verbose: when True, print the losses every 100 epochs.
        test_data: optional ``(inputs, labels)`` pair evaluated when printing;
            when None only the training loss is printed.

    Returns:
        None. The model is updated in place and left in training mode.
    """
    for epoch in range(epochs):
        # The periodic evaluation below switches to eval mode, so re-enable
        # training mode at the start of every epoch.
        model.train()
        loss = None  # stays None if the loader yields no batch
        for inputs, labels in train_loader:
            # Forward pass. squeeze(-1) drops only the trailing unit dimension,
            # so a batch holding a single sample keeps its batch dimension.
            outputs = model(inputs).squeeze(-1)
            loss = loss_function(outputs, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if verbose and epoch % 100 == 0:
            if test_data is None:
                print(f"epoch={epoch}, train_loss={loss}")
                continue
            model.eval()
            with torch.inference_mode():
                test_input, test_labels = test_data
                test_output = model(test_input).squeeze(-1)
                test_loss = loss_function(test_output, test_labels)
                print(f"epoch={epoch}, train_loss={loss}, test_loss={test_loss}")
            model.train()

In [None]:
def compute_confusion_matrix(model, test_loader):
    """Compute the confusion matrix of ``model`` over ``test_loader``.

    Model outputs above 0.5 are counted as class 1, everything else as class 0.

    Args:
        model: module whose outputs are thresholded at 0.5.
        test_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        The value returned by ``sklearn.metrics.confusion_matrix`` for the
        collected true and predicted labels.
    """
    model.eval()  # Set the model to evaluation mode
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            predicted_labels = (outputs > 0.5).float()  # Threshold predictions
            # Flatten to plain Python floats so both lists have the same,
            # unambiguous shape regardless of device or output layout.
            y_true.extend(labels.reshape(-1).tolist())
            y_pred.extend(predicted_labels.reshape(-1).tolist())

    return confusion_matrix(y_true, y_pred)

In [None]:
# Data settings.
dataset_size = 640  # number of samples requested from the data generator
ratio = 0.8  # fraction of the dataset assigned to the training split
noise = 0.1  # noise level forwarded to the data generator
datatype = "circle"  # generator key: "gauss", "circle", "xor" or "spiral"

# Network settings.
input_size = 2  # each sample is an (x, y) pair
hidden_sizes = [4, 4, 4]  # width of each hidden layer
output_size = 1
activation = 'relu'  # MLP accepts 'sigmoid', 'tanh' or 'relu'

# Optimisation settings.
learning_rate = 0.03  # learning rate given to the SGD optimizer
epochs = 1000
batch = 64  # DataLoader batch size

In [None]:
# Generate the points and pack coordinates and labels into float tensors.
d = df.create_generator(datatype, dataset_size, noise)
data = torch.from_numpy(np.array([[p.x,p.y] for p in d])).type(torch.FloatTensor).to(device)
labels = torch.from_numpy(np.array([p.label for p in d])).type(torch.FloatTensor).to(device)
dataset = TensorDataset(data,labels)

# Split with explicit integer lengths. Passing the fractions [ratio, 1-ratio]
# is subject to floating-point error: 1-0.8 is 0.19999999999999996, whose share
# of 640 samples floors to 127 instead of 128.
train_size = round(ratio * len(dataset))
test_size = len(dataset) - train_size
generator = torch.Generator().manual_seed(42)
train_data, test_data = random_split(dataset, [train_size, test_size], generator=generator)
train_loader = DataLoader(train_data, batch_size = batch, shuffle = True, drop_last=True)
test_loader = DataLoader(test_data, batch_size = batch, shuffle = True, drop_last=False)

In [None]:
# Pass the configured activation explicitly; otherwise the `activation`
# setting is ignored and MLP silently falls back to its default.
model = MLP(input_size, hidden_sizes, output_size, activation=activation)
model.to(device)

MLP(
  (layers): Sequential(
    (0): Linear(in_features=2, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=4, bias=True)
    (3): ReLU()
    (4): Linear(in_features=4, out_features=4, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4, out_features=1, bias=True)
    (7): Sigmoid()
  )
)

In [None]:
# Plain SGD over all model parameters, using the configured learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Binary cross-entropy on probabilities; the model's last layer is a Sigmoid.
loss_function = torch.nn.BCELoss()

In [None]:
# Slicing the split with [:] yields the (inputs, labels) tensors of the whole
# test split; the previous [0:-1] silently left out its last sample.
train(model, train_loader, optimizer, loss_function, epochs = epochs, verbose = True, test_data = test_data[:])

epoch=0, train_loss=0.6976854801177979, test_loss=0.6948580741882324
epoch=100, train_loss=0.27213385701179504, test_loss=0.2956138253211975
epoch=200, train_loss=0.07930422574281693, test_loss=0.09684564918279648
epoch=300, train_loss=0.09133585542440414, test_loss=0.07424002140760422
epoch=400, train_loss=0.0470026358962059, test_loss=0.06488482654094696
epoch=500, train_loss=0.04602641612291336, test_loss=0.059617992490530014
epoch=600, train_loss=0.05175651237368584, test_loss=0.056550223380327225
epoch=700, train_loss=0.06159396469593048, test_loss=0.055741239339113235
epoch=800, train_loss=0.055971525609493256, test_loss=0.055209673941135406
epoch=900, train_loss=0.05065659433603287, test_loss=0.0556144118309021


In [None]:
# Evaluate the trained model on the held-out split.
# In scikit-learn's convention rows are true labels and columns are predictions.
cm = compute_confusion_matrix(model, test_loader)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[59  2]
 [ 1 65]]


# K-Fold Cross-Validation

In [None]:
from sklearn.model_selection import KFold

In [None]:
k=10  # number of folds

# Data settings for the cross-validation experiment.
cv_dataset_size = 640  # number of samples requested from the data generator
cv_ratio = 0.8  # NOTE(review): not referenced in any visible cell - confirm it is needed
cv_noise = 0.1  # noise level forwarded to the data generator
cv_datatype = "circle"  # generator key: "gauss", "circle", "xor" or "spiral"
cv_shuffle=True  # presumably the shuffle flag for the K-fold split - verify it reaches KFold

# Network settings (cv_hidden_sizes and cv_activation are overwritten by the sweep cell).
cv_input_size = 2  # each sample is an (x, y) pair
cv_hidden_sizes = [4, 4, 4]  # width of each hidden layer
cv_output_size = 1

cv_activation = 'relu'  # MLP accepts 'sigmoid', 'tanh' or 'relu'
cv_loss_function = torch.nn.BCELoss()

# Optimisation settings.
cv_learning_rate = 0.03  # learning rate given to the SGD optimizer
cv_epochs = 1000
cv_batch = 64  # DataLoader batch size

In [None]:
# Honour the cv_shuffle setting and seed the shuffle so the folds are
# reproducible. random_state must stay None when shuffling is disabled,
# because KFold rejects a seed in that case.
kf = KFold(n_splits=k, shuffle=cv_shuffle, random_state=42 if cv_shuffle else None)

In [None]:
def calc_loss(model, data_loader, loss_function):
    """Return the mean loss of ``model`` over every sample in ``data_loader``.

    Each batch loss is weighted by the number of samples in that batch, so a
    smaller final batch does not distort the average.

    Args:
        model: module to evaluate; it is switched to eval mode.
        data_loader: iterable yielding ``(inputs, labels)`` batches.
        loss_function: callable ``loss_function(outputs, labels)``; assumed to
            return the per-batch mean (the default reduction of torch losses).

    Returns:
        A scalar tensor holding the sample-weighted mean loss.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    weighted_loss = 0.0
    sample_count = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in data_loader:
            batch_len = len(labels)
            # squeeze(-1) keeps the batch dimension even for a one-sample batch.
            outputs = model(inputs).squeeze(-1)
            weighted_loss = weighted_loss + loss_function(outputs, labels) * batch_len
            sample_count += batch_len
    if sample_count == 0:
        raise ValueError("data_loader yielded no samples")
    return weighted_loss / sample_count

In [None]:
def eval_fold(fold, data, train_indices, val_indices):
    """Train a fresh MLP on one cross-validation fold and measure its losses.

    The network shape, activation, learning rate, batch size, epoch count and
    loss function are read from the module-level ``cv_*`` settings at call time.

    Args:
        fold: fold identifier; returned unchanged.
        data: dataset indexed by ``train_indices`` and ``val_indices``.
        train_indices: indices of the samples used for training.
        val_indices: indices of the samples held out for validation.

    Returns:
        A tuple ``(fold, train_loss, test_loss)`` with the losses computed by
        ``calc_loss`` on the training and validation subsets.
    """
    # cv_activation must be forwarded explicitly; without it every fold uses
    # the MLP default and the activation setting has no effect.
    model = MLP(cv_input_size, cv_hidden_sizes, cv_output_size, activation=cv_activation)
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=cv_learning_rate)

    train_fold = torch.utils.data.Subset(data, train_indices)
    val_fold = torch.utils.data.Subset(data, val_indices)

    train_loader = DataLoader(train_fold, batch_size=cv_batch, shuffle=True)
    val_loader = DataLoader(val_fold, batch_size=cv_batch)

    train(model, train_loader, optimizer, cv_loss_function, cv_epochs)

    model.eval()
    with torch.no_grad():
        train_loss = calc_loss(model, train_loader, cv_loss_function)
        test_loss = calc_loss(model, val_loader, cv_loss_function)
    return fold, train_loss, test_loss

In [None]:
def cross_validation(kf, data):
    """Run ``eval_fold`` on every split produced by ``kf`` and report the losses.

    Args:
        kf: splitter whose ``split(data)`` yields ``(train_indices, val_indices)``.
        data: dataset to split.

    Returns:
        A list with one ``(fold_number, train_loss, test_loss)`` tuple per fold,
        with 1-based fold numbers and losses converted to floats, so callers can
        aggregate the results without re-parsing the printed lines.
    """
    results = []
    for fold, (train_indices, val_indices) in enumerate(kf.split(data)):
        fold_id, tr_l, t_l = eval_fold(fold, data, train_indices, val_indices)
        print(f"fold={fold_id+1}, train loss={tr_l}, test loss={t_l}")
        results.append((fold_id + 1, float(tr_l), float(t_l)))
    return results


In [None]:
# Generate the cross-validation points and wrap coordinates and labels as
# float32 tensors on the selected device.
cv_d = df.create_generator(cv_datatype, cv_dataset_size, cv_noise)
cv_data = torch.from_numpy(np.array([[point.x, point.y] for point in cv_d])).float().to(device)
cv_labels = torch.from_numpy(np.array([point.label for point in cv_d])).float().to(device)
cv_dataset = TensorDataset(cv_data, cv_labels)

In [None]:
# Sweep every activation / hidden-layer layout combination. The configuration
# is handed over by rebinding the module-level cv_hidden_sizes and
# cv_activation names before each run, so statement order matters here.
# NOTE(review): confirm that eval_fold actually reads cv_activation.
for activ in ['relu','sigmoid','tanh']:
  for hidden_layers in [[4],[4,4],[4,4,4]]:
    print("#######Cross validation########\n")
    print(f"activation function={activ}, hidden_layers={hidden_layers}")
    cv_hidden_sizes = hidden_layers
    cv_activation = activ
    cross_validation(kf, cv_dataset)
    print("#############################\n")

#######Cross validation########

activation function=relu, hidden_layers=[4]
fold=1, train loss=0.061938121914863586, test loss=0.032712168991565704
fold=2, train loss=0.049510132521390915, test loss=0.0670933723449707
fold=3, train loss=0.047126006335020065, test loss=0.07030381262302399
fold=4, train loss=0.052470069378614426, test loss=0.0757589340209961
fold=5, train loss=0.06354282051324844, test loss=0.04404450207948685
fold=6, train loss=0.06327550858259201, test loss=0.10278534889221191
fold=7, train loss=0.05530061572790146, test loss=0.03341912850737572
fold=8, train loss=0.07216566801071167, test loss=0.03868803009390831
fold=9, train loss=0.05091844126582146, test loss=0.08520875871181488
fold=10, train loss=0.050578195601701736, test loss=0.06768236309289932
#############################

#######Cross validation########

activation function=relu, hidden_layers=[4, 4]
fold=1, train loss=0.04637281969189644, test loss=0.07471950352191925
fold=2, train loss=0.0518181920051574

In [None]:
text = """#######Cross validation########

activation function=relu, hidden_layers=[4]
fold=1, train loss=0.061938121914863586, test loss=0.032712168991565704
fold=2, train loss=0.049510132521390915, test loss=0.0670933723449707
fold=3, train loss=0.047126006335020065, test loss=0.07030381262302399
fold=4, train loss=0.052470069378614426, test loss=0.0757589340209961
fold=5, train loss=0.06354282051324844, test loss=0.04404450207948685
fold=6, train loss=0.06327550858259201, test loss=0.10278534889221191
fold=7, train loss=0.05530061572790146, test loss=0.03341912850737572
fold=8, train loss=0.07216566801071167, test loss=0.03868803009390831
fold=9, train loss=0.05091844126582146, test loss=0.08520875871181488
fold=10, train loss=0.050578195601701736, test loss=0.06768236309289932
#############################

#######Cross validation########

activation function=relu, hidden_layers=[4, 4]
fold=1, train loss=0.04637281969189644, test loss=0.07471950352191925
fold=2, train loss=0.05181819200515747, test loss=0.03268270939588547
fold=3, train loss=0.04177186265587807, test loss=0.09343992173671722
fold=4, train loss=0.04724962264299393, test loss=0.03245605155825615
fold=5, train loss=0.04197057709097862, test loss=0.08524176478385925
fold=6, train loss=0.0550776869058609, test loss=0.04684652388095856
fold=7, train loss=0.0466378889977932, test loss=0.07131344825029373
fold=8, train loss=0.04751024767756462, test loss=0.08539015799760818
fold=9, train loss=0.0501687228679657, test loss=0.05395772308111191
fold=10, train loss=0.050783220678567886, test loss=0.0638117641210556
#############################

#######Cross validation########

activation function=relu, hidden_layers=[4, 4, 4]
fold=1, train loss=0.038582563400268555, test loss=0.14682330191135406
fold=2, train loss=0.05619778111577034, test loss=0.03330474719405174
fold=3, train loss=0.04209086671471596, test loss=0.1014397144317627
fold=4, train loss=0.05071636661887169, test loss=0.06064063310623169
fold=5, train loss=0.048919837921857834, test loss=0.08579357713460922
fold=6, train loss=0.04880358651280403, test loss=0.05046030133962631
fold=7, train loss=0.04883314669132233, test loss=0.015696410089731216
fold=8, train loss=0.05629042163491249, test loss=0.0068982625380158424
fold=9, train loss=0.04393292963504791, test loss=0.06935713440179825
fold=10, train loss=0.0429922379553318, test loss=0.11475957930088043
#############################

#######Cross validation########

activation function=sigmoid, hidden_layers=[4]
fold=1, train loss=0.06120442599058151, test loss=0.033037591725587845
fold=2, train loss=0.4139784872531891, test loss=0.32335418462753296
fold=3, train loss=0.05804060772061348, test loss=0.06883373856544495
fold=4, train loss=0.05386362969875336, test loss=0.04015272855758667
fold=5, train loss=0.049223557114601135, test loss=0.06872212886810303
fold=6, train loss=0.05604306235909462, test loss=0.08771584928035736
fold=7, train loss=0.06337745487689972, test loss=0.02938687615096569
fold=8, train loss=0.0631006509065628, test loss=0.11301171779632568
fold=9, train loss=0.0607135184109211, test loss=0.04493892937898636
fold=10, train loss=0.05692972242832184, test loss=0.08320176601409912
#############################

#######Cross validation########

activation function=sigmoid, hidden_layers=[4, 4]
fold=1, train loss=0.041532158851623535, test loss=0.10140295326709747
fold=2, train loss=0.04991160333156586, test loss=0.014411959797143936
fold=3, train loss=0.0520985871553421, test loss=0.031305376440286636
fold=4, train loss=0.05112624540925026, test loss=0.08529554307460785
fold=5, train loss=0.05037635937333107, test loss=0.028411298990249634
fold=6, train loss=0.052543286234140396, test loss=0.02963157370686531
fold=7, train loss=0.0522150881588459, test loss=0.04861950874328613
fold=8, train loss=0.046014633029699326, test loss=0.07903715968132019
fold=9, train loss=0.04413898289203644, test loss=0.10750693082809448
fold=10, train loss=0.04103844240307808, test loss=0.12544624507427216
#############################

#######Cross validation########

activation function=sigmoid, hidden_layers=[4, 4, 4]
fold=1, train loss=0.049316324293613434, test loss=0.016394617035984993
fold=2, train loss=0.041502002626657486, test loss=0.12717106938362122
fold=3, train loss=0.042662013322114944, test loss=0.10083401203155518
fold=4, train loss=0.04358890280127525, test loss=0.1036643534898758
fold=5, train loss=0.04736218973994255, test loss=0.023548820987343788
fold=6, train loss=0.04568003490567207, test loss=0.053554512560367584
fold=7, train loss=0.042691558599472046, test loss=0.0804760605096817
fold=8, train loss=0.04189681261777878, test loss=0.07193788886070251
fold=9, train loss=0.06077497452497482, test loss=0.0647311881184578
fold=10, train loss=0.04574521258473396, test loss=0.042614080011844635
#############################

#######Cross validation########

activation function=tanh, hidden_layers=[4]
fold=1, train loss=0.05735822021961212, test loss=0.031849272549152374
fold=2, train loss=0.06060684472322464, test loss=0.07906538993120193
fold=3, train loss=0.35565483570098877, test loss=0.4414968490600586
fold=4, train loss=0.07139775902032852, test loss=0.0196441151201725
fold=5, train loss=0.057793863117694855, test loss=0.09647959470748901
fold=6, train loss=0.037092868238687515, test loss=0.179653137922287
fold=7, train loss=0.062234751880168915, test loss=0.031497322022914886
fold=8, train loss=0.0499696210026741, test loss=0.04805334284901619
fold=9, train loss=0.05542701482772827, test loss=0.10657373815774918
fold=10, train loss=0.06355302780866623, test loss=0.09744337946176529
#############################

#######Cross validation########

activation function=tanh, hidden_layers=[4, 4]
fold=1, train loss=0.04526253417134285, test loss=0.1510665863752365
fold=2, train loss=0.05407264828681946, test loss=0.05250776559114456
fold=3, train loss=0.052001792937517166, test loss=0.06621548533439636
fold=4, train loss=0.04665418341755867, test loss=0.06147097796201706
fold=5, train loss=0.04562045633792877, test loss=0.0696239173412323
fold=6, train loss=0.04407883062958717, test loss=0.06765343248844147
fold=7, train loss=0.039475370198488235, test loss=0.12483766674995422
fold=8, train loss=0.0507468581199646, test loss=0.03630495071411133
fold=9, train loss=0.05241463705897331, test loss=0.032023534178733826
fold=10, train loss=0.049825627356767654, test loss=0.05916319787502289
#############################

#######Cross validation########

activation function=tanh, hidden_layers=[4, 4, 4]
fold=1, train loss=0.048552095890045166, test loss=0.0197621900588274
fold=2, train loss=0.04191494733095169, test loss=0.14714765548706055
fold=3, train loss=0.04897855222225189, test loss=0.08834874629974365
fold=4, train loss=0.048023391515016556, test loss=0.028164532035589218
fold=5, train loss=0.04774867370724678, test loss=0.040228333324193954
fold=6, train loss=0.04695101082324982, test loss=0.06434657424688339
fold=7, train loss=0.05175372213125229, test loss=0.046586669981479645
fold=8, train loss=0.03533235564827919, test loss=0.21180686354637146
fold=9, train loss=0.04788963496685028, test loss=0.016798589378595352
fold=10, train loss=0.04524747282266617, test loss=0.04939408227801323
#############################
""".split("#############################")

# Each element of `text` is one cross-validation section: echo its header line
# and print the mean of the "test loss" values found in it.
for part in text:
  loss = 0.0
  total = 0
  lines = part.split("\n")
  for line in lines:
    if "activation function" in line:
      print(line)
      continue
    if "test loss" not in line:
      continue
    # The value after "test loss=" runs to the end of the line.
    loss += float(line.split("test loss=")[1])
    total += 1
  # Sections without any fold line (e.g. the trailing remainder) are skipped.
  if total:
    print(f"Avg loss={loss/total}")

activation function=relu, hidden_layers=[4]
Avg loss=0.06176964193582535
activation function=relu, hidden_layers=[4, 4]
Avg loss=0.06398595683276653
activation function=relu, hidden_layers=[4, 4, 4]
Avg loss=0.06851736614480615
activation function=sigmoid, hidden_layers=[4]
Avg loss=0.08923555109649897
activation function=sigmoid, hidden_layers=[4, 4]
Avg loss=0.06510685496032238
activation function=sigmoid, hidden_layers=[4, 4, 4]
Avg loss=0.06849266029894352
activation function=tanh, hidden_layers=[4]
Avg loss=0.1131756141781807
activation function=tanh, hidden_layers=[4, 4]
Avg loss=0.07208675146102905
activation function=tanh, hidden_layers=[4, 4, 4]
Avg loss=0.07125842366367578
