In [12]:
import torch
from typing import List, Tuple
import plotly.graph_objects as go

In [2]:
def sigmoid(x: torch.Tensor) -> torch.Tensor:
    """
    Compute the element-wise sigmoid activation, 1 / (1 + exp(-x)).

    Parameters:
    x (torch.Tensor): Input tensor of pre-activations.

    Returns:
    torch.Tensor: Tensor of the same shape as `x` with values in [0, 1].
    """
    # The hand-written form 1 / (1 + torch.exp(-x)) overflows exp() for large
    # negative float32 inputs; the forward value is still 0, but the backward
    # pass multiplies 0 by inf and produces NaN gradients. The built-in
    # computes the same function and keeps gradients finite.
    return torch.sigmoid(x)

def negative_log_likelihood(predictions: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """
    Calculate the mean negative log likelihood (binary cross-entropy) loss.

    Parameters:
    predictions (torch.Tensor): Predicted probabilities.
    labels (torch.Tensor): True labels.

    Returns:
    torch.Tensor: Scalar tensor with the mean negative log likelihood.
    """
    # Clamp margins must be representable in the tensor's dtype. In float32,
    # 1 - 1e-10 rounds to exactly 1.0, so a fixed 1e-10 upper margin leaves a
    # saturated prediction of 1.0 untouched and the loss becomes
    # 0 * log(0) = NaN. Fall back to the default float dtype for non-float input.
    dtype = predictions.dtype if predictions.is_floating_point() else torch.get_default_dtype()
    info = torch.finfo(dtype)
    lower = max(1e-10, info.tiny)         # 1e-10 unless that underflows (half precision)
    upper = 1 - max(1e-10, info.eps)      # largest usable value strictly below 1.0

    clamped = torch.clamp(predictions, min=lower, max=upper)
    log_likelihood = labels * torch.log(clamped) + (1 - labels) * torch.log(1 - clamped)
    return -torch.mean(log_likelihood)

def train_neuron(features: List[List[float]],
                 labels: List[float],
                 initial_weights: List[float],
                 initial_bias: float,
                 learning_rate: float,
                 epochs: int) -> Tuple[List[float], float, List[float]]:
    """
    Train a single sigmoid neuron with full-batch gradient descent on the NLL loss.

    Parameters:
    features (List[List[float]]): Input feature data, one row per sample.
    labels (List[float]): True output labels, one per sample.
    initial_weights (List[float]): Initial weights for the neuron.
    initial_bias (float): Initial bias for the neuron.
    learning_rate (float): Step size applied to the gradients.
    epochs (int): Number of gradient-descent updates over the whole data set.

    Returns:
    Tuple[List[float], float, List[float]]: Updated weights, updated bias, and
    the negative log likelihood (NLL) recorded before each update. All values
    are rounded to 4 decimal places.
    """
    weights = torch.tensor(initial_weights, dtype=torch.float32, requires_grad=True)
    bias = torch.tensor(initial_bias, dtype=torch.float32, requires_grad=True)
    features = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.float32)
    nll_values = []

    for _ in range(epochs):
        # Drop gradients from the previous step; backward() would otherwise accumulate.
        weights.grad = None
        bias.grad = None

        predictions = sigmoid(torch.matmul(features, weights) + bias)
        nll = negative_log_likelihood(predictions, labels)
        nll_values.append(round(nll.item(), 4))

        nll.backward()

        # Parameter updates must not be recorded in the autograd graph.
        with torch.no_grad():
            weights -= learning_rate * weights.grad
            bias -= learning_rate * bias.grad

    # Round the Python floats rather than the float32 tensor: converting a
    # rounded float32 to a Python float re-exposes its binary representation
    # (e.g. 0.10700000077486038 instead of 0.107).
    updated_weights = [round(w, 4) for w in weights.detach().tolist()]
    updated_bias = round(bias.item(), 4)

    return updated_weights, updated_bias, nll_values


In [3]:
# Three two-feature samples, trained for two gradient-descent steps.
features = [
    [1.0, 2.0],
    [2.0, 1.0],
    [-1.0, -2.0],
]
labels = [1, 0, 0]
initial_weights, initial_bias = [0.1, -0.2], 0.0
learning_rate, epochs = 0.1, 2

updated_weights, updated_bias, nll_values = train_neuron(
    features=features,
    labels=labels,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights}")
print(f"Updated bias: {updated_bias}")
print(f"NLL values per epoch: {nll_values}")

Updated weights: [0.10700000077486038, -0.08470000326633453]
Updated bias: -0.0335
NLL values per epoch: [0.8006, 0.7631]


In [4]:
# Ten three-feature samples with binary labels, trained with the NLL loss.
features = [
    [0.5, 1.5, 2.0], [1.0, 2.0, 3.0], [1.5, 2.5, 3.5], [2.0, 1.0, 0.5],
    [-1.0, -1.5, -2.0], [-1.5, -2.0, -2.5], [0.0, 0.5, 1.0], [2.0, 3.0, 4.0],
    [-2.0, -2.5, -3.0], [3.0, 4.0, 5.0],
]
labels = [1, 1, 1, 0, 0, 0, 1, 1, 0, 1]
initial_weights, initial_bias = [0.2, -0.1, 0.4], 0.0
learning_rate, epochs = 0.05, 50

updated_weights, updated_bias, nll_values = train_neuron(
    features=features,
    labels=labels,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights}")
print(f"Updated bias: {updated_bias}")
print(f"NLL values per epoch: {nll_values}")

Updated weights: [0.08669999986886978, 0.1395999938249588, 0.885699987411499]
Updated bias: -0.0721
NLL values per epoch: [0.3515, 0.3335, 0.319, 0.3071, 0.2971, 0.2886, 0.2813, 0.2749, 0.2692, 0.2641, 0.2596, 0.2555, 0.2517, 0.2483, 0.2451, 0.2421, 0.2394, 0.2368, 0.2344, 0.2321, 0.2299, 0.2278, 0.2258, 0.224, 0.2221, 0.2204, 0.2187, 0.2171, 0.2155, 0.214, 0.2126, 0.2111, 0.2097, 0.2084, 0.207, 0.2057, 0.2045, 0.2032, 0.202, 0.2008, 0.1997, 0.1985, 0.1974, 0.1963, 0.1952, 0.1941, 0.193, 0.192, 0.1909, 0.1899]


In [5]:
def train_neuron_MSE(features: List[List[float]],
                 labels: List[float],
                 initial_weights: List[float],
                 initial_bias: float,
                 learning_rate: float,
                 epochs: int) -> Tuple[List[float], float, List[float]]:
    """
    Train a single sigmoid neuron with full-batch gradient descent on the MSE loss.

    Parameters:
    features (List[List[float]]): Input feature data, one row per sample.
    labels (List[float]): True output labels, one per sample.
    initial_weights (List[float]): Initial weights for the neuron.
    initial_bias (float): Initial bias for the neuron.
    learning_rate (float): Step size applied to the gradients.
    epochs (int): Number of gradient-descent updates over the whole data set.

    Returns:
    Tuple[List[float], float, List[float]]: Updated weights, updated bias, and
    the mean squared error (MSE) recorded before each update. All values are
    rounded to 4 decimal places.
    """
    weights = torch.tensor(initial_weights, dtype=torch.float32, requires_grad=True)
    bias = torch.tensor(initial_bias, dtype=torch.float32, requires_grad=True)
    features = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.float32)
    mse_values = []

    for _ in range(epochs):
        # Drop gradients from the previous step; backward() would otherwise accumulate.
        weights.grad = None
        bias.grad = None

        predictions = sigmoid(torch.matmul(features, weights) + bias)
        mse = torch.mean((predictions - labels) ** 2)
        mse_values.append(round(mse.item(), 4))

        mse.backward()

        # Parameter updates must not be recorded in the autograd graph.
        with torch.no_grad():
            weights -= learning_rate * weights.grad
            bias -= learning_rate * bias.grad

    # Computed once after the loop so the result is defined even when
    # epochs == 0. Rounding the Python floats (not the float32 tensor) avoids
    # values such as 0.20260000228881836 in the returned list.
    updated_weights = [round(w, 4) for w in weights.detach().tolist()]
    updated_bias = round(bias.item(), 4)

    return updated_weights, updated_bias, mse_values

In [6]:
# Ten three-feature samples with binary labels, trained with the MSE loss.
features = [
    [0.5, 1.5, 2.0], [1.0, 2.0, 3.0], [1.5, 2.5, 3.5], [2.0, 1.0, 0.5],
    [-1.0, -1.5, -2.0], [-1.5, -2.0, -2.5], [0.0, 0.5, 1.0], [2.0, 3.0, 4.0],
    [-2.0, -2.5, -3.0], [3.0, 4.0, 5.0],
]
labels = [1, 1, 1, 0, 0, 0, 1, 1, 0, 1]
initial_weights, initial_bias = [0.2, -0.1, 0.4], 0.0
learning_rate, epochs = 0.005, 50

updated_weights_mse, updated_bias_mse, mse_values = train_neuron_MSE(
    features=features,
    labels=labels,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights_mse}")
print(f"Updated bias: {updated_bias_mse}")
print(f"Loss values per epoch: {mse_values}")

Updated weights: [0.20260000228881836, -0.07680000364780426, 0.43860000371932983]
Updated bias: -0.0014
Loss values per epoch: [0.0979, 0.0977, 0.0975, 0.0973, 0.0971, 0.0969, 0.0967, 0.0965, 0.0963, 0.0961, 0.0959, 0.0958, 0.0956, 0.0954, 0.0952, 0.095, 0.0949, 0.0947, 0.0945, 0.0943, 0.0942, 0.094, 0.0938, 0.0937, 0.0935, 0.0933, 0.0932, 0.093, 0.0929, 0.0927, 0.0926, 0.0924, 0.0923, 0.0921, 0.092, 0.0918, 0.0917, 0.0915, 0.0914, 0.0912, 0.0911, 0.091, 0.0908, 0.0907, 0.0905, 0.0904, 0.0903, 0.0901, 0.09, 0.0899]


Linearly separable data

In [45]:
# Five samples labelled 1 followed by their sign-flipped counterparts labelled 0.
features_linear = [
    [1.0, 2.0],
    [2.0, 3.0],
    [3.0, 4.0],
    [1.5, 2.5],
    [2.5, 3.5],
    [-1.0, -2.0],
    [-2.0, -3.0],
    [-3.0, -4.0],
    [-1.5, -2.5],
    [-2.5, -3.5],
]
labels_linear = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
initial_weights, initial_bias = [0.2, -0.2], 0.0
learning_rate, epochs = 0.05, 50

# Same data and hyperparameters, first with the NLL loss ...
updated_weights, updated_bias, nll_values = train_neuron(
    features=features_linear,
    labels=labels_linear,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights}")
print(f"Updated bias: {updated_bias}")
print(f"NLL values per epoch: {nll_values}")

# ... then with the MSE loss.
updated_weights_mse, updated_bias_mse, mse_values = train_neuron_MSE(
    features=features_linear,
    labels=labels_linear,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights_mse}")
print(f"Updated bias: {updated_bias_mse}")
print(f"Loss values per epoch: {mse_values}")


Updated weights: [0.7427999973297119, 0.6585999727249146]
Updated bias: 0.0
NLL values per epoch: [0.7981, 0.6187, 0.4959, 0.4103, 0.3486, 0.3026, 0.2672, 0.2393, 0.2167, 0.1981, 0.1825, 0.1693, 0.1579, 0.148, 0.1393, 0.1317, 0.1248, 0.1187, 0.1132, 0.1082, 0.1036, 0.0994, 0.0956, 0.092, 0.0888, 0.0857, 0.0829, 0.0803, 0.0778, 0.0755, 0.0733, 0.0713, 0.0694, 0.0675, 0.0658, 0.0642, 0.0627, 0.0612, 0.0598, 0.0584, 0.0572, 0.056, 0.0548, 0.0537, 0.0526, 0.0516, 0.0506, 0.0497, 0.0488, 0.0479]
Updated weights: [0.5497000217437744, 0.352400004863739]
Updated bias: 0.0
Loss values per epoch: [0.3023, 0.2559, 0.2168, 0.1849, 0.1593, 0.1388, 0.1223, 0.1089, 0.0979, 0.0888, 0.0812, 0.0747, 0.0691, 0.0643, 0.0601, 0.0564, 0.0532, 0.0503, 0.0477, 0.0453, 0.0432, 0.0412, 0.0395, 0.0379, 0.0364, 0.035, 0.0337, 0.0325, 0.0314, 0.0304, 0.0294, 0.0285, 0.0277, 0.0269, 0.0261, 0.0254, 0.0247, 0.0241, 0.0235, 0.0229, 0.0224, 0.0218, 0.0213, 0.0209, 0.0204, 0.02, 0.0196, 0.0192, 0.0188, 0.0184]


In [46]:
import numpy as np

# Training points: the two feature columns on x/y, the class label on z.
fig = go.Figure()
t_a, t_b = np.asarray(features_linear).T
fig.add_trace(go.Scatter3d(
    x=t_a,
    y=t_b,
    z=labels_linear,
    mode='markers',
    marker=dict(size=5, color='red'),
    name='Точки'
))

# Regular grid over which both fitted neurons are evaluated.
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)

# Sigmoid response of the neuron trained with the NLL loss.
Z = sigmoid(torch.Tensor(X * updated_weights[0] + Y * updated_weights[1] + updated_bias))

# The two surfaces get different colorscales and legend entries so they can be
# told apart. Surface traces are not listed in the legend unless showlegend is
# set, and the two default colorbars would be drawn on top of each other, so
# the colorbars are hidden (the z axis already carries the value).
fig.add_trace(go.Surface(
    x=X,
    y=Y,
    z=Z.numpy(),
    colorscale='Viridis',
    showscale=False,
    showlegend=True,
    opacity=0.7,
    name='NLL'
))

# Sigmoid response of the neuron trained with the MSE loss.
Z1 = sigmoid(torch.Tensor(X * updated_weights_mse[0] + Y * updated_weights_mse[1] + updated_bias_mse))

fig.add_trace(go.Surface(
    x=X,
    y=Y,
    z=Z1.numpy(),
    colorscale='Plasma',
    showscale=False,
    showlegend=True,
    opacity=0.7,
    name='MSE'
))

fig.update_layout(
    scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z'
    ),
    title="Различия функций потерь",
)

fig.show()

In [38]:
from sklearn.datasets import make_classification

# Synthetic two-feature data set with a 0.99 weight on the first class.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    weights=[0.99],
    class_sep=2,
    random_state=42,
)

# initial_weights, initial_bias, learning_rate and epochs are not set here;
# this cell reuses the values left in the kernel by an earlier cell.
updated_weights, updated_bias, nll_values = train_neuron(
    features=X,
    labels=y,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights}")
print(f"Updated bias: {updated_bias}")
print(f"NLL values per epoch: {nll_values}")

updated_weights_mse, updated_bias_mse, mse_values = train_neuron_MSE(
    features=X,
    labels=y,
    initial_weights=initial_weights,
    initial_bias=initial_bias,
    learning_rate=learning_rate,
    epochs=epochs,
)

print(f"Updated weights: {updated_weights_mse}")
print(f"Updated bias: {updated_bias_mse}")
print(f"Loss values per epoch: {mse_values}")

Updated weights: [-0.5763000249862671, 0.6705999970436096]
Updated bias: -0.4042
NLL values per epoch: [1.1635, 0.9658, 0.8068, 0.681, 0.582, 0.5039, 0.4418, 0.392, 0.3515, 0.3181, 0.2904, 0.2671, 0.2473, 0.2303, 0.2156, 0.2027, 0.1914, 0.1815, 0.1726, 0.1646, 0.1575, 0.1511, 0.1452, 0.1399, 0.135, 0.1306, 0.1265, 0.1227, 0.1192, 0.1159, 0.1129, 0.1101, 0.1074, 0.105, 0.1027, 0.1005, 0.0984, 0.0965, 0.0947, 0.093, 0.0914, 0.0898, 0.0884, 0.087, 0.0857, 0.0844, 0.0832, 0.0821, 0.081, 0.0799]
Updated weights: [-0.3528999984264374, 0.39750000834465027]
Updated bias: -0.2843
Loss values per epoch: [0.4718, 0.4342, 0.3964, 0.3594, 0.3239, 0.2907, 0.2604, 0.2331, 0.2089, 0.1877, 0.1692, 0.1532, 0.1393, 0.1273, 0.1168, 0.1077, 0.0997, 0.0927, 0.0865, 0.081, 0.0762, 0.0718, 0.0679, 0.0644, 0.0613, 0.0584, 0.0558, 0.0534, 0.0512, 0.0492, 0.0474, 0.0457, 0.0441, 0.0427, 0.0413, 0.04, 0.0389, 0.0378, 0.0367, 0.0358, 0.0348, 0.034, 0.0332, 0.0324, 0.0317, 0.031, 0.0304, 0.0297, 0.0292, 0.0286]


In [43]:
import numpy as np

# Data points: the two feature columns of X on x/y, the class label y on z.
fig = go.Figure()
t_a, t_b = np.asarray(X).T
fig.add_trace(go.Scatter3d(
    x=t_a,
    y=t_b,
    z=y,
    mode='markers',
    marker=dict(size=5, color='red'),
    name='Точки'
))

# Regular grid over which both fitted neurons are evaluated.
xx = np.linspace(-10, 10, 100)
yy = np.linspace(-10, 10, 100)
XX, Y = np.meshgrid(xx, yy)

# Sigmoid response of the neuron trained with the NLL loss.
Z = sigmoid(torch.Tensor(XX * updated_weights[0] + Y * updated_weights[1] + updated_bias))

# The two surfaces get different colorscales and legend entries so they can be
# told apart. Surface traces are not listed in the legend unless showlegend is
# set, and the two default colorbars would be drawn on top of each other, so
# the colorbars are hidden (the z axis already carries the value).
fig.add_trace(go.Surface(
    x=XX,
    y=Y,
    z=Z.numpy(),
    colorscale='Viridis',
    showscale=False,
    showlegend=True,
    opacity=0.7,
    name='NLL'
))

# Sigmoid response of the neuron trained with the MSE loss.
Z1 = sigmoid(torch.Tensor(XX * updated_weights_mse[0] + Y * updated_weights_mse[1] + updated_bias_mse))

fig.add_trace(go.Surface(
    x=XX,
    y=Y,
    z=Z1.numpy(),
    colorscale='Plasma',
    showscale=False,
    showlegend=True,
    opacity=0.7,
    name='MSE'
))

fig.update_layout(
    scene=dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z'
    ),
    title="Различия функций потерь",
)

fig.show()