In [1]:
!pip install jax
!pip install jaxopt
!pip install mxnet



**Backpropagation в PyTorch CPU**

In [2]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Определяем параметры модели
input_size = 2
hidden_size = 10
output_size = 2
device = torch.device('cpu')

# Создаем модель
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Создаем наборы данных
X = torch.tensor([[0.2, 0.1], [0.4, 0.7], [0.3, 0.5], [0.7, 0.9], [0.8, 0.4], [0.6, 0.2]], dtype=torch.float32)
y = torch.tensor([0, 1, 1, 0, 0, 1], dtype=torch.long)

# Создаем наборы данных для загрузки в DataLoader
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

# Создаем экземпляр модели и функцию потерь и определяем оптимизатор
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
model.to(device)

# Обучаем модель
start_time = time.time()
for epoch in range(1000):
    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Loss: {running_loss / len(dataset):.6f}")
    elif epoch == 999:
        print(f"Epoch {epoch+1} | Loss: {running_loss / len(dataset):.6f}")
end_time = time.time()

# Проверяем результаты
with torch.no_grad():
    outputs = model(X)
    _, predicted = torch.max(outputs.data, 1)
    print("Predicted labels:", predicted)

print(f'total training time {end_time - start_time}')

Epoch 0 | Loss: 0.357817
Epoch 100 | Loss: 0.341468
Epoch 200 | Loss: 0.336804
Epoch 300 | Loss: 0.329916
Epoch 400 | Loss: 0.322687
Epoch 500 | Loss: 0.312558
Epoch 600 | Loss: 0.302015
Epoch 700 | Loss: 0.290985
Epoch 800 | Loss: 0.276702
Epoch 900 | Loss: 0.262132
Epoch 1000 | Loss: 0.242933
Predicted labels: tensor([1, 1, 1, 0, 0, 1])
total training time 1.8945322036743164


**Backpropagation в TensorFlow CPU**

In [3]:
import tensorflow as tf
import time

with tf.device('cpu'):
    X = [[0.2, 0.1], [0.4, 0.7], [0.3, 0.5], [0.7, 0.9], [0.8, 0.4], [0.6, 0.2]]
    y = [0, 1, 1, 0, 0, 1]

    num_hidden = 10
    num_epochs = 1000
    batch_size = 2

    # Создаем граф вычислений
    tf.keras.backend.clear_session()

    # Определяем входные данные
    X_placeholder = tf.keras.layers.Input(shape=(2,), name='input')

    # Определяем веса
    W1 = tf.Variable(tf.random.normal([2, num_hidden]), name="W1")
    W2 = tf.Variable(tf.random.normal([num_hidden, 1]), name="W2")

    # Определяем смещения
    b1 = tf.Variable(tf.zeros([num_hidden]), name="b1")
    b2 = tf.Variable(tf.zeros([1]), name="b2")

    # Определяем скрытый слой
    hidden_layer = tf.nn.sigmoid(tf.matmul(X_placeholder, W1) + b1)

    # Определяем выходной слой
    output_layer = tf.nn.sigmoid(tf.matmul(hidden_layer, W2) + b2)

    # Определяем функцию потерь
    loss = tf.reduce_mean(tf.square(output_layer - y))

    # Определяем оптимизатор
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

    # Определяем модель
    model = tf.keras.Model(inputs=X_placeholder, outputs=output_layer)

    # Компилируем модель
    model.compile(optimizer=optimizer, loss='mse')

    # Обучаем модель
    start_time = time.time()
    model.fit(x=X, y=y, epochs=num_epochs, batch_size=batch_size, verbose=0)
    end_time = time.time()

    # Получаем предсказания
    predictions = model.predict(X)

    print(predictions[0])
    for i in range(len(X)):
        print(f'input sample: {X[i]}, probability: {predictions[i][0]}')

    print(f'total training time {end_time - start_time}')

[0.5085831]
input sample: [0.2, 0.1], probability: 0.508583128452301
input sample: [0.4, 0.7], probability: 0.679873526096344
input sample: [0.3, 0.5], probability: 0.6171969175338745
input sample: [0.7, 0.9], probability: 0.7931803464889526
input sample: [0.8, 0.4], probability: 0.7907161712646484
input sample: [0.6, 0.2], probability: 0.7056461572647095
total training time 23.718749523162842


**Backpropagation в JAX CPU**

In [4]:
import jax
import jax.numpy as jnp
import jax.random as random
import time
import numpy as np

jax.devices('cpu')

# Определяем sigmoid функцию
def sigmoid(x):
    return 1 / (1 + jnp.exp(-x))

# Опредделяем параметры нейросети
num_hidden = 10
key = random.PRNGKey(0)
W1 = random.normal(key, (2, num_hidden))
W2 = random.normal(key, (num_hidden, 1))
b1 = jnp.zeros((num_hidden,))
b2 = jnp.zeros((1,))

# Определяем функцию потерь
def loss(params, x, y):
    W1, b1, W2, b2 = params
    hidden_layer = sigmoid(jnp.dot(x, W1) + b1)
    output_layer = sigmoid(jnp.dot(hidden_layer, W2) + b2)
    return jnp.mean(jnp.square(output_layer - y))

# Определяем оптимизатор
def update(params, x, y, lr=0.01):
    grads = jax.grad(loss)(params, x, y)
    return [param - lr * grad for param, grad in zip(params, grads)]

# Тренируем модель
X = jnp.array([[0.2, 0.1], [0.4, 0.7], [0.3, 0.5], [0.7, 0.9], [0.8, 0.4], [0.6, 0.2]])
y = jnp.array([0, 1, 1, 0, 0, 1])

params = [W1, b1, W2, b2]
num_epochs = 1000
batch_size = 2
lr = 0.01

start_time = time.time()
for epoch in range(num_epochs):
    for i in range(0, len(X), batch_size):
        batch_X = X[i:i+batch_size]
        batch_y = y[i:i+batch_size]
        params = update(params, batch_X, batch_y, lr=lr)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}, loss {loss(params, X, y):.6f}")
end_time = time.time()


predictions = sigmoid(jnp.dot(sigmoid(jnp.dot(X, W1) + b1), W2) + b2)

for i in range(len(X)):
    print(f"Input sample: {X[i]}, probability: {predictions[i][0]:.6f}")

print(f"Total training time {end_time - start_time:.3f} seconds")

Epoch 100, loss 0.251842
Epoch 200, loss 0.251176
Epoch 300, loss 0.251122
Epoch 400, loss 0.251094
Epoch 500, loss 0.251067
Epoch 600, loss 0.251042
Epoch 700, loss 0.251017
Epoch 800, loss 0.250993
Epoch 900, loss 0.250970
Epoch 1000, loss 0.250947
Input sample: [0.2 0.1], probability: 0.330406
Input sample: [0.4 0.7], probability: 0.392539
Input sample: [0.3 0.5], probability: 0.374074
Input sample: [0.7 0.9], probability: 0.419339
Input sample: [0.8 0.4], probability: 0.390050
Input sample: [0.6 0.2], probability: 0.359054
Total training time 76.224 seconds


No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


**Backpropagation в MXNet CPU**

In [5]:
import mxnet as mx
from mxnet import nd, autograd, gluon

# Определяем параметры
learning_rate = 0.01
epochs = 1000
batch_size = 2
num_inputs = 2
num_outputs = 1
num_hidden = 10

X = nd.array([[0.2, 0.1], [0.4, 0.7], [0.3, 0.5], [0.7, 0.9], [0.8, 0.4], [0.6, 0.2]])
y = nd.array([0, 1, 1, 0, 0, 1])

train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=True)

# Определяем модель нейросети
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs, activation="sigmoid"))
net.initialize()

# Определяем функцию потерь
loss = gluon.loss.SigmoidBinaryCrossEntropyLoss()

# Определяем оптимизатор
trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': learning_rate})

# Тренируем модель
start_time = time.time()
for epoch in range(epochs):
    cumulative_loss = 0
    for data, label in train_data:
        with autograd.record():
            output = net(data)
            L = loss(output, label)
        L.backward()
        trainer.step(batch_size)
        cumulative_loss += nd.sum(L).asscalar()
    avg_loss = cumulative_loss / X.shape[0]
    if epoch % 100 == 0:
        print(f"Epoch {epoch} | Loss: {avg_loss}")
    elif epoch == 999:
        print(f"Epoch {epoch+1} | Loss: {avg_loss}")
end_time = time.time()

predicted_classes = (net(X) > 0.5).reshape((-1,))
for i in range(len(X)):
    print(f'input sample: {X.asnumpy()[i]}, probability: {predicted_classes.asnumpy()[i]}')

print(f'total training time {end_time - start_time}')

Epoch 0 | Loss: 0.725563128789266
Epoch 100 | Loss: 0.6928676764170328
Epoch 200 | Loss: 0.6911760369936625
Epoch 300 | Loss: 0.68854288260142
Epoch 400 | Loss: 0.6824089686075846
Epoch 500 | Loss: 0.6671608686447144
Epoch 600 | Loss: 0.6437016924222311
Epoch 700 | Loss: 0.6202540000279745
Epoch 800 | Loss: 0.6016271511713663
Epoch 900 | Loss: 0.5898056030273438
Epoch 1000 | Loss: 0.5825513402620951
input sample: [0.2 0.1], probability: 0.0
input sample: [0.4 0.7], probability: 1.0
input sample: [0.3 0.5], probability: 1.0
input sample: [0.7 0.9], probability: 0.0
input sample: [0.8 0.4], probability: 0.0
input sample: [0.6 0.2], probability: 0.0
total training time 9.975848197937012
