In [96]:

import torch
import torch.nn as nn
import torch.optim as optim

# Define a custom neural network class
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleNN, self).__init__()
        
        # Define the network layers
        self.linear1 = nn.Linear(input_size, hidden_size) # First linear layer
        self.activation1 = nn.ReLU() # Activation function after the first layer
        self.linear2 = nn.Linear(hidden_size, output_size) # Second linear layer
        self.softmax = nn.Softmax(dim=1) # Softmax layer for the output
        self.logsoftmax = nn.LogSoftmax(dim=1) # LogSoftmax layer for the output
    
    def forward(self, x):
        # Define the forward pass
        x = self.linear1(x)
        x = self.activation1(x)
        x = self.linear2(x)
        x = self.softmax(x)
        x = self.logsoftmax(x)
        return x

# Create an instance of the network
input_size = 10
hidden_size = 5
output_size = 3
model = SimpleNN(input_size, hidden_size, output_size)

# Create a loss function
criterion = nn.NLLLoss()

# Create an optimizer
# optimizer = optim.SGD(model.parameters(), lr=0.01)

# Define a batch of inputs and targets
batch_size = 4
input_data = torch.randn(batch_size, input_size) # Random input tensor with batch size
targets = torch.randint(0, output_size, (batch_size,)) # Random target tensor for each input in the batch

# Forward pass
outputs = model(input_data)

# Compute loss
loss = criterion(outputs, targets)

# Backward pass
# loss.backward()

# Update weights
# optimizer.step()

# Optionally, print output and loss
print(f"Outputs:\n{outputs}")
print(f"Loss: {loss.item()}")


Outputs:
tensor([[-1.0158, -1.1004, -1.1869],
        [-0.9717, -1.1563, -1.1812],
        [-1.0406, -1.0731, -1.1880],
        [-1.0262, -1.0985, -1.1767]], grad_fn=<LogSoftmaxBackward0>)
Loss: 1.1553728580474854


In [97]:
# random array and save it to a file .npy
# import numpy as np

# B = 1000
# N = 100
# M = 30
# X = np.random.rand(B, N).astype(np.float32)
# W = np.random.rand(M,N).astype(np.float32)
# bias = np.random.rand(M).astype(np.float32)
# np.save('../with-torch-tests/linear-layer/X.npy', X)
# np.save('../with-torch-tests/linear-layer/W.npy', W)
# np.save('../with-torch-tests/linear-layer/bias.npy', bias)

# l = nn.Linear(N,M)
# l.weight.data = torch.from_numpy(W)
# l.bias.data = torch.from_numpy(bias)
# X_torch = torch.from_numpy(X)
# Y = l(X_torch)
# np.save('../with-torch-tests/linear-layer/Y.npy', Y.detach().numpy())
# print(Y[0,0:5])


In [98]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Load the input data
ln1b = np.load('../with-torch-tests/all-model-cpu/ln1b.npy')
ln1w = np.load('../with-torch-tests/all-model-cpu/ln1w.npy')
ln2b = np.load('../with-torch-tests/all-model-cpu/ln2b.npy')
ln2w = np.load('../with-torch-tests/all-model-cpu/ln2w.npy')
target = np.load('../with-torch-tests/all-model-cpu/target.npy')
X_c = np.load('../with-torch-tests/all-model-cpu/X_c.npy')

# get sizes
B,N = X_c.shape
H1,N = ln1w.shape
l1 = nn.Linear(N,H1)
l1.weight.data = torch.from_numpy(ln1w).to(torch.float32)
l1.bias.data = torch.from_numpy(ln1b).to(torch.float32)


H2,_ = ln2w.shape
l2 = nn.Linear(H1,H2)
l2.weight.data = torch.from_numpy(ln2w).to(torch.float32)
l2.bias.data = torch.from_numpy(ln2b).to(torch.float32)

X = torch.from_numpy(X_c).to(torch.float32)
Y1 = l1(X)
y1_relu = F.relu(Y1)
Y2 = l2(y1_relu)
criterion = nn.CrossEntropyLoss(reduction='sum')
loss = criterion(Y2, torch.from_numpy(target).long())
loss

tensor(160.7092, grad_fn=<NllLossBackward0>)

In [29]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
def pp(a1,a2):
            print(a1[0,0:5])
            print(a2[0,0:5])
class DataLoaderModule:
    def __init__(self, data_path):
        self.data_path = data_path
        self.load_data()

    def load_data(self):
        self.a1 = np.load(f'{self.data_path}/a1.npy')
        self.da1 = np.load(f'{self.data_path}/da1.npy')
        self.dln1 = np.load(f'{self.data_path}/dln1.npy')
        self.dln2 = np.load(f'{self.data_path}/dln2.npy')
        self.dsm = np.load(f'{self.data_path}/dsm.npy')
        self.ln1 = np.load(f'{self.data_path}/ln1.npy')
        self.ln1b_grad = np.load(f'{self.data_path}/ln1b_grad.npy')
        self.ln1b = np.load(f'{self.data_path}/ln1b.npy')
        self.ln1w_grad = np.load(f'{self.data_path}/ln1w_grad.npy')
        self.ln1w = np.load(f'{self.data_path}/ln1w.npy')
        self.ln2 = np.load(f'{self.data_path}/ln2.npy')
        self.ln2b_grad = np.load(f'{self.data_path}/ln2b_grad.npy')
        self.ln2b = np.load(f'{self.data_path}/ln2b.npy')
        self.ln2w_grad = np.load(f'{self.data_path}/ln2w_grad.npy')
        self.ln2w = np.load(f'{self.data_path}/ln2w.npy')
        
        # pp(self.ln1w, self.ln1w)
        
        self.target = np.load(f'{self.data_path}/target.npy').astype(np.int64)
        self.X_c = np.load(f'{self.data_path}/X_c.npy')
        self.updated_ln1b = np.load(f'{self.data_path}/updated_ln1b.npy')
        self.updated_ln1w = np.load(f'{self.data_path}/updated_ln1w.npy')
        self.updated_ln2b = np.load(f'{self.data_path}/updated_ln2b.npy')
        self.updated_ln2w = np.load(f'{self.data_path}/updated_ln2w.npy')

class Model(nn.Module):
    def __init__(self, ln1w, ln1b, ln2w, ln2b):
        super(Model, self).__init__()
        N, H1 = ln1w.shape
        H2, _ = ln2w.shape
        
        self.l1 = nn.Linear(N, H1)
        self.l1.weight.data = torch.from_numpy(ln1w).float()
        self.l1.bias.data = torch.from_numpy(ln1b).float()

        self.l2 = nn.Linear(H1, H2)
        self.l2.weight.data = torch.from_numpy(ln2w).float()
        self.l2.bias.data = torch.from_numpy(ln2b).float()

    def forward(self, x):
        y1 = self.l1(x)
        y1_relu = F.relu(y1)
        y2 = self.l2(y1_relu)
        return y1, y1_relu, y2

class Trainer:
    def __init__(self, model, data_loader, criterion, optimizer):
        self.model = model
        self.data_loader = data_loader
        self.criterion = criterion
        self.optimizer = optimizer
    
    def train(self):
        X = torch.from_numpy(self.data_loader.X_c).float().requires_grad_(True)
        target = torch.from_numpy(self.data_loader.target).long()
        
        y1, y1_relu, y2 = self.model(X)

        # Retain gradients
        y1.retain_grad()
        y1_relu.retain_grad()
        y2.retain_grad()

        loss = self.criterion(y2, target)
        print("loss ", loss.item())

        # Forward pass comparison
        print("Forward")
        print(np.allclose(y1.detach().numpy(), self.data_loader.ln1, atol=1e-6))
        print(np.allclose(y1_relu.detach().numpy(), self.data_loader.a1, atol=1e-6))
        print(np.allclose(y2.detach().numpy(), self.data_loader.ln2, atol=1e-4))

        # Backward pass
        self.optimizer.zero_grad()
        loss.backward()

        # Compare the gradients
        print("Back")
        print(np.allclose(y2.grad.numpy(), self.data_loader.dsm, atol=1e-4))
        print(np.allclose(y1_relu.grad.numpy(), self.data_loader.dln2, atol=1e-4))
        print(np.allclose(self.model.l2.weight.grad.numpy(), self.data_loader.ln2w_grad, atol=1e-4))
        print(np.allclose(self.model.l2.bias.grad.numpy(), self.data_loader.ln2b_grad, atol=1e-4))
        print(np.allclose(y1.grad.numpy(), self.data_loader.da1, atol=1e-4))
        print(np.allclose(self.model.l1.weight.grad.numpy(), self.data_loader.ln1w_grad, atol=1e-4))
        print(np.allclose(self.model.l1.bias.grad.numpy(), self.data_loader.ln1b_grad, atol=1e-4))
        
        
        # optimzer 
        self.optimizer.step()
        
        # print the updated weights
        print("Updated weights")
        print(np.allclose(self.model.l1.weight.detach().numpy(), self.data_loader.updated_ln1w, atol=1e-2))
        # print first elements in both 
        
        # pp(self.model.l1.weight.detach().numpy(), self.data_loader.updated_ln1w)
        
        print(np.allclose(self.model.l1.bias.detach().numpy(), self.data_loader.updated_ln1b, atol=1e-4))
        print(np.allclose(self.model.l2.weight.detach().numpy(), self.data_loader.updated_ln2w, atol=1e-4))
        print(np.allclose(self.model.l2.bias.detach().numpy(), self.data_loader.updated_ln2b, atol=1e-4))
def main():
    data_loader = DataLoaderModule('../with-torch-tests/trained-model-cpu')
    
    model = Model(data_loader.ln1w, data_loader.ln1b, data_loader.ln2w, data_loader.ln2b)
    
    criterion = nn.CrossEntropyLoss(reduction='sum')
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
     
    trainer = Trainer(model, data_loader, criterion, optimizer)
    trainer.train()

if __name__ == "__main__":
    main()


loss  3065.73193359375
Forward
False
False
True
Back
True
True
True
True
True
True
True
Updated weights
True
True
True
True


In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

class ParamsDownloader:
    def __init__(self, data_path):
        self.data_path = data_path
        self.load_data()

    def load_data(self):
        self.ln1b = np.load(f'{self.data_path}/ln1b.npy')
        self.ln1w = np.load(f'{self.data_path}/ln1w.npy')
        self.ln2b = np.load(f'{self.data_path}/ln2b.npy')
        self.ln2w = np.load(f'{self.data_path}/ln2w.npy')
        


class Model(nn.Module):
    def __init__(self, ln1w, ln1b, ln2w, ln2b):
        super(Model, self).__init__()
        N, H1 = ln1w.shape
        H2, _ = ln2w.shape
        
        self.l1 = nn.Linear(N, H1)
        self.l1.weight.data = torch.from_numpy(ln1w).float()
        self.l1.bias.data = torch.from_numpy(ln1b).float()

        self.l2 = nn.Linear(H1, H2)
        self.l2.weight.data = torch.from_numpy(ln2w).float()
        self.l2.bias.data = torch.from_numpy(ln2b).float()

    def forward(self, x):
        y1 = self.l1(x)
        y1_relu = F.relu(y1)
        y2 = self.l2(y1_relu)
        return y1, y1_relu, y2
    
    
X_train = np.load('../dataset/x_train.npy')
y_train = np.load('../dataset/y_train.npy').astype(np.int64)
print(X_train [0,0:5])
# X_test = np.load('../dataset/x_test.npy')
# y_test = np.load('../dataset/y_test.npy')

N_EPOCHS = 10
BATCH_SIZE = 32
train_loader = torch.utils.data.DataLoader(TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).long()), batch_size=BATCH_SIZE, shuffle=False)
data_loader = ParamsDownloader('../with-torch-tests/trained-model-cpu')
model = Model(data_loader.ln1w, data_loader.ln1b, data_loader.ln2w, data_loader.ln2b)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss(reduction='mean')

for epoch in range(N_EPOCHS):
    for i, (X, y) in enumerate(train_loader):
        optimizer.zero_grad()
        y1, y1_relu, y2 = model(X)
        loss = criterion(y2, y)
        loss.backward()
        optimizer.step()
        print(f"Epoch: {epoch}, Batch: {i}, Loss: {loss.item()}")


x_test = np.load('../dataset/x_test.npy')
y_test = np.load('../dataset/y_test.npy')
y1, y1_relu, y2 = model(torch.from_numpy(x_test).float())
y_pred = y2.argmax(dim=1).numpy()
accuracy = (y_pred == y_test).mean()


print(f"Accuracy: {accuracy}")

[0.78198105 0.4786313  0.7654065  0.05289226 0.815995  ]
Epoch: 0, Batch: 0, Loss: 92.91064453125
Epoch: 0, Batch: 1, Loss: 70.51587677001953
Epoch: 0, Batch: 2, Loss: 89.21380615234375
Epoch: 0, Batch: 3, Loss: 55.34000015258789
Epoch: 0, Batch: 4, Loss: 60.79236602783203
Epoch: 0, Batch: 5, Loss: 47.18828582763672
Epoch: 0, Batch: 6, Loss: 52.81427764892578
Epoch: 0, Batch: 7, Loss: 58.2937126159668
Epoch: 0, Batch: 8, Loss: 57.48456954956055
Epoch: 0, Batch: 9, Loss: 41.366065979003906
Epoch: 0, Batch: 10, Loss: 50.02870559692383
Epoch: 0, Batch: 11, Loss: 29.650102615356445
Epoch: 0, Batch: 12, Loss: 61.06648635864258
Epoch: 0, Batch: 13, Loss: 42.18989944458008
Epoch: 0, Batch: 14, Loss: 37.40363693237305
Epoch: 0, Batch: 15, Loss: 49.080379486083984
Epoch: 0, Batch: 16, Loss: 43.439876556396484
Epoch: 0, Batch: 17, Loss: 49.365577697753906
Epoch: 0, Batch: 18, Loss: 42.333492279052734
Epoch: 0, Batch: 19, Loss: 48.9775276184082
Epoch: 0, Batch: 20, Loss: 31.84650993347168
Epoch: 

In [8]:
class ParamsDownloader:
    def __init__(self, data_path):
        self.data_path = data_path
        self.load_data()

    def load_data(self):
        self.ln1b = np.load(f'{self.data_path}/ln1b.npy')
        self.ln1w = np.load(f'{self.data_path}/ln1w.npy')
        self.ln2b = np.load(f'{self.data_path}/ln2b.npy')
        self.ln2w = np.load(f'{self.data_path}/ln2w.npy')
        


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        N = 130
        H1, H2, H3 = 1024, 1024, 16
        self.l1 = nn.Linear(N, H1)
        self.l2 = nn.Linear(H1, H2)
        self.l3 = nn.Linear(H2, H3)

    def forward(self, x):
        y1 = self.l1(x)
        y1_relu = F.relu(y1)
        y2 = self.l2(y1_relu)
        y2_relu = F.relu(y2)
        y3 = self.l3(y2_relu)
        return y3
    
    
X_train = np.load('../dataset/x_test.npy')
y_train = np.load('../dataset/y_test.npy')

# X_test = np.load('../dataset/x_test.npy')
# y_test = np.load('../dataset/y_test.npy')

N_EPOCHS = 5
BATCH_SIZE = 32
train_loader = torch.utils.data.DataLoader(TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).long()), batch_size=BATCH_SIZE, shuffle=False)
model = Model()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(reduction='mean')
# data_loader = ParamsDownloader('../with-torch-tests/trained-model')

x_test = np.load('../dataset/x_test.npy')
y_test = np.load('../dataset/y_test.npy')
y3 = model(torch.from_numpy(x_test).float())
y_pred = y3.argmax(dim=1).numpy()
accuracy = (y_pred == y_test).mean()
print(accuracy)
for epoch in range(N_EPOCHS):
    for i, (X, y) in enumerate(train_loader):
        optimizer.zero_grad()
        y3 = model(X)
        loss = criterion(y3, y)
        loss.backward()
        optimizer.step()
        # print(f"Epoch: {epoch}, Batch: {i}, Loss: {loss.item()}")
        


x_test = np.load('../dataset/x_test.npy')
y_test = np.load('../dataset/y_test.npy')
y3 = model(torch.from_numpy(x_test).float())
y_pred = y3.argmax(dim=1).numpy()
accuracy = (y_pred == y_test).mean()


print(f"Accuracy: {accuracy}")

TypeError: can't convert np.ndarray of type numpy.uint32. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [101]:
# read arrays from .npy files and func Linear to compare
import numpy as np
import torch
import torch.nn as nn

X = np.load('../with-torch-tests/linear-layer/X_C.npy')
W = np.load('../with-torch-tests/linear-layer/W_C.npy')
bias = np.load('../with-torch-tests/linear-layer/bias_C.npy')
Y = np.load('../with-torch-tests/linear-layer/out_C.npy')
B,N = X.shape
_,M = Y.shape
l = nn.Linear(M,N)
l.weight.data = torch.from_numpy(W).to(torch.float32)
l.bias.data = torch.from_numpy(bias).to(torch.float32)
X_torch = torch.from_numpy(X).to(torch.float32)
Y_torch = l(X_torch)

print(Y[0,0:5])
print(Y_torch[0,0:5])
print( np.allclose(Y, Y_torch.detach().numpy(), atol=1e-4, rtol=1e-4))



[-2.4500256  -4.164225    2.0214434   0.48262623  0.05314786]
tensor([-2.4500, -4.1642,  2.0214,  0.4826,  0.0531], grad_fn=<SliceBackward0>)
True


In [102]:
# Relu forward and backward tests
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Load data
X = np.load(r'../with-torch-tests/relu-layer/X_relu.npy')
Y = np.load(r'../with-torch-tests/relu-layer/out_relu.npy')
dY = np.load(r'../with-torch-tests/relu-layer/up_grad_relu.npy')
dX = np.load(r'../with-torch-tests/relu-layer/down_grad_relu.npy')
print(X.shape, Y.shape, dY.shape, dX.shape)
# Convert to PyTorch tensors
# make sure to set requires_grad=True for the input tensor so that the Autograd engine can compute the gradients
X_torch = torch.from_numpy(X).to(torch.float32).requires_grad_(True)
dY_torch = torch.from_numpy(dY).to(torch.float32)

# Forward pass with ReLU
relu = nn.ReLU()
Y_torch = relu(X_torch)

# Compare the forward pass results
print("Forward pass comparison:")
print("Y (numpy):", Y[0, 0:5])
print("Y_torch:", Y_torch.detach().numpy()[0, 0:5])
print("Match:", np.allclose(Y, Y_torch.detach().numpy(), atol=1e-4, rtol=1e-4))


# Validate the backward pass
Y_torch.backward(dY_torch)

# Get the gradients from X_torch
dX_torch = X_torch.grad

# Compare the backward pass results
print("\nBackward pass comparison:")
print("dX (numpy):", dX[0, 0:5])
print("dX_torch:", dX_torch.numpy()[0, 0:5])
print("Match:", np.allclose(dX, dX_torch.numpy(), atol=1e-4, rtol=1e-4))


(100, 100) (100, 100) (100, 100) (100, 100)
Forward pass comparison:
Y (numpy): [0.09152496 0.         0.3774835  0.         0.59312725]
Y_torch: [0.09152496 0.         0.3774835  0.         0.59312725]
Match: True

Backward pass comparison:
dX (numpy): [-0.9976806   0.          0.29630423  0.         -0.45951718]
dX_torch: [-0.9976806   0.          0.29630423  0.         -0.45951718]
Match: True


In [103]:
m = nn.LogSoftmax(dim=1)
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.randn(3, 5, requires_grad=True)
# each element in target has to have 0 <= value < C
target = torch.tensor([1, 0, 4])
print(target.dtype)
output = loss(m(input), target)
output.backward()
# 2D loss example (used, for example, with image inputs)
N, C = 5, 4
loss = nn.NLLLoss()
# input is of size N x C x height x width
data = torch.randn(N, 16, 10, 10)
conv = nn.Conv2d(16, C, (3, 3))
m = nn.LogSoftmax(dim=1)
# each element in target has to have 0 <= value < C
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
output = loss(m(conv(data)), target)
output.backward()

torch.int64


In [104]:
import numpy as np
A = np.load(r'../with-torch-tests/matmul/A.npy')
B = np.load(r'../with-torch-tests/matmul/B.npy')
C = np.load(r'../with-torch-tests/matmul/C.npy')
print(A.shape, B.shape, C.shape)
C_py = A @ B
print(C[:5,:5])
print(C_py[:5,:5])
print(np.allclose(C, C_py, atol=1e-4, rtol=1e-4))



(1024, 128) (1024, 128) (1024, 128)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 1024 is different from 128)

In [None]:
# Test backward pass of the Linear layer
import numpy as np
import torch
import torch.nn as nn

# Load the input data
X = np.load('../with-torch-tests/linear-backward/X_c.npy')
W = np.load('../with-torch-tests/linear-backward/W_c.npy')
bias = np.load('../with-torch-tests/linear-backward/bias_c.npy')
upgrad = np.load('../with-torch-tests/linear-backward/up_grad.npy')


# out to comoare to
dLdb = np.load('../with-torch-tests/linear-backward/dLdb.npy')
dLdW = np.load('../with-torch-tests/linear-backward/dLdW.npy')
dLdX = np.load('../with-torch-tests/linear-backward/dLdX.npy')

# get sizes
B,N = X.shape
M,_ = W.shape

print(X.shape, W.shape, bias.shape, upgrad.shape)

lin = nn.Linear(N,M)
lin.weight.data = torch.from_numpy(W).to(torch.float32)
lin.bias.data = torch.from_numpy(bias).to(torch.float32)
# X must be a tensor with requires_grad=True
X_torch = torch.from_numpy(X).to(torch.float32).requires_grad_(True)
upgrad_torch = torch.from_numpy(upgrad).to(torch.float32)

# Forward pass
Y = lin(X_torch)

# Backward pass
Y.backward(upgrad_torch)

# Get the gradients
dLdW_torch = lin.weight.grad
dLdb_torch = lin.bias.grad
dLdX_torch = X_torch.grad

# Compare the gradients
# print("dLdW (numpy):", dLdW[0, 0:5])
# print("dLdW_torch:", dLdW_torch.numpy()[0, 0:5])
print("Match:", np.allclose(dLdW, dLdW_torch.numpy(), atol=1e-4, rtol=1e-4))

# print("dLdb (numpy):", dLdb[0:10].reshape(-1, 1))
# print("dLdb_torch:", dLdb_torch.numpy()[0:10].reshape(-1, 1))
print("Match:", np.allclose(dLdb.reshape(-1, 1), dLdb_torch.numpy().reshape(-1, 1), atol=1e-2, rtol=1e-2))

# print("dLdX (numpy):", dLdX[0, 0:5])
# print("dLdX_torch:", dLdX_torch.numpy()[0, 0:5])
print("Match:", np.allclose(dLdX, dLdX_torch.numpy(), atol=1e-4, rtol=1e-4))

FileNotFoundError: [Errno 2] No such file or directory: '../with-torch-tests/linear-backward/X_c.npy'