In [1]:
import numpy as np

def load_and_convert_to_float32(filename):
    """Load the array stored at ``filename`` and return it cast to float32.

    Args:
        filename: Location of the saved array; handed unchanged to ``np.load``.

    Returns:
        The loaded data converted with ``astype(np.float32)``.
    """
    return np.load(filename).astype(np.float32)


# Held-out split first, then the training split; every array comes back as float32.
test_x, test_y = (
    load_and_convert_to_float32('test_x.npy'),
    load_and_convert_to_float32('test_y.npy'),
)
train_x, train_y = (
    load_and_convert_to_float32('train_x.npy'),
    load_and_convert_to_float32('train_y.npy'),
)


In [7]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# The feature count is the length of the second axis of the training matrix.
input_size = train_x.shape[1]

# Every distinct value found in train_y is counted as one class.
num_classes = np.unique(train_y).size

class NeuralNetwork(nn.Module):
    """Fully connected classifier with three hidden stages and a linear head.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU, with 512, 256 and 128
    units respectively. The head maps the 128 features to ``num_classes`` values.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of values produced by the output layer.
    """

    @staticmethod
    def _stage(in_features, out_features):
        """Build one Linear -> BatchNorm1d -> ReLU stage."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.ReLU(),
        )

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.layer1 = self._stage(input_size, 512)
        self.layer2 = self._stage(512, 256)
        self.layer3 = self._stage(256, 128)
        self.output_layer = nn.Linear(128, num_classes)

    def forward(self, x):
        """Pass ``x`` through the three hidden stages and return the head's raw output."""
        for stage in (self.layer1, self.layer2, self.layer3):
            x = stage(x)
        return self.output_layer(x)

# Build the classifier for this dataset's feature and class counts.
model = NeuralNetwork(input_size, num_classes)

# Adam at a 1e-3 learning rate, optimising a cross-entropy objective.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()



In [8]:
from torch.utils.data import TensorDataset, DataLoader

# Copy the NumPy splits into tensors: float32 features and int64 labels.
train_x_tensor = torch.tensor(train_x, dtype=torch.float32)
train_y_tensor = torch.tensor(train_y, dtype=torch.long)
test_x_tensor = torch.tensor(test_x, dtype=torch.float32)
test_y_tensor = torch.tensor(test_y, dtype=torch.long)

# Pair features with labels for each split.
train_dataset = TensorDataset(train_x_tensor, train_y_tensor)
test_dataset = TensorDataset(test_x_tensor, test_y_tensor)

# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)


# Train for ten passes over the training loader.
for epoch in range(10):
    # Training mode: the BatchNorm1d layers normalise with per-batch statistics.
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # drop gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Switch to inference mode and count correct top-1 predictions on the test loader.
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # argmax over the class axis replaces `torch.max(outputs.data, 1)`:
        # `.data` is a legacy accessor that is redundant under no_grad, and
        # binding the discarded max values to `_` shadows IPython's
        # last-output variable in a notebook session.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fraction of test rows whose predicted class matches the label.
accuracy = correct / total
print(f'Accuracy: {accuracy * 100}%')


Accuracy: 82.06%


In [9]:
import torch
import torch.autograd.profiler as profiler

# Rebuild the test tensors, dataset and loader used for profiling.
test_x_tensor = torch.tensor(test_x).float()
test_y_tensor = torch.tensor(test_y).long()
test_dataset = TensorDataset(test_x_tensor, test_y_tensor)
test_loader = DataLoader(dataset=test_dataset, batch_size=64)

# Upper bound on the number of batches pushed through the model while profiling.
num_batches_to_profile = 100

# Set inference mode explicitly instead of relying on whatever mode an earlier
# cell left the model in. In training mode the BatchNorm1d layers would
# normalise with batch statistics and overwrite their running statistics
# during these forward passes.
model.eval()

# The loader iteration happens inside the profiler context, so batch fetching
# is part of the reported timings alongside the forward passes.
with profiler.profile(record_shapes=True, profile_memory=True, use_cuda=torch.cuda.is_available()) as prof:
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            if i >= num_batches_to_profile:
                break
            outputs = model(inputs)

# Show the ten operators with the largest total CPU time.
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))


-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        22.40%      59.753ms        45.99%     122.678ms       1.215ms      12.67 Mb           0 b           101  
                                           aten::linear         0.73%       1.934ms        42.56%     113.505ms     283.762us      22.36 Mb      42.00 Kb           400  
                                            aten::addmm        37.01%      98.708ms        40.65%     108.428ms     271.070us      22.36 Mb      22.36

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.autograd.profiler as profiler
import numpy as np

# Features become float32 tensors, labels become int64 tensors.
train_x_tensor = torch.tensor(train_x, dtype=torch.float32)
test_x_tensor = torch.tensor(test_x, dtype=torch.float32)
train_y_tensor = torch.tensor(train_y, dtype=torch.long)
test_y_tensor = torch.tensor(test_y, dtype=torch.long)

# Training split: batches of 64, reshuffled on every pass.
train_dataset = TensorDataset(train_x_tensor, train_y_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Test split: batches of 64 in dataset order.
test_dataset = TensorDataset(test_x_tensor, test_y_tensor)
test_loader = DataLoader(test_dataset, batch_size=64)

class NeuralNetworkReLU(nn.Module):
    """Fully connected classifier held in a single ``nn.Sequential`` named ``layers``.

    The stack is three Linear -> BatchNorm1d -> ReLU groups (512, 256, 128 units)
    followed by a final Linear producing ``num_classes`` values. The activations
    sit at positions 2, 5 and 8 of ``layers``.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of values produced by the final layer.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        widths = (input_size, 512, 256, 128)
        modules = []
        # One Linear -> BatchNorm1d -> ReLU group per consecutive pair of widths.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.extend((
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
            ))
        modules.append(nn.Linear(widths[-1], num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the whole stack to ``x`` and return the final layer's output."""
        return self.layers(x)

class NeuralNetworkTanh(NeuralNetworkReLU):
    """Same stack as ``NeuralNetworkReLU`` with Tanh at positions 2, 5 and 8 of ``layers``."""

    def __init__(self, input_size, num_classes):
        super().__init__(input_size, num_classes)
        # Replace each of the parent's three activation slots with its own Tanh module.
        for slot in (2, 5, 8):
            self.layers[slot] = nn.Tanh()

class NeuralNetworkSigmoid(NeuralNetworkReLU):
    """Same stack as ``NeuralNetworkReLU`` with Sigmoid at positions 2, 5 and 8 of ``layers``."""

    def __init__(self, input_size, num_classes):
        super().__init__(input_size, num_classes)
        # Replace each of the parent's three activation slots with its own Sigmoid module.
        for slot in (2, 5, 8):
            self.layers[slot] = nn.Sigmoid()

class NeuralNetworkGeLU(NeuralNetworkReLU):
    """Same stack as ``NeuralNetworkReLU`` with GELU at positions 2, 5 and 8 of ``layers``."""

    def __init__(self, input_size, num_classes):
        super().__init__(input_size, num_classes)
        # Replace each of the parent's three activation slots with its own GELU module.
        for slot in (2, 5, 8):
            self.layers[slot] = nn.GELU()


# Network dimensions derived from the training data.
input_size = train_x.shape[1]
num_classes = len(np.unique(train_y))

# One freshly initialised network per activation function. None of them is
# trained in this cell; they are only run forward under the profiler.
model_relu = NeuralNetworkReLU(input_size, num_classes)
model_tanh = NeuralNetworkTanh(input_size, num_classes)
model_sigmoid = NeuralNetworkSigmoid(input_size, num_classes)
model_gelu = NeuralNetworkGeLU(input_size, num_classes)

models = {'ReLU': model_relu, 'Tanh': model_tanh, 'Sigmoid': model_sigmoid, 'GeLU': model_gelu}

# The loop variable is `net`, not `model`, so the trained global `model` from
# the earlier cells is not silently rebound to the last network profiled here.
# The unused labels go to `_labels` rather than `_`, which IPython reserves
# for the last cell output.
for activation_function, net in models.items():
    with profiler.profile(record_shapes=True, profile_memory=True, use_cuda=torch.cuda.is_available()) as prof:
        net.eval()
        with torch.no_grad():
            for i, (inputs, _labels) in enumerate(test_loader):
                if i >= 100:  # Profile on 100 batches
                    break
                outputs = net(inputs)

    # Report the ten operators with the largest total CPU time for this network.
    print(f"Profiling results for {activation_function}:")
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))
    print("\n")


Profiling results for ReLU:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        49.91%     193.445ms        64.93%     251.687ms       2.492ms      12.67 Mb           0 b           101  
                                           aten::linear         0.53%       2.073ms        29.73%     115.249ms     288.123us      22.36 Mb      42.00 Kb           400  
                                            aten::addmm        26.85%     104.061ms        28.38%     109.989ms     274.97

In [11]:
class NeuralNetworkBatchNorm(nn.Module):
    """Classifier whose hidden stages are Linear -> BatchNorm1d -> ReLU.

    Hidden widths are 512, 256 and 128; ``output_layer`` maps the last 128
    features to ``num_classes`` values.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of values produced by the output layer.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
        )
        self.layer3 = nn.Sequential(
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
        )
        self.output_layer = nn.Linear(128, num_classes)

    def forward(self, x):
        """Apply the three hidden stages in order, then the output layer."""
        hidden = self.layer3(self.layer2(self.layer1(x)))
        return self.output_layer(hidden)

class NeuralNetworkDropout(nn.Module):
    """Classifier whose hidden stages are Linear -> Dropout(0.5) -> ReLU.

    Hidden widths are 512, 256 and 128; ``output_layer`` maps the last 128
    features to ``num_classes`` values.

    Args:
        input_size: Number of features in each input row.
        num_classes: Number of values produced by the output layer.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.layer3 = nn.Sequential(
            nn.Linear(256, 128),
            nn.Dropout(0.5),
            nn.ReLU(),
        )
        self.output_layer = nn.Linear(128, num_classes)

    def forward(self, x):
        """Apply the three hidden stages in order, then the output layer."""
        hidden = self.layer3(self.layer2(self.layer1(x)))
        return self.output_layer(hidden)


In [12]:
# Freshly initialised networks to compare under the profiler. Neither is
# trained in this cell; they are only run forward.
models = {
    "BatchNorm": NeuralNetworkBatchNorm(input_size, num_classes),
    "Dropout": NeuralNetworkDropout(input_size, num_classes)
}

# The loop variable is `net`, not `model`, so the trained global `model` from
# the earlier cells is not silently rebound to the last network profiled here.
# The unused labels go to `_labels` rather than `_`, which IPython reserves
# for the last cell output.
for name, net in models.items():
    with profiler.profile(record_shapes=True, profile_memory=True, use_cuda=torch.cuda.is_available()) as prof:
        net.eval()
        with torch.no_grad():
            for i, (inputs, _labels) in enumerate(test_loader):
                # Stop once 100 batches have gone through the network.
                if i >= 100:
                    break
                outputs = net(inputs)

    # Report the ten operators with the largest total CPU time for this network.
    print(f"Profiling results for {name}:")
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=10))

Profiling results for BatchNorm:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
enumerate(DataLoader)#_SingleProcessDataLoaderIter._...        57.06%     243.266ms        70.63%     301.127ms       2.981ms      12.67 Mb           0 b           101  
                                           aten::linear         0.45%       1.931ms        24.81%     105.772ms     264.430us      22.36 Mb      20.00 Kb           400  
                                            aten::addmm        22.31%      95.110ms        23.63%     100.753ms     2