In [1]:
from prelu_cnn import CNN

from datasets import load_from_disk

# Load the preprocessed dataset from disk. This is the full DatasetDict, not
# just the training data: the repr displayed by this cell lists both a `train`
# and a `validation` split.
dataset_path = "../processed_datasets/imagenet_processor"
ds = load_from_disk(dataset_path)
ds

Loading dataset from disk:   0%|          | 0/1550 [00:00<?, ?it/s]

Loading dataset from disk:   0%|          | 0/61 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['pixel_values', 'labels'],
        num_rows: 1280967
    })
    validation: Dataset({
        features: ['pixel_values', 'labels'],
        num_rows: 50000
    })
})

In [2]:
# Take a small slice of the validation split for a quick forward-pass check.
NUM_EVAL_SAMPLES = 1000

# `ds` is rebound to the subset below (later cells read it under that name),
# so the first run stashes the full DatasetDict in `ds_splits`. On a re-run
# `ds` is already a plain Dataset and `ds['validation']` would fail, so the
# split is always taken from `ds_splits`, which makes this cell idempotent.
if isinstance(ds, dict):  # DatasetDict is a dict subclass; Dataset is not
    ds_splits = ds
ds = ds_splits['validation'].select(range(NUM_EVAL_SAMPLES))
ds

Dataset({
    features: ['pixel_values', 'labels'],
    num_rows: 1000
})

In [3]:
# Pull the two feature columns of the subset out as whole-column objects.
# NOTE(review): a later cell calls `.to(device)` on both, so the dataset is
# presumably stored with a torch output format — confirm in the preprocessing.
inputs, labels = ds['pixel_values'], ds['labels']

In [4]:
import torch
# Select the compute device: CUDA when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Construct the network (use_prelu=False, use_builtin_conv=True), then place
# its parameters on the selected device.
model = CNN(use_prelu=False, use_builtin_conv=True)
model = model.to(device)

# The batch and its targets have to live on the same device as the model.
inputs, labels = inputs.to(device), labels.to(device)

# Single forward pass over the whole subset in one batch.
# NOTE(review): autograd is active here (the displayed result carries a
# grad_fn) and the model is left in its default mode. If this is meant as
# inference only, consider model.eval() and torch.no_grad() — confirm intent
# first, since the loss computed further down currently depends on the graph.
outputs = model(inputs)
outputs

Using device: cuda


tensor([[ 0.2276,  0.3220, -0.3458,  ..., -0.3076,  0.0704, -0.2895],
        [ 0.2216, -0.0497, -0.1900,  ..., -0.2768, -0.0554, -0.1468],
        [ 0.1475,  0.2722, -0.2436,  ...,  0.1617, -0.1047, -0.3831],
        ...,
        [ 0.1871,  0.1047, -0.1447,  ...,  0.0891, -0.1536,  0.0940],
        [ 0.6746,  0.2782, -0.8391,  ...,  0.5544, -0.5694, -0.6035],
        [-0.0197,  0.0915, -0.3428,  ..., -0.1248,  0.0804,  0.0300]],
       device='cuda:0', grad_fn=<AddmmBackward0>)

In [5]:
import numpy as np

# `outputs` is indexed as (sample, class), so reducing along axis 1 yields, for
# every sample, the index of the class with the largest score. The scores are
# fed to CrossEntropyLoss elsewhere in this notebook, so they are presumably
# raw logits rather than probabilities; the argmax is the same either way.
scores_np = outputs.detach().cpu().numpy()
labels_np = labels.cpu().numpy()

predictions = scores_np.argmax(axis=1)
print("Predictions:", predictions)
print("Labels:", labels_np)
print("Correct predictions:", (predictions == labels_np).sum(), "out of", len(predictions))


Predictions: [  4 610 222 197 950 844 547 904 577 812 211 415 751 308 239 506 222  20
 560 266 427 222 309 633 222 931 344 308 812 802 221 487 885 751 638 406
 349 885   8 736 802 451 432 704 801  60 456 222 870 432 277 904 954 598
 577 822 816 972 577 766 183 880 734 748 638  60 183 752 644 221 649 867
 447 547 230 730 221 263 349 885 581  45 576 444 334 126 867 553 730 173
 230 481 875 583 805 700 333 751 931 752 359 668 428 576 977 638 576 828
 663 631 427 239 577  17 415 153 734 744 547   4 553 486 610 994 200 875
  89 173 447 598 875  88 239   4 343 846 802  94 867 406 154 875 658 444
 140 176 820 994 816 603 810 577 904 481 717 173 216 820  11 342 153 400
 880 610 802 545 658 900 183 181 870 429 820 807 631 222 547 332 308  23
 463 176 222 900 530 788 730 697 795 970 415 222 220  91 668 222 406 385
 197 487 465  60 788 456 560 994 664 778 447 863 165 456 589 368 885 576
  42 432 734 618 802 553 221 222 176 154 820 944 456 751 695 663 362  86
 950 222 994 870  94 611 782 900 717 4

In [6]:
import torch.nn as nn

# Mean cross-entropy between the model's class scores and the integer targets.
# The recorded value (6.9531) is close to ln(1000) ≈ 6.908, i.e. what a uniform
# guess would score — consistent with an untrained model, assuming the output
# layer has 1000 classes (TODO confirm against the CNN definition).
loss_fct = nn.CrossEntropyLoss()
loss = loss_fct(input=outputs, target=labels)
loss

tensor(6.9531, device='cuda:0', grad_fn=<NllLossBackward0>)