In [1]:
import pandas as pd
import ast
import torch.nn as nn
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable

### Load Data

In [20]:
class RedDataset(Dataset):
    def __init__(self, datafile, train):
        self.data = pd.read_csv(datafile)
        self.data = self.data[self.data['train'] == train]
        self.data = self.data.reset_index(drop=True)
        
        print(self.data)
        

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        item = self.data.loc[idx]
        return torch.tensor(ast.literal_eval(item['data'])), torch.tensor(item['label'])

    def shape(self):
        item = self.data.loc[0]
        lst = ast.literal_eval(item['data'])
        return len(lst), len(lst[0])

test_data = RedDataset('RedData.csv', train=False)

print(test_data[0])
print(train_data[0])

                                                   data  label  train
0     [[ 0.48066562, 0.00831655,-0.39940253,-0.35378...      6  False
1     [[ 0.37403064, 0.77835609, 0.66822243, 0.21494...      3  False
2     [[-0.39066542,-0.23299751, 0.30417249, 0.34154...      1  False
3     [[-0.33413517,-0.26792407,-0.10983959, 0.02827...      9  False
4     [[-0.42956538,-0.59341336,-0.01145271, 0.47951...      4  False
...                                                 ...    ...    ...
9995  [[ 0.19634359, 0.67472106, 0.71291648, 0.24256...      1  False
9996  [[-0.09088958,-0.22722709,-0.24110945, 0.03927...      7  False
9997  [[-0.31489196, 0.16063953, 0.63439211, 0.76918...      9  False
9998  [[ 0.26800376, 0.13536895, 0.30791984, 0.55271...      9  False
9999  [[ 0.35186673, 0.25961248, 0.18172967, 0.11000...      6  False

[10000 rows x 3 columns]
(tensor([[ 0.4807,  0.0083, -0.3994, -0.3538],
        [ 0.4291, -0.4309, -0.6659, -0.5255],
        [ 0.3449, -0.2069, -0.0395,  0.07

In [5]:
train_data = RedDataset('RedData.csv', train=True)
print(len(train_data))
test_data = RedDataset('RedData.csv', train=False)
print(len(test_data))

loaders = {
    'train': DataLoader(train_data,
                        batch_size=100,
                        shuffle=True,
                        num_workers=1),

    'test': DataLoader(test_data,
                       batch_size=100,
                       shuffle=True,
                       num_workers=1),
}

60000
10000


### Model architecture

In [4]:
class RedModel(nn.Module):
    def __init__(self, input_size):
        super(RedModel, self).__init__()
        layer_size = input_size[0] * input_size[1]
        self.linear1 = nn.Linear(layer_size, layer_size)
        self.relu1 = nn.ReLU()
        self.out = nn.Linear(layer_size, 10)

    def forward(self, x):
        x1 = x.view(x.size(0), -1)
        x2 = self.linear1(x1)
        x3 = self.relu1(x2)
        output = self.out(x3)
        return {
            'in': x,
            'out': output,
            'trans': x1,
            'linear1': x2,
            'relu1': x3,
        }

### Train model

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')

model = RedModel(train_data.shape())
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
num_epochs = 10


def train(num_epochs, model, loaders):

    model.train()

    # Train the model
    total_step = len(loaders['train'])

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):

            # gives batch data, normalize x when iterate train_loader
            b_x = Variable(images)   # batch x
            b_y = Variable(labels)   # batch output = model(b_x)[0]

            results = model(b_x)['out']
            loss = loss_func(results, b_y)

            # clear gradients for this training step
            optimizer.zero_grad()

            # backpropagation, compute gradients
            loss.backward()                # apply gradients
            optimizer.step()

            if (i + 1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
                pass

        pass

    pass


train(num_epochs, model, loaders)

Device: cpu
Epoch [1/10], Step [100/600], Loss: 1.5895
Epoch [1/10], Step [200/600], Loss: 1.7488
Epoch [1/10], Step [300/600], Loss: 1.5267
Epoch [1/10], Step [400/600], Loss: 1.4877
Epoch [1/10], Step [500/600], Loss: 1.3739
Epoch [1/10], Step [600/600], Loss: 1.4713
Epoch [2/10], Step [100/600], Loss: 1.5069
Epoch [2/10], Step [200/600], Loss: 1.3611
Epoch [2/10], Step [300/600], Loss: 1.2250
Epoch [2/10], Step [400/600], Loss: 1.2422
Epoch [2/10], Step [500/600], Loss: 1.2792
Epoch [2/10], Step [600/600], Loss: 1.4943
Epoch [3/10], Step [100/600], Loss: 1.5449
Epoch [3/10], Step [200/600], Loss: 1.1930
Epoch [3/10], Step [300/600], Loss: 1.4722
Epoch [3/10], Step [400/600], Loss: 1.4463
Epoch [3/10], Step [500/600], Loss: 1.2877
Epoch [3/10], Step [600/600], Loss: 1.4179
Epoch [4/10], Step [100/600], Loss: 1.5591
Epoch [4/10], Step [200/600], Loss: 1.1574
Epoch [4/10], Step [300/600], Loss: 1.1119
Epoch [4/10], Step [400/600], Loss: 1.1926
Epoch [4/10], Step [500/600], Loss: 1.3648

In [11]:
def test():
    model.eval()
    with torch.no_grad():
        for images, labels in loaders['test']:
            results = model(images)
            pred_y = torch.max(results['out'], 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            pass

    print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    pass


test()

torch.save(model.state_dict(), 'model.pt')

KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3805, in get_loc
    return self._engine.get_loc(casted_key)
  File "index.pyx", line 167, in pandas._libs.index.IndexEngine.get_loc
  File "index.pyx", line 196, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 2606, in pandas._libs.hashtable.Int64HashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 2630, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 2412

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 309, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_14192/2465348463.py", line 10, in __getitem__
    item = self.data.loc[idx]
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/indexing.py", line 1191, in __getitem__
    return self._getitem_axis(maybe_callable, axis=axis)
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/indexing.py", line 1431, in _getitem_axis
    return self._get_label(key, axis=axis)
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/indexing.py", line 1381, in _get_label
    return self.obj.xs(label, axis=axis)
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/generic.py", line 4301, in xs
    loc = index.get_loc(key)
  File "/home/croniev/Code/cysec/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 3812, in get_loc
    raise KeyError(key) from err
KeyError: 2412
