# Testing texture dataset

The samples with label 0 cannot be recognised by the model.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')

In [3]:
import datetime
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, TensorDataset

from defences.util import dataset2tensor
from models.numeric import NumericModel
from models.torch_util import train, validate, print_acc_per_label

In [4]:
DATASET_FILE = 'texture_preprocessed.csv'
TEST_SIZE = 1000
# DATASET_FILE = 'segment_preprocessed.csv'
TEST_SIZE = 400
BATCH_SIZE = 256
EPOCHS = 400

In [5]:
data_path = os.path.join('..', 'data', DATASET_FILE)
df = pd.read_csv(data_path, sep=',')

In [6]:
y = df['Class'].to_numpy().astype(np.long)
X = df.drop(['Class'], axis=1).to_numpy().astype(np.float32)
labels = np.unique(y)

N_FEATURES = X.shape[1]
N_CLASSES = len(labels)

print(X.shape, y.shape)
print(labels)
print(len(labels))

(5000, 40) (5000,)
[0 1 2 3 4 5 6 7 8 9]
10


In [7]:
X[:5]

array([[-1.226, -0.648, -0.503, -0.473, -0.256, -0.138, -0.7  , -0.424,
        -0.319, -0.647, -1.226, -0.717, -0.48 , -0.679, -0.456, -0.533,
        -0.873, -0.555, -0.529, -0.611, -1.226, -0.939, -0.853, -0.822,
        -0.678, -0.592, -0.943, -0.76 , -0.689, -0.88 , -1.226, -0.835,
        -0.633, -0.791, -0.586, -0.632, -0.958, -0.689, -0.643, -0.725],
       [-1.299, -0.613, -0.595, -0.466, -0.275, -0.205, -0.748, -0.497,
        -0.42 , -0.866, -1.299, -0.728, -0.562, -0.692, -0.475, -0.571,
        -0.884, -0.571, -0.531, -0.659, -1.299, -1.075, -0.98 , -0.985,
        -0.839, -0.767, -1.08 , -0.906, -0.848, -0.99 , -1.299, -0.925,
        -0.814, -0.849, -0.727, -0.731, -0.994, -0.802, -0.728, -0.813],
       [-1.072, -0.479, -0.563, -0.27 , -0.174, -0.097, -0.482, -0.343,
        -0.256, -0.689, -1.072, -0.618, -0.454, -0.588, -0.409, -0.486,
        -0.753, -0.494, -0.447, -0.505, -1.072, -0.815, -0.861, -0.695,
        -0.636, -0.563, -0.8  , -0.691, -0.637, -0.858, -1.072

## ISSUE

Normalization is the problem

In [8]:
# # Normalize data
standard = StandardScaler().fit(X)
X = standard.transform(X)

# minmax = MinMaxScaler(feature_range=(0,1)).fit(X)
# X = minmax.transform(X)

In [9]:
print(X.min(axis=0))
print(X.max(axis=0))
print(X.mean(axis=0))

[-1.6991761 -3.0204427 -2.3236039 -3.1316085 -3.0885038 -3.4629464
 -3.0620043 -2.9431925 -3.1191134 -2.0393856 -1.6991761 -2.4841971
 -2.1920342 -2.925627  -2.6142213 -2.629669  -2.3299778 -2.34724
 -2.448813  -2.0833259 -1.6991761 -2.2112603 -1.7178668 -2.2588022
 -2.4808836 -2.794671  -2.2396243 -2.3203485 -2.4681873 -1.9936692
 -1.6991761 -2.782059  -1.921552  -3.267038  -2.6351414 -2.4603403
 -2.8487718 -2.5013092 -2.6669505 -2.040127 ]
[8.943331  4.362412  2.8368363 4.2319946 3.15971   3.0317202 5.533038
 3.6416948 3.1631396 2.7966745 8.943331  4.6503043 2.9794364 5.627714
 3.0580568 3.747291  3.7859569 2.5217977 2.3836923 2.558026  8.943331
 5.8147793 3.557258  4.6076035 3.4804652 2.9618707 5.694679  3.9676657
 3.4285564 3.646693  8.943331  6.221525  3.3817766 6.460301  4.1681867
 4.2579293 5.1361303 3.4013631 3.4550242 3.195457 ]
[-3.0517577e-09 -1.2207031e-08  6.1035155e-09  0.0000000e+00
  0.0000000e+00 -2.4414062e-08  2.4414062e-08  1.2207031e-08
  1.2207031e-08 -6.1035155e-

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=1234)

print(X_train.shape, X_test.shape)

dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
dataset_test = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.long))
loader_train = DataLoader(dataset_train, BATCH_SIZE, shuffle=True)
loader_test = DataLoader(dataset_test, BATCH_SIZE, shuffle=True)

(4600, 40) (400, 40)


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [12]:
model = NumericModel(N_FEATURES, N_FEATURES * 4, N_CLASSES, use_prob=True).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
loss = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-7)

In [13]:
since = time.time()
for epoch in range(EPOCHS):
    start = time.time()
    tr_loss, tr_acc = train(model, loader_train, loss, optimizer, device)
    va_loss, va_acc = validate(model, loader_test, loss, device)
    scheduler.step()

    time_elapsed = time.time() - start
    if (epoch+1) % 10 == 0:
        print('{:2d}/{:d}[{:s}] Train Loss: {:.4f} Acc: {:.4f}%, Test Loss: {:.4f} Acc: {:.4f}%'.format(
            epoch+1, EPOCHS, str(datetime.timedelta(seconds=time_elapsed)), tr_loss, tr_acc*100, va_loss, va_acc*100))

time_elapsed = time.time() - since
print('Total run time:', str(datetime.timedelta(seconds=time_elapsed)))

10/400[0:00:00.061324] Train Loss: 2.0805 Acc: 40.3478%, Test Loss: 2.0643 Acc: 40.3478%
20/400[0:00:00.071826] Train Loss: 1.9097 Acc: 60.3913%, Test Loss: 1.9022 Acc: 60.8043%
30/400[0:00:00.062217] Train Loss: 1.7677 Acc: 76.9130%, Test Loss: 1.7611 Acc: 77.8261%
40/400[0:00:00.057972] Train Loss: 1.6585 Acc: 85.7609%, Test Loss: 1.6539 Acc: 85.7826%
50/400[0:00:00.059918] Train Loss: 1.5764 Acc: 93.7826%, Test Loss: 1.5721 Acc: 94.0217%
60/400[0:00:00.060584] Train Loss: 1.5214 Acc: 97.4130%, Test Loss: 1.5195 Acc: 97.5435%
70/400[0:00:00.058187] Train Loss: 1.4997 Acc: 98.4348%, Test Loss: 1.4990 Acc: 98.4130%
80/400[0:00:00.059341] Train Loss: 1.4903 Acc: 98.6522%, Test Loss: 1.4898 Acc: 98.7174%
90/400[0:00:00.059783] Train Loss: 1.4851 Acc: 98.8913%, Test Loss: 1.4848 Acc: 98.9130%
100/400[0:00:00.094333] Train Loss: 1.4816 Acc: 99.0870%, Test Loss: 1.4814 Acc: 99.0870%
110/400[0:00:00.059701] Train Loss: 1.4792 Acc: 99.2391%, Test Loss: 1.4790 Acc: 99.2391%
120/400[0:00:00.059

In [14]:
def predict(model, X, device):
    model.eval()
    dataset = TensorDataset(torch.from_numpy(X))
    loader = DataLoader(dataset, batch_size=512, shuffle=False)
    tensor_pred = -torch.ones(len(X), dtype=torch.long)
    
    start = 0
    with torch.no_grad():
        for batch in loader:
            x = batch[0].to(device)
            end = start + x.size(0)
            outputs = model(x)
            tensor_pred[start:end] = outputs.max(1)[1].type(torch.long).cpu().detach()
            start = end
    return tensor_pred.detach().numpy()

In [15]:
print('Training set:')
X, y = dataset2tensor(dataset_train)
X = X.cpu().detach().numpy()
y = y.cpu().detach().numpy()
print_acc_per_label(model, X, y, device)

Training set:
[0] 455/455
[1] 470/472
[2] 453/454
[3] 465/467
[4] 447/447
[5] 446/448
[6] 465/466
[7] 467/467
[8] 469/471
[9] 452/453


In [16]:
print('Test set:')
X, y = dataset2tensor(dataset_test)
X = X.cpu().detach().numpy()
y = y.cpu().detach().numpy()
print_acc_per_label(model, X, y, device)

Test set:
[0] 455/455
[1] 470/472
[2] 453/454
[3] 465/467
[4] 447/447
[5] 446/448
[6] 465/466
[7] 467/467
[8] 469/471
[9] 452/453
