In [1]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from PIL.Image import fromarray, Image
from sklearn.utils.estimator_checks import check_estimator
from torch.utils.data import DataLoader, SubsetRandomSampler, Dataset
from tqdm import tqdm
from datetime import datetime as dt
import torch.optim as optim
import torch.nn.functional as F

from typing import Tuple
import torch
from torch import nn
from torchvision import transforms
import cv2
import numpy as np
import random

EPOCHS        = 3
RANDOM_STATE  = 1337
LEARNING_RATE = 1e-3 
BATCH_SIZE    = 64
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

In [2]:
def validate(device: str, epoch: int, optimizer, loss_fn, model, dataset: DataLoader):
    # Validation
    model.eval()
    it_eval = tqdm(enumerate(dataset), total=len(dataset))
    running_loss = 0.
    correct = 0
    qt = 1
    metrics = dict(tp=0, tn=0, fp=0, fn=0)
    y_pred = list()
    y_true = list()
    with torch.no_grad():
        for _, (x, y) in it_eval:
            x = x.to(device)
            y = y.to(device)

            output = model(x)
            running_loss += loss_fn(output, y).item()
            y_pred.extend(torch.argmax(output, 1).cpu().numpy())
            y_true.extend(y.data.cpu().numpy())
            correct += torch.sum(torch.argmax(output, 1).eq(y)).item()
            qt += len(x)
            desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Val. Acc: {correct/qt:.4f} Val. Loss: {running_loss / len(dataset):.8f}"
            it_eval.set_description(desc)

    
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    metrics["tp"] = tp
    metrics["fp"] = fp
    metrics["tn"] = tn
    metrics["fn"] = fn
    return running_loss / len(dataset), correct/qt, metrics

def train(device: str, epoch: int, optimizer, loss_fn, model, dataset: DataLoader):
    model.train()
    running_loss = 0.
    qt = 1
    correct = 0
    it = tqdm(enumerate(dataset), total=len(dataset))

    for _, (x, y) in it:
        x = x.to(device)
        y = y.to(device)
        
        # Make predictions for this batch
        outputs = model(x)

        # Zero your gradients for every batch!
        optimizer.zero_grad()
        loss = loss_fn(outputs, y)
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        correct += torch.sum(torch.argmax(outputs, 1).eq(y)).item()
        qt += len(x)
    
        # Gather data and report
        running_loss += loss.item()

        desc = f"[{now()}] Epoch {str(epoch).zfill(3)} Acc: {correct/qt:.4f} Loss: {running_loss / len(dataset):.8f}"
        it.set_description(desc)
    return running_loss / len(dataset), correct/qt

def now():
    return dt.now().strftime("%d-%m-%Y %H-%M-%S")

class ToImage:
        
    def __call__(self, array: torch.Tensor, keep_normalization=True):
        feat = array.shape[0]
        n = int(np.ceil(feat ** 0.5))

        array = array.cpu().numpy().copy()
        
        # Squared size with padding
        array.resize((n, n))
        if not keep_normalization:
            return (array * 255).astype(np.uint8)

        return torch.Tensor(array.astype(np.float32)).unsqueeze(0)

class Resize:
    def __init__(self, shape):
        self._shape = shape

    def __call__(self, X, rgb=True):
        device = 'cpu'
        
        if isinstance(X, Image):
            X = np.array(X)
        
        if isinstance(X, torch.Tensor):
            device = X.device.type
            X = X.squeeze(0).cpu().numpy()

        ret = cv2.resize(X, dsize=self._shape, interpolation=cv2.INTER_CUBIC)
        
        if rgb:
            ret = cv2.cvtColor(ret, cv2.COLOR_GRAY2BGR)

        ret = torch.Tensor(ret)
        if rgb:
            ret = ret.view(3, *self._shape)
        
        if device == 'cuda':
            # ret = ret.to('cuda')
            ...
            
        return ret

class CustomDataset(Dataset):
    def __init__(self, subset: Tuple[torch.Tensor, torch.Tensor], transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        x, y = self.subset[0][index, :], self.subset[1][index]
    
        if self.transform:
            x = self.transform(x)

        return x, y
        
    def __len__(self):
        return self.subset[0].size(0)

# MLP

In [3]:
data = np.loadtxt('/data/img_nids/mlp/NIGEL_2014_01.csv', skiprows=1, delimiter=",", dtype=np.float32)
X, y = data[:, :-1], data[:, -1]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=RANDOM_STATE)

In [31]:
classifier = MLPClassifier(max_iter=EPOCHS, random_state=RANDOM_STATE, verbose=True)

In [49]:
data.shape

(259946, 2049)

In [53]:
aa = np.array(list(map(lambda x: ToImage().fit_transform(x, None), data)))

(550045736,)

In [39]:
w = []
for month in [str(i).zfill(2) for i in range(1, 13)]:
    path = f'/data/img_nids/mlp/NIGEL_2014_{month}.csv'

    data = np.loadtxt(path,skiprows=1, delimiter=",", dtype=np.float32)
    X, y = data[:, :-1], data[:, -1]
    if month == '01':
        X_train, X_test, y_train, y_test = train_test_split(data[:, :-1], data[:, -1], test_size=.3, random_state=RANDOM_STATE)
        classifier.fit(X_train, y_train)
        print(classifier.score(X_test, y_test), month)
    else:
        y_pred = classifier.predict(data[:, :-1])
        tn, fp, fn, tp = confusion_matrix(data[:, -1], y_pred).ravel()
        print(tp, tn, fp, fn, month)
        w.append(dict(fp=fp, fn=fn, tp=tp, tn=tn))

71710 67982 4525 797 01
37297 27419 10381 503 02
42576 31895 11578 897 03
27906 27174 1133 401 04
66319 45444 21753 878 05
36818 36111 1339 632 06
115744 76676 39397 329 07
53498 51938 2114 554 08
50008 193 49862 47 09
82763 57635 25643 515 10
60378 325273 8795 273690 11
128635 87549 42424 1338 12


In [41]:
fpr = []
fnr = []

for row in w:
    fpr.append(row['fp'] / (row['fp'] + row['tn']))
    fnr.append(row['fn'] / (row['fn'] + row['tp']))

In [42]:
fpr

[0.06240776752589405,
 0.2746296296296296,
 0.2663262254732823,
 0.040025435404670226,
 0.3237198089200411,
 0.0357543391188251,
 0.3394157125257381,
 0.03911048619847554,
 0.996144241334532,
 0.3079204591848988,
 0.026326975346336674,
 0.32640625360651826]

In [43]:
fnr

[0.010992042147654708,
 0.013306878306878307,
 0.020633496653095024,
 0.014166107323276928,
 0.013066059496703721,
 0.016875834445927905,
 0.0028344231647325392,
 0.010249389476800118,
 0.0009389671361502347,
 0.006184106246547708,
 0.819264341391573,
 0.010294445769505974]

In [44]:
with open('data.csv', 'w') as fp:
    fp.write("fpr,fnr\n")
    for _fp, _fn in zip(fpr, fnr):
        fp.write(f"{_fp},{_fn}\n")

# CNN

In [3]:
class SimpleNN(nn.Module):
    def __init__(self, in_features, out_features):
        super(SimpleNN, self).__init__()
        self._out_features = out_features
        self._in_features  = in_features

        self.conv1 = nn.Conv2d(3, in_features, kernel_size=4)
        self.conv2 = nn.Conv2d(in_features, 32, kernel_size=4)

        # out_channels (conv2) * saída da último max pooling ** 2
        self.fc1 = nn.Linear(32 * 35 * 35, 512) 
        self.fc2 = nn.Linear(512, out_features)
        self.dropout = nn.Dropout(p=0.4) 

    def forward(self, x: torch.Tensor):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2) # Max pooling over a (2, 2) window
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 3) # If the size is a square, you can specify with a single number

        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.dropout(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)

In [4]:
data = np.loadtxt('/data/img_nids/mlp/NIGEL_2014_01.csv', skiprows=1, delimiter=',', dtype=np.float32)
X, y = data[:, :-1], data[:, -1]

In [5]:
y.shape

(145014,)

In [6]:
device = 'cuda'
model = SimpleNN(224, 2)
model = model.to(device)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=RANDOM_STATE)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=.3, random_state=RANDOM_STATE)

X_train = torch.Tensor(X_train)
y_train = torch.LongTensor(y_train)
X_test  = torch.Tensor(X_test)
y_test  = torch.LongTensor(y_test)
X_val   = torch.Tensor(X_val)
y_val   = torch.LongTensor(y_val)

transform = transforms.Compose([
    ToImage(),
    Resize((224, 224)),
])

data_train = DataLoader(CustomDataset(subset=(X_train, y_train), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
data_test  = DataLoader(CustomDataset(subset=(X_test, y_test), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
data_val   = DataLoader(CustomDataset(subset=(X_val, y_val), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

loss_fn   = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [28]:
transform(X_train[0])[1]

tensor([[ 2.1609e-02,  2.1609e-02,  5.4941e-02,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 4.0421e-04,  4.0421e-04,  4.0421e-04,  ...,  5.6651e-02,
          2.4317e-02,  2.4317e-02],
        [ 2.4317e-02, -4.0131e-04, -4.0131e-04,  ...,  6.8874e-06,
          6.8874e-06, -4.1733e-04],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  5.5416e-12,
          5.5416e-12,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.3460e-04,
          4.4298e-05,  4.4298e-05]])

In [8]:
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train(device, epoch, optimizer, loss_fn, model, data_train)
    val_loss, val_acc, _ = validate(device, epoch, optimizer, loss_fn, model, data_val)

[02-11-2023 12-28-25] Epoch 001 Acc: 0.9433 Loss: 0.36980523: 100%|██████████| 1587/1587 [05:24<00:00,  4.89it/s]
[02-11-2023 12-28-36] Epoch 001 Val. Acc: 0.9453 Val. Loss: 0.36782611: 100%|██████████| 204/204 [00:10<00:00, 18.81it/s]
[02-11-2023 12-34-04] Epoch 002 Acc: 0.9415 Loss: 0.37161473: 100%|██████████| 1587/1587 [05:28<00:00,  4.84it/s]
[02-11-2023 12-34-15] Epoch 002 Val. Acc: 0.9446 Val. Loss: 0.36858508: 100%|██████████| 204/204 [00:10<00:00, 18.82it/s]
[02-11-2023 12-39-46] Epoch 003 Acc: 0.9465 Loss: 0.36673708: 100%|██████████| 1587/1587 [05:30<00:00,  4.81it/s]
[02-11-2023 12-39-57] Epoch 003 Val. Acc: 0.9450 Val. Loss: 0.36822003: 100%|██████████| 204/204 [00:10<00:00, 18.76it/s]


In [24]:
data2 = np.loadtxt(f'/data/img_nids/mlp/NIGEL_2014_02.csv', skiprows=1, delimiter=',', dtype=np.float32)
        
X_test2, y_test2 = data2[:, :-1], data2[:, -1]
X_test2  = torch.Tensor(X_test2)
y_test2  = torch.LongTensor(y_test2)

In [25]:
X_test2

tensor([[0.0562, 0.0000, 0.0000,  ..., 0.0107, 0.0673, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0788, 0.0000],
        [0.1454, 0.1223, 0.0000,  ..., 0.0439, 0.2630, 0.0000],
        ...,
        [0.0000, 0.0882, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0882, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0882, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])

In [26]:
data1 = np.loadtxt(f'/data/img_nids/mlp/NIGEL_2014_01.csv', skiprows=1, delimiter=',', dtype=np.float32)
        
X_test1, y_test1 = data1[:, :-1], data1[:, -1]
X_test1  = torch.Tensor(X_test1)
y_test1  = torch.LongTensor(y_test1)

In [27]:
X_test1

tensor([[0.0353, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.2826, 0.0000, 0.0000,  ..., 0.0000, 0.0268, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.1898, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.1898, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.1898, 0.0000]])

In [9]:
all_metrics = dict()
for month in [str(i).zfill(2) for i in range(1, 13)]:
    #if month != '01':
    data = np.loadtxt(f'/data/img_nids/mlp/NIGEL_2014_{month}.csv', skiprows=1, delimiter=',', dtype=np.float32)
        
    X_test, y_test = data[:, :-1], data[:, -1]
    X_test  = torch.Tensor(X_test)
    y_test  = torch.LongTensor(y_test)

    #cd = CustomDataset(subset=(X_test, y_test), transform=transform)
    #data_test  = DataLoader(cd, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
    data_test = DataLoader(CustomDataset(subset=(X_test, y_test), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

    _, _, metrics = validate(device, epoch, optimizer, loss_fn, model, data_test)
    all_metrics[month] = metrics
    print(all_metrics[month], month)

[02-11-2023 12-54-21] Epoch 003 Val. Acc: 0.9477 Val. Loss: 0.36559208: 100%|██████████| 2266/2266 [01:58<00:00, 19.18it/s]


{'tp': 70468, 'tn': 66957, 'fp': 5550, 'fn': 2039} 01


[02-11-2023 12-55-35] Epoch 003 Val. Acc: 0.9485 Val. Loss: 0.36476093: 100%|██████████| 1182/1182 [01:02<00:00, 19.06it/s]


{'tp': 36498, 'tn': 35206, 'fp': 2594, 'fn': 1302} 02


[02-11-2023 12-57-00] Epoch 003 Val. Acc: 0.9394 Val. Loss: 0.37384153: 100%|██████████| 1359/1359 [01:11<00:00, 18.94it/s]


{'tp': 40728, 'tn': 40949, 'fp': 2524, 'fn': 2745} 03


[02-11-2023 12-57-56] Epoch 003 Val. Acc: 0.9594 Val. Loss: 0.35378276: 100%|██████████| 885/885 [00:46<00:00, 18.91it/s]


{'tp': 26912, 'tn': 27407, 'fp': 900, 'fn': 1395} 04


[02-11-2023 13-00-08] Epoch 003 Val. Acc: 0.9705 Val. Loss: 0.34278195: 100%|██████████| 2100/2100 [01:50<00:00, 18.92it/s]


{'tp': 65142, 'tn': 65284, 'fp': 1913, 'fn': 2055} 05


[02-11-2023 13-01-23] Epoch 003 Val. Acc: 0.9540 Val. Loss: 0.35921598: 100%|██████████| 1171/1171 [01:02<00:00, 18.80it/s]


{'tp': 35224, 'tn': 36231, 'fp': 1219, 'fn': 2226} 06


[02-11-2023 13-05-12] Epoch 003 Val. Acc: 0.9597 Val. Loss: 0.35354299: 100%|██████████| 3628/3628 [03:11<00:00, 18.93it/s]


{'tp': 112651, 'tn': 110144, 'fp': 5929, 'fn': 3422} 07


[02-11-2023 13-06-59] Epoch 003 Val. Acc: 0.9616 Val. Loss: 0.35163130: 100%|██████████| 1690/1690 [01:30<00:00, 18.73it/s]


{'tp': 51599, 'tn': 52355, 'fp': 1697, 'fn': 2453} 08


[02-11-2023 13-08-41] Epoch 003 Val. Acc: 0.9505 Val. Loss: 0.36274437: 100%|██████████| 1565/1565 [01:23<00:00, 18.83it/s]


{'tp': 46620, 'tn': 48532, 'fp': 1523, 'fn': 3435} 09


[02-11-2023 13-11-26] Epoch 003 Val. Acc: 0.9533 Val. Loss: 0.35995749: 100%|██████████| 2603/2603 [02:19<00:00, 18.70it/s]


{'tp': 79449, 'tn': 79327, 'fp': 3951, 'fn': 3829} 10


[02-11-2023 13-22-21] Epoch 003 Val. Acc: 0.9695 Val. Loss: 0.34372303: 100%|██████████| 10440/10440 [09:16<00:00, 18.77it/s]


{'tp': 331440, 'tn': 316342, 'fp': 17726, 'fn': 2628} 11


[02-11-2023 13-26-38] Epoch 003 Val. Acc: 0.9462 Val. Loss: 0.36702626: 100%|██████████| 4062/4062 [03:37<00:00, 18.64it/s]


{'tp': 126761, 'tn': 119207, 'fp': 10766, 'fn': 3212} 12


In [10]:
all_metrics

{'01': {'tp': 70468, 'tn': 66957, 'fp': 5550, 'fn': 2039},
 '02': {'tp': 36498, 'tn': 35206, 'fp': 2594, 'fn': 1302},
 '03': {'tp': 40728, 'tn': 40949, 'fp': 2524, 'fn': 2745},
 '04': {'tp': 26912, 'tn': 27407, 'fp': 900, 'fn': 1395},
 '05': {'tp': 65142, 'tn': 65284, 'fp': 1913, 'fn': 2055},
 '06': {'tp': 35224, 'tn': 36231, 'fp': 1219, 'fn': 2226},
 '07': {'tp': 112651, 'tn': 110144, 'fp': 5929, 'fn': 3422},
 '08': {'tp': 51599, 'tn': 52355, 'fp': 1697, 'fn': 2453},
 '09': {'tp': 46620, 'tn': 48532, 'fp': 1523, 'fn': 3435},
 '10': {'tp': 79449, 'tn': 79327, 'fp': 3951, 'fn': 3829},
 '11': {'tp': 331440, 'tn': 316342, 'fp': 17726, 'fn': 2628},
 '12': {'tp': 126761, 'tn': 119207, 'fp': 10766, 'fn': 3212}}

In [11]:
fpr = []
fnr = []

for row in all_metrics.values():
    fpr.append(row['fp'] / (row['fp'] + row['tn']))
    fnr.append(row['fn'] / (row['fn'] + row['tp']))

In [12]:
fpr, fnr

([0.07654433365054408,
  0.06862433862433863,
  0.058059025142042184,
  0.03179425583777864,
  0.02846853282140572,
  0.032550066755674234,
  0.05107992384103108,
  0.03139569303633538,
  0.030426530816102287,
  0.04744350248565047,
  0.05306105343822216,
  0.08283258830680218],
 [0.028121422759181874,
  0.034444444444444444,
  0.0631426402594714,
  0.04928109654855689,
  0.030581722398321354,
  0.059439252336448596,
  0.02948144702040957,
  0.04538222452453193,
  0.06862451303566078,
  0.04597852974374985,
  0.0078666618772226,
  0.0247128249713402])

In [13]:
with open('data_cnn.csv', 'w') as fp:
    fp.write("fpr,fnr\n")
    for _fp, _fn in zip(fpr, fnr):
        fp.write(f"{_fp},{_fn}\n")

# MLP (PyTorch)

In [7]:
class SimpleMLP(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(SimpleMLP, self).__init__()
        self.fc1 = nn.Linear(in_feat * in_feat * 3, 200)
        self.fc2 = nn.Linear(200, out_feat)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x), dim=1)
        return x

In [4]:
data = np.loadtxt('/data/img_nids/mlp/NIGEL_2014_01.csv', skiprows=1, delimiter=',', dtype=np.float32)
X, y = data[:, :-1], data[:, -1]

In [10]:
device = 'cuda'
s = SimpleMLP(224, 2)
s = s.to(device)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=RANDOM_STATE)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=.3, random_state=RANDOM_STATE)

X_train = torch.Tensor(X_train)
y_train = torch.LongTensor(y_train)
X_test  = torch.Tensor(X_test)
y_test  = torch.LongTensor(y_test)
X_val   = torch.Tensor(X_val)
y_val   = torch.LongTensor(y_val)

transform = transforms.Compose([
    ToImage(),
    Resize((224, 224)),
])

BATCH_SIZE=1024

data_train = DataLoader(CustomDataset(subset=(X_train, y_train), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
data_test  = DataLoader(CustomDataset(subset=(X_test, y_test), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)
data_val   = DataLoader(CustomDataset(subset=(X_val, y_val), transform=transform), shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

loss_fn   = nn.CrossEntropyLoss()
optimizer = optim.Adam(s.parameters(), lr=LEARNING_RATE)

In [11]:
for epoch in range(1, 10 + 1):
    train_loss, train_acc = train(device, epoch, optimizer, loss_fn, s, data_train)
    val_loss, val_acc, _ = validate(device, epoch, optimizer, loss_fn, s, data_val)

[02-11-2023 13-38-15] Epoch 001 Acc: 0.9414 Loss: 0.37104732: 100%|██████████| 100/100 [00:28<00:00,  3.50it/s]
[02-11-2023 13-38-19] Epoch 001 Val. Acc: 0.9444 Val. Loss: 0.36888554: 100%|██████████| 13/13 [00:04<00:00,  3.14it/s]
[02-11-2023 13-38-47] Epoch 002 Acc: 0.9500 Loss: 0.36354794: 100%|██████████| 100/100 [00:27<00:00,  3.61it/s]
[02-11-2023 13-38-52] Epoch 002 Val. Acc: 0.9530 Val. Loss: 0.36057164: 100%|██████████| 13/13 [00:04<00:00,  3.15it/s]
[02-11-2023 13-39-20] Epoch 003 Acc: 0.9549 Loss: 0.35816712: 100%|██████████| 100/100 [00:27<00:00,  3.63it/s]
[02-11-2023 13-39-24] Epoch 003 Val. Acc: 0.9530 Val. Loss: 0.35993358: 100%|██████████| 13/13 [00:04<00:00,  3.23it/s]
[02-11-2023 13-39-52] Epoch 004 Acc: 0.9549 Loss: 0.35851328: 100%|██████████| 100/100 [00:27<00:00,  3.65it/s]
[02-11-2023 13-39-56] Epoch 004 Val. Acc: 0.9531 Val. Loss: 0.36020758: 100%|██████████| 13/13 [00:04<00:00,  3.13it/s]
[02-11-2023 13-40-24] Epoch 005 Acc: 0.9549 Loss: 0.35836409: 100%|█████

In [12]:
all_metrics = dict()
for month in [str(i).zfill(2) for i in range(1, 13)]:
    data = np.loadtxt(f'/data/img_nids/mlp/NIGEL_2014_{month}.csv', skiprows=1, delimiter=',', dtype=np.float32)
    X_test, y_test = data[:, :-1], data[:, -1]
    X_test  = torch.Tensor(X_test)
    y_test  = torch.LongTensor(y_test)

    cd = CustomDataset(subset=(X_test, y_test), transform=transform)
    data_test  = DataLoader(cd, shuffle=True, batch_size=BATCH_SIZE, num_workers=8)

    _, _, metrics = validate(device, epoch, optimizer, loss_fn, s, data_test)
    all_metrics[month] = metrics
    print(all_metrics)

[02-11-2023 13-48-46] Epoch 010 Val. Acc: 0.9549 Val. Loss: 0.35838755: 100%|██████████| 142/142 [00:38<00:00,  3.66it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}}


[02-11-2023 13-49-19] Epoch 010 Val. Acc: 0.9560 Val. Loss: 0.35720385: 100%|██████████| 74/74 [00:20<00:00,  3.61it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}}


[02-11-2023 13-49-56] Epoch 010 Val. Acc: 0.9552 Val. Loss: 0.35801235: 100%|██████████| 85/85 [00:23<00:00,  3.59it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}}


[02-11-2023 13-50-24] Epoch 010 Val. Acc: 0.9680 Val. Loss: 0.34517964: 100%|██████████| 56/56 [00:15<00:00,  3.55it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}}


[02-11-2023 13-51-22] Epoch 010 Val. Acc: 0.9777 Val. Loss: 0.33555658: 100%|██████████| 132/132 [00:35<00:00,  3.67it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}}


[02-11-2023 13-51-54] Epoch 010 Val. Acc: 0.9687 Val. Loss: 0.34464903: 100%|██████████| 74/74 [00:20<00:00,  3.64it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}}


[02-11-2023 13-53-33] Epoch 010 Val. Acc: 0.9672 Val. Loss: 0.34599504: 100%|██████████| 227/227 [01:01<00:00,  3.68it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}}


[02-11-2023 13-54-19] Epoch 010 Val. Acc: 0.9732 Val. Loss: 0.34005924: 100%|██████████| 106/106 [00:28<00:00,  3.68it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}, '08': {'tp': 53053, 'tn': 52151, 'fp': 1901, 'fn': 999}}


[02-11-2023 13-55-02] Epoch 010 Val. Acc: 0.9670 Val. Loss: 0.34625103: 100%|██████████| 98/98 [00:26<00:00,  3.66it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}, '08': {'tp': 53053, 'tn': 52151, 'fp': 1901, 'fn': 999}, '09': {'tp': 48458, 'tn': 48347, 'fp': 1708, 'fn': 1597}}


[02-11-2023 13-56-13] Epoch 010 Val. Acc: 0.9686 Val. Loss: 0.34460838: 100%|██████████| 163/163 [00:44<00:00,  3.64it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}, '08': {'tp': 53053, 'tn': 52151, 'fp': 1901, 'fn': 999}, '09': {'tp': 48458, 'tn': 48347, 'fp': 1708, 'fn': 1597}, '10': {'tp': 82357, 'tn': 78976, 'fp': 4302, 'fn': 921}}


[02-11-2023 13-59-15] Epoch 010 Val. Acc: 0.9703 Val. Loss: 0.15543642:  45%|████▌     | 296/653 [01:21<01:32,  3.85it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fda3c099e10>
Traceback (most recent call last):
  File "/home/pedro/Projects/sac_2023/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/home/pedro/Projects/sac_2023/venv/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
[02-11-2023 13-59-16] Epoch 010 Val. Acc: 0.9704 Val. Loss: 0.15646766:  46%|████▌     | 298/653 [01:22<01:31,  3.90it/s]Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fda3c099e10>
Traceback (most recent call 

{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}, '08': {'tp': 53053, 'tn': 52151, 'fp': 1901, 'fn': 999}, '09': {'tp': 48458, 'tn': 48347, 'fp': 1708, 'fn': 1597}, '10': {'tp': 82357, 'tn': 78976, 'fp': 4302, 'fn': 921}, '11': {'tp': 333415, 'tn': 314704, 'fp': 19364, 'fn': 653}}


[02-11-2023 14-02-40] Epoch 010 Val. Acc: 0.9517 Val. Loss: 0.36155899: 100%|██████████| 254/254 [01:08<00:00,  3.72it/s]


{'01': {'tp': 71998, 'tn': 66470, 'fp': 6037, 'fn': 509}, '02': {'tp': 37319, 'tn': 34955, 'fp': 2845, 'fn': 481}, '03': {'tp': 42352, 'tn': 40698, 'fp': 2775, 'fn': 1121}, '04': {'tp': 27515, 'tn': 27286, 'fp': 1021, 'fn': 792}, '05': {'tp': 66337, 'tn': 65059, 'fp': 2138, 'fn': 860}, '06': {'tp': 36471, 'tn': 36087, 'fp': 1363, 'fn': 979}, '07': {'tp': 114963, 'tn': 109578, 'fp': 6495, 'fn': 1110}, '08': {'tp': 53053, 'tn': 52151, 'fp': 1901, 'fn': 999}, '09': {'tp': 48458, 'tn': 48347, 'fp': 1708, 'fn': 1597}, '10': {'tp': 82357, 'tn': 78976, 'fp': 4302, 'fn': 921}, '11': {'tp': 333415, 'tn': 314704, 'fp': 19364, 'fn': 653}, '12': {'tp': 128969, 'tn': 118416, 'fp': 11557, 'fn': 1004}}


In [13]:
fpr = []
fnr = []

for row in all_metrics.values():
    fpr.append(row['fp'] / (row['fp'] + row['tn']))
    fnr.append(row['fn'] / (row['fn'] + row['tp']))

In [14]:
with open('data_cnn_resize.csv', 'w') as fp:
    fp.write("fpr,fnr\n")
    for _fp, _fn in zip(fpr, fnr):
        fp.write(f"{_fp},{_fn}\n")

In [15]:
t = torch.Tensor()
t = t.to('cuda')

'cuda'