In [8]:
import torch
from torch import nn
from src.data.make_cifar10_dataset import CIFAR10, get_img_transformer
from torchvision.utils import make_grid
import matplotlib.pyplot as plt
import seaborn as sns
from src.models.cifar10_conv_model import Cifar10ConvModel
from sklearn import metrics
import numpy as np
import torch.nn.functional as nnf
from skorch import NeuralNetClassifier
from mapie.classification import MapieClassifier
from mapie.metrics import (classification_coverage_score,
                           classification_mean_width_score)
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
import os.path
import pickle
import itertools
import pathlib

from src.utils.bootstrap_utils import train_classifier

In [58]:
device = "cuda" if torch.cuda.is_available() else "cpu"
test_dataset = CIFAR10(split="test", root='../data/processed', download=True,
                      transform=get_img_transformer())
test_dataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(test_dataset), drop_last=True)
calib_dataset = CIFAR10(split="calib", root='../data/processed', download=True,
                      transform=get_img_transformer())
calib_dataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(calib_dataset), drop_last=True)
train_dataset = CIFAR10(split="train", root='../data/processed', download=True,
                      transform=get_img_transformer())
train_dataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=len(train_dataset))

n_classes = len(set(test_dataset.targets))
classes = {index: name for name, index in test_dataset.class_to_idx.items()}
model = Cifar10ConvModel()
# model.load_state_dict(torch.load("../models/trained_model.pt"))

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [18]:
base_folder = "cifar-10-batches-py"
downloaded_list = [
        ["data_batch_1", "c99cafc152244af753f735de768cd75f"],
        ["data_batch_2", "d4bba439e000b95fd0a9bffe97cbabec"],
        ["data_batch_3", "54ebc095f3ab1f0389bbae665268c751"],
        ["data_batch_4", "634d18415352ddfa80567beed471001a"],
    ]
data =[]
targets =[]
for file_name, checksum in downloaded_list:
    file_path = os.path.join("../data/processed", base_folder, file_name)
    with open(file_path, "rb") as f:
        entry = pickle.load(f, encoding="latin1")
        data.append(entry["data"])
        if "labels" in entry:
            targets.extend(entry["labels"])
        else:
            targets.extend(entry["fine_labels"])

In [3]:
net = NeuralNetClassifier(
    model,
    max_epochs=10,
    lr=0.1,
)

In [4]:
X_train = next(iter(train_dataloader))[0].numpy()
y_train = next(iter(train_dataloader))[1].numpy()
X_test = next(iter(test_dataloader))[0].numpy()
y_test = next(iter(test_dataloader))[1].numpy()

In [7]:

X_calib = next(iter(calib_dataloader))[0].numpy()
y_calib = next(iter(calib_dataloader))[1].numpy()

In [6]:
net.initialize()

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=Cifar10ConvModel(
    (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (dropout): Dropout(p=0.25, inplace=False)
    (fc1): Linear(in_features=512, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=64, bias=True)
    (out): Linear(in_features=64, out_features=10, bias=True)
  ),
)

In [7]:
pred = net.predict(X_test)

In [120]:
metrics.accuracy_score(y_test, pred)

0.6535

-----------------


In [113]:
y_pred_proba[0].max()

2.067634

In [142]:
X_calib.shape

(10000, 3, 32, 32)

In [None]:
clf = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
clf.fit(train_dataset.data, train_dataset.targets)
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)
y_pred_proba_max = np.max(y_pred_proba, axis=1)
mapie_score = MapieClassifier(estimator=clf, cv="prefit", method="score")
mapie_score.fit(X_calib, y_calib)
# alpha = [0.2, 0.1, 0.05]
# y_pred_score, y_ps_score = mapie_score.predict(X_test_mesh, alpha=alpha)

In [7]:
data  = torch.randn(100, 64, 1, 28, 28, device='cpu')
min_batches = data[:5]

In [8]:
min_batches.shape

torch.Size([5, 64, 1, 28, 28])

In [15]:
sample_idx = torch.randint(2000, size=(1,)).item()

In [5]:
random_train_idx = np.random.choice(np.array(range(len(train_dataset))),replace=False, size=25600)
train_subset = torch.utils.data.Subset(train_dataset, random_train_idx)

In [14]:
dl = torch.utils.data.DataLoader(train_subset, batch_size=32)

In [39]:
train_classifier(model, dl, True)

Step 500     training accuracy: 0.734375
Step 1000    training accuracy: 0.74703125
Step 1500    training accuracy: 0.7454910714285714
Step 2000    training accuracy: 0.756484375
Step 2500    training accuracy: 0.7709375
Step 3000    training accuracy: 0.76859375
Step 3500    training accuracy: 0.780625
Step 4000    training accuracy: 0.7759375
Step 4500    training accuracy: 0.7911875
Step 5000    training accuracy: 0.7925
Step 5500    training accuracy: 0.7985267857142857
Step 6000    training accuracy: 0.80453125
Step 6500    training accuracy: 0.8084375
Step 7000    training accuracy: 0.8199479166666667
Step 7500    training accuracy: 0.8240625
Step 8000    training accuracy: 0.81859375


In [2]:
def accuracy(target, pred):
    return metrics.accuracy_score(target.detach().cpu().numpy(), pred.detach().cpu().numpy())
criterion = nn.CrossEntropyLoss()

confusion_matrix = np.zeros((n_classes, n_classes))
with torch.no_grad():
    model.eval()
    test_accuracies = []
    for inputs, targets in test_dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        output = model(inputs)
        loss = criterion(output, targets)
        print(inputs.shape)
        predictions = output.max(1)[1]

        # Multiply by len(inputs) because the final batch of DataLoader may be smaller (drop_last=True).
        test_accuracies.append(accuracy(targets, predictions) * len(inputs))
        
        # confusion_matrix += compute_confusion_matrix(targets, predictions)

    test_accuracy = np.sum(test_accuracies) / len(test_dataset)
    
    model.train()

NameError: name 'n_classes' is not defined

In [41]:
print(f"Test accuracy: {test_accuracy:.3f}")

Test accuracy: 0.680


In [6]:
from matplotlib.pyplot import axes
from torch import batch_norm
from src.utils.bootstrap_utils import get_ensemble_preparation, train_ensemble
from src.models.cifar10_conv_model import Cifar10ConvModel
train_set = CIFAR10(split='train', root='../data/processed', download=True,
                       transform=get_img_transformer())
models = [Cifar10ConvModel(), Cifar10ConvModel(), Cifar10ConvModel(), Cifar10ConvModel()]
# train_ensemble(models, print_acc=True, num_epoches=1)
# data = torch.randn( 1, 3, 32, 32)

train_ensemble(models, train_set, num_epoches=4)
# pred = get_ensemble_preparation(models, data)




Files already downloaded and verified


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [421]:
def get_softvotes(probs):
    sum_probs = torch.tensor([])
    for pred_idx in range(probs.shape[1]):
        pred_lis = torch.tensor([])
        for model_prob in probs:
            pred_lis = torch.cat((pred_lis, model_prob[pred_idx].unsqueeze(dim=0)))
        sum_probs =torch.cat((sum_probs, torch.sum(pred_lis, dim=0).unsqueeze(dim=0)))
    return sum_probs

In [423]:
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].float().sum()
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [424]:
from src.data.make_cifar10_dataset import CIFAR10, get_img_transformer
import torch.nn.functional as nnf

dataset = CIFAR10(split='test', root='../data/processed', download=True,
                      transform=get_img_transformer())

Files already downloaded and verified


In [426]:
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
with torch.no_grad():
    [model.eval() for model in models]
    for inputs, targets in dataloader:
        output = get_ensemble_preparation(models, inputs)
        # print('output:')
        # print(output.size())
        probs = torch.stack([nnf.softmax(x, dim=1) for x in output])
# validate shape and correctness
        assert output.shape == probs.shape
        assert torch.all(torch.eq(probs[0], nnf.softmax(output[0], dim=1))).item()

        acc = accuracy(get_softvotes(probs), targets)[0].item()/100.0
        print(acc)
        # probs = np.array([nnf.softmax(x, dim=1).detach().numpy() for x in output])
        
        # sum_probs = np.sum(probs, axis=0)
        # print("sum")
        # print(sum_probs)
        # print(f"prediction: {np.argmax(sum_probs)}" )
        # print(f"target:{targets}")
        # print(f"probs:")
        # print(probs)
        # break

0.8125
0.59375
0.875
0.625
0.875
0.71875
0.75
0.59375
0.59375
0.65625
0.78125
0.875
0.65625
0.75
0.65625
0.59375
0.78125
0.71875
0.75
0.71875
0.59375
0.8125
0.78125
0.65625
0.78125
0.59375
0.71875
0.5625
0.78125
0.8125
0.71875
0.78125
0.8125
0.6875
0.75
0.59375
0.78125
0.84375
0.84375
0.8125
0.75
0.59375
0.625
0.75
0.75
0.71875
0.875
0.78125
0.90625
0.875
0.59375
0.6875
0.78125
0.78125
0.53125
0.75
0.59375
0.65625
0.8125
0.5625
0.6875
0.75
0.8125
0.5625
0.78125
0.6875
0.5625
0.78125
0.78125
0.8125
0.65625
0.5625
0.84375
0.75
0.6875
0.8125
0.78125
0.8125
0.65625
0.8125
0.75
0.75
0.78125
0.71875
0.625
0.6875
0.8125
0.71875
0.78125
0.75
0.6875
0.75
0.75
0.84375
0.84375
0.6875
0.59375
0.84375
0.75
0.65625
0.78125
0.65625
0.8125
0.75
0.6875
0.8125
0.71875
0.75
0.6875
0.84375
0.75
0.75
0.75
0.75
0.875
0.8125
0.78125
0.71875
0.6875
0.84375
0.5625
0.8125
0.53125
0.75
0.8125
0.71875
0.6875
0.6875
0.625
0.6875
0.8125
0.75
0.75
0.71875
0.71875
0.59375
0.84375
0.8125
0.6875
0.78125
0.625
0.625
0.7

In [409]:
# print(output[0])
# print("=====")
# probs = torch.tensor()



# torch.eq(probs, output)
# for x in output:
    # nnf.softmax(x, dim=1)
print(probs.numpy())

[[[1.7948967e-05 2.7834473e-03 4.8145669e-04 ... 1.0736908e-05
   4.9595234e-05 4.7396938e-03]
  [3.9964016e-03 2.2137694e-03 9.9002190e-02 ... 5.3547500e-03
   6.0709799e-03 2.2735300e-03]
  [7.4287809e-02 5.0519593e-04 6.3119245e-01 ... 4.8127263e-03
   3.9686598e-02 1.0570972e-03]
  ...
  [2.7672156e-06 1.4352670e-05 5.3852453e-04 ... 2.2338359e-07
   1.4825059e-05 1.5478002e-04]
  [6.1442150e-04 1.4444434e-07 8.1972891e-01 ... 8.6542146e-05
   4.5060391e-05 6.9018142e-07]
  [9.2804078e-07 9.9976152e-01 8.2217018e-11 ... 1.3848707e-12
   6.8618983e-06 2.3074000e-04]]

 [[2.7701558e-04 2.8405702e-03 1.0112638e-02 ... 5.5235454e-05
   4.9509935e-04 5.5824551e-03]
  [5.9990771e-03 1.1627412e-03 7.9407595e-02 ... 7.9791958e-04
   3.0931637e-03 3.2709162e-03]
  [4.6678022e-03 1.1893477e-04 3.0543259e-01 ... 2.7741797e-03
   4.2088074e-03 7.1993977e-04]
  ...
  [2.7674162e-06 2.0695081e-05 2.4089379e-04 ... 3.3351590e-08
   1.1284867e-06 2.0651106e-04]
  [1.2935445e-02 2.7627312e-08 9.478

In [220]:
nnf.softmax(torch.tensor([[-1.2971e+01, -2.2279e+01, -3.9716e+00, -9.4084e+00,  4.3156e+00,
         -4.1985e-01, -1.7038e+01,  1.1829e+01, -2.5635e+01, -1.3364e+01]]), dim=1)

tensor([[1.6953e-11, 1.5376e-15, 1.3729e-07, 5.9769e-10, 5.4542e-04, 4.7880e-06,
         2.9039e-13, 9.9945e-01, 5.3623e-17, 1.1444e-11]])

In [415]:
def get_softvotes(probs):
    sum_probs = torch.tensor([])
    for pred_idx in range(probs.shape[1]):
        pred_lis = torch.tensor([])
        for model_prob in probs:
            pred_lis = torch.cat((pred_lis, model_prob[pred_idx].unsqueeze(dim=0)))
        sum_probs =torch.cat((sum_probs, torch.sum(pred_lis, dim=0).unsqueeze(dim=0)))
    return sum_probs

In [416]:
def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)
    
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].float().sum()
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [417]:
acc = accuracy(get_softvotes(), targets)[0].item()/100.0

tensor([[2.2969e-01, 3.6828e-01, 2.4209e-01, 1.2599e+00, 8.8491e-02, 7.3073e-01,
         2.6864e-01, 1.2785e-01, 2.9833e-01, 3.8602e-01],
        [1.0330e-03, 3.8624e-04, 1.0482e-01, 4.8279e-01, 2.9720e-01, 2.5483e+00,
         2.4274e-01, 3.2169e-01, 1.0723e-04, 9.1972e-04],
        [2.1115e-04, 5.8661e-01, 1.1665e-07, 1.6987e-06, 1.9581e-09, 7.8862e-08,
         1.8200e-06, 5.2618e-07, 1.4877e-05, 3.4132e+00],
        [6.3916e-02, 4.5867e-04, 2.4714e+00, 5.4850e-01, 5.6941e-01, 2.3085e-01,
         5.0071e-02, 5.0040e-02, 1.3901e-02, 1.4667e-03],
        [3.5400e+00, 6.8625e-04, 1.8815e-02, 1.0720e-04, 1.5802e-03, 1.1556e-05,
         2.9676e-05, 3.8793e-05, 4.3540e-01, 3.3648e-03],
        [1.1054e-03, 2.8399e-04, 2.4528e-06, 1.8643e-05, 1.2702e-08, 1.1464e-05,
         1.1113e-06, 5.7141e-05, 6.4715e-06, 3.9985e+00],
        [2.4177e+00, 2.5023e-01, 3.5398e-01, 1.1592e-01, 3.9577e-01, 3.7699e-02,
         2.3450e-02, 4.7116e-02, 3.3495e-01, 2.3146e-02],
        [1.6973e-05, 3.9902

In [418]:
acc


0.75

In [9]:
str(pathlib.Path(__file__).parent.absolute()) 

NameError: name '__file__' is not defined