# Preliminary Modeling

In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle

# from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split


In [5]:
# Load the training images
# Each file in the directory is unpickled and must provide b'labels' and
# b'data'; presumably these are CIFAR-style batches -- TODO confirm.
# NOTE(security): pickle.load can execute arbitrary code, so only point this
# at trusted, locally downloaded batch files.
base_path = './data/train_images/'
label_chunks, image_chunks = [], []
# os.listdir() returns names in arbitrary order; sorting keeps the sample
# order (and therefore row indices) reproducible across runs and machines.
for fname in sorted(os.listdir(base_path)):
    with open(os.path.join(base_path, fname), 'rb') as f:
        f_dict = pickle.load(f, encoding='bytes')
    label_chunks.append(np.array(f_dict[b'labels']))
    # Reshape to (N, 3, 32, 32), then move channels last -> (N, 32, 32, 3).
    # -1 lets a batch hold any number of images instead of exactly 10000.
    image_chunks.append(f_dict[b'data'].reshape([-1, 3, 32, 32]).transpose([0, 2, 3, 1]))
if not image_chunks:
    # Fail loudly instead of silently reusing `labels`/`data` left over from
    # a previously executed cell.
    raise FileNotFoundError('no batch files found in ' + base_path)
# Concatenate once rather than re-copying the growing arrays for every file.
labels = np.concatenate(label_chunks)
data = np.concatenate(image_chunks)
X_train = data
y_train = to_categorical(labels)

In [3]:
# Load the testing images
# Each file in the directory is unpickled and must provide b'labels' and
# b'data'; presumably these are CIFAR-style batches -- TODO confirm.
# NOTE(security): pickle.load can execute arbitrary code, so only point this
# at trusted, locally downloaded batch files.
base_path = './data/test_images/'
label_chunks, image_chunks = [], []
# os.listdir() returns names in arbitrary order; sorting keeps the sample
# order (and therefore row indices) reproducible across runs and machines.
for fname in sorted(os.listdir(base_path)):
    with open(os.path.join(base_path, fname), 'rb') as f:
        f_dict = pickle.load(f, encoding='bytes')
    label_chunks.append(np.array(f_dict[b'labels']))
    # Reshape to (N, 3, 32, 32), then move channels last -> (N, 32, 32, 3).
    # -1 lets a batch hold any number of images instead of exactly 10000.
    image_chunks.append(f_dict[b'data'].reshape([-1, 3, 32, 32]).transpose([0, 2, 3, 1]))
if not image_chunks:
    # Fail loudly: with no files, the variables `labels`/`data` from an
    # earlier cell would otherwise be passed off as the test set.
    raise FileNotFoundError('no batch files found in ' + base_path)
# Concatenate once rather than re-copying the growing arrays for every file.
labels = np.concatenate(label_chunks)
data = np.concatenate(image_chunks)
X_test = data
y_test = to_categorical(labels)

In [6]:
# Report the array shapes of the training split, then the test split,
# separated by a blank line.
for caption, arr in (('X_train shape: ', X_train), ('y_train shape: ', y_train)):
    print(caption, arr.shape)
print()
for caption, arr in (('X_test shape: ', X_test), ('y_test shape: ', y_test)):
    print(caption, arr.shape)

X_train shape:  (50000, 32, 32, 3)
y_train shape:  (50000, 10)

X_test shape:  (10000, 32, 32, 3)
y_test shape:  (10000, 10)


In [7]:
# build a model
# Small CNN: two 3x3 conv layers (32 filters, ReLU) -> 2x2 max-pool ->
# flatten -> 32-unit ReLU dense layer -> 10-way softmax output.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(10, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot targets built by to_categorical.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# fit the model
# One epoch with mini-batches of 32; the test split is passed as the
# validation data and the value returned by fit() is kept in `h`.
h = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=1,
    verbose=1,
)



(10,)

In [20]:
# Per-image bookkeeping table for the training set: one row per image, with
# every tracking column initialised to "nothing recorded yet" (NaN / False).
pd.DataFrame(data={
    # y_train is one-hot with shape (n_samples, n_classes); a DataFrame column
    # must be 1-D, so recover the integer class ids.
    'true_label': np.argmax(y_train, axis=1),
    # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
    'predicted_label': np.nan,
    'is_annotated': False,
    'annotation_batch': np.nan,
    'sampling_method': np.nan,
    'uncertainty_score': np.nan,
    # The original entry had no value (a syntax error); start empty like the
    # other score columns.
    'diversity_score': np.nan,
})

Unnamed: 0,true_label,predicted_label,is_annotated
0,6,,False
1,9,,False
2,9,,False
3,4,,False
4,1,,False
...,...,...,...
49995,2,,False
49996,6,,False
49997,9,,False
49998,1,,False


In [7]:
# Run the model on every training image; the softmax output layer yields one
# probability distribution over the 10 classes per image.
preds = model.predict(X_train)

In [18]:
# Scratch arithmetic check: 0.75 scaled by 4/3 evaluates to 1.0.
0.75 * (4/3)

1.0

In [32]:
def margin_of_confidence_score(prob_dist):
    """Return the margin-of-confidence uncertainty for a single prediction.

    The score is ``1 - (p_top - p_second)``, where ``p_top`` and ``p_second``
    are the two largest values in ``prob_dist``. A small gap between the two
    leading classes therefore gives a score close to 1.

    Args:
        prob_dist: 1-D array-like of class probabilities with at least two
            entries. It is left unmodified.

    Returns:
        Scalar uncertainty score.

    Raises:
        IndexError: if ``prob_dist`` has fewer than two entries.
    """
    # np.sort returns a sorted copy. Sorting a reversed view in place would
    # permanently reorder the caller's array (for example a row of the
    # prediction matrix), destroying the class-index alignment.
    ordered = np.sort(np.asarray(prob_dist))
    difference = ordered[-1] - ordered[-2]
    return 1 - difference

In [33]:
def uncertainty_scores(predictions):
    """Score each predicted distribution with ``margin_of_confidence_score``.

    Args:
        predictions: iterable of per-sample probability distributions, e.g.
            the rows of a 2-D prediction array.

    Returns:
        NumPy array holding one score per distribution, in input order.
    """
    return np.array([margin_of_confidence_score(row) for row in predictions])

In [27]:
# Inspect the predicted probabilities for the sample at index 1 in sorted
# order (np.sort returns a sorted copy and leaves preds untouched).
np.sort(preds[1])

array([6.8614244e-01, 2.2179852e-01, 3.7347563e-02, 3.0262331e-02,
       1.0942385e-02, 6.1705755e-03, 2.5920144e-03, 2.4669287e-03,
       1.9037212e-03, 3.7353954e-04], dtype=float32)

In [31]:
# Spot-check the score of a single prediction. The per-sample scorer defined
# in this notebook is margin_of_confidence_score; `uncertainty_score` is not
# defined anywhere and raises NameError on a fresh kernel.
# Pass a copy so the check cannot reorder the row of `preds` in place.
margin_of_confidence_score(preds[4].copy())

0.9353553354740143

In [34]:
# Compute the margin-of-confidence score for every row of preds.
uncertainty_scores(preds)

[4.6503745e-04 6.9991039e-04 1.2940875e-03 1.7228283e-03 1.1481809e-02
 3.6671530e-02 9.6358582e-02 1.1513564e-01 1.9469997e-01 5.4147053e-01]
[3.0262331e-02 2.2179852e-01 6.1705755e-03 2.5920144e-03 1.0942385e-02
 3.7353954e-04 2.4669287e-03 1.9037212e-03 3.7347563e-02 6.8614244e-01]
[0.11443135 0.03947278 0.24095923 0.07227916 0.09701898 0.01859178
 0.01240703 0.06950395 0.22530982 0.11002599]
[8.6270273e-03 9.8331446e-05 1.5845659e-01 2.5484832e-02 6.3462621e-01
 2.8931728e-02 9.0005465e-02 5.1676374e-02 1.8699926e-03 2.2337456e-04]
[0.0298217  0.4920789  0.00382676 0.00245585 0.00697395 0.00058473
 0.00164608 0.00233572 0.03284207 0.42743424]


In [11]:
# Demo: sorting a reversed view in place leaves the underlying array in
# descending order.
a = np.array([4, 3, 7, 6, 4, 8, 6, 5])
descending_view = a[::-1]  # a view onto `a`, not a copy
descending_view.sort()     # ascending through the view == descending in `a`
a

array([8, 7, 6, 6, 5, 4, 4, 3])

In [9]:
# Unpack train / validation / test splits from load_cifar10().
# NOTE(review): load_cifar10 is neither defined nor imported in this notebook,
# so this cell raises NameError -- TODO define or import it before this cell.
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()

NameError: name 'load_cifar10' is not defined