<a href="https://colab.research.google.com/github/agam844/Active-Learning/blob/main/keras_CIPHAR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd
import random

Training on the entire dataset

In [None]:
# Fetch CIFAR-10 and unpack the train / test splits into images and labels.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Preprocess the data: cast the image arrays to float32 and divide by 255.0,
# then one-hot encode the labels into 10 classes.
x_train, x_test = (images.astype('float32') / 255.0 for images in (x_train, x_test))
y_train, y_test = (to_categorical(labels, num_classes=10) for labels in (y_train, y_test))

In [None]:
# Two conv -> max-pool -> dropout stages that extract features from the
# 32x32x3 input ...
conv_stack = [
    tf.keras.layers.Conv2D(16, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(rate=0.25),
]

# ... followed by a dense head that ends in a 10-way softmax.
classifier_head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=10, activation='softmax'),
]

model = tf.keras.Sequential(conv_stack + classifier_head)

# Adam with an explicit learning rate; the loss matches the one-hot labels.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [None]:
# Disabled alternative architecture: a smaller network with a single
# conv / max-pool stage and no dropout, compiled with the 'adam' optimizer
# given by name. Left commented out; uncommenting it would overwrite `model`.
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax')
# ])

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Stop once the validation loss has failed to improve for 3 consecutive epochs
# and roll the model back to the weights of its best epoch; without
# restore_best_weights the model would keep the weights from 3 epochs later.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True)

# Validate on a held-out slice of the training data instead of the test set.
# Passing (x_test, y_test) as validation data lets the test set decide when
# training stops, which makes the test accuracy reported afterwards optimistic.
model.fit(x_train, y_train, batch_size=64, epochs=100,
          validation_split=0.1, callbacks=[early_stopping])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


<keras.callbacks.History at 0x7f2ee2ff2200>

In [None]:
# Score the trained model on the test split and report both metrics.
test_metrics = model.evaluate(x_test, y_test)
loss, accuracy = test_metrics
print(f'Test loss: {loss:.4f}', f'Test accuracy: {accuracy:.4f}', sep='\n')

Test loss: 0.9143
Test accuracy: 0.6783


In [None]:
# Run inference over the test set once; every row of `probabilities` holds the
# per-class scores for one sample. (The original cell called model.predict
# twice with identical arguments, doubling the inference time for no gain.)
probabilities = model.predict(x_test)

# Predicted class = index of the largest probability in each row.
predictions_int = np.argmax(probabilities, axis=1)

# Confidence = the largest probability in each row.
max_probabilities = np.max(probabilities, axis=1)



In [None]:
# Print the ten smallest top-class confidences, then display how many samples
# were scored.
lowest_confidences = np.sort(max_probabilities)[:10]
print(lowest_confidences)
len(max_probabilities)

[0.14337356 0.14367919 0.14433481 0.14860065 0.15034115 0.15617497
 0.15620431 0.15938121 0.15965697 0.16260476]


10000

Training using active learning

In [None]:
# Size of the initial labeled set: 5000 samples, capped at the dataset size.
num_entries = min(5000, len(y_train))
max_index = len(x_train)

# Draw the initial labeled indices without replacement from a seeded generator.
# The unseeded np.random.choice used before produced a different starting set
# on every run, so results could not be reproduced.
rng = np.random.default_rng(42)
random_indices = rng.choice(max_index, size=num_entries, replace=False)

In [None]:
# Pull the randomly chosen samples (images and labels) out of the training
# set; these form the initial labeled set.
y_train = np.array(y_train)
selected_x_train, selected_y_train = x_train[random_indices], y_train[random_indices]

In [None]:
# Every training index that was not drawn for the labeled set belongs to the
# remaining pool.
all_indices = np.arange(max_index)
remaining_indices = np.setdiff1d(all_indices, random_indices)
remaining_x_train, remaining_y_train = x_train[remaining_indices], y_train[remaining_indices]

In [None]:
# Load CIFAR-10 again into separate names. These arrays do not go through the
# preprocessing cell, so y_train1 keeps the labels exactly as load_data()
# returns them (not one-hot encoded).
train_raw, test_raw = cifar10.load_data()
x_train1, y_train1 = train_raw
x_test1, y_test1 = test_raw

# Restrict the raw arrays to the same pool indices as remaining_x_train.
remaining_x_train1 = x_train1[remaining_indices]
remaining_y_train1 = y_train1[remaining_indices]

In [None]:
# Active learning loop (with correctness filter).
# Each round: train a fresh model on the labeled set, score the remaining
# pool, and move the lowest-scoring pool samples into the labeled set.
num_iterations = 10
num_additional_samples = 2000  # pool samples moved to the labeled set per round

for iteration in range(num_iterations):
    # A new, untrained model every round, so each round trains from scratch.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(selected_x_train, selected_y_train, batch_size=64, epochs=10)

    # NOTE: these metrics are measured on the remaining pool, not on the
    # CIFAR-10 test split, even though the printed label says "Test".
    loss, accuracy = model.evaluate(remaining_x_train, remaining_y_train)
    print(f'Test loss: {loss:.4f}')
    print(f'Test accuracy: {accuracy:.4f}')

    # Score the remaining pool once (the original ran model.predict twice on
    # the same data every round).
    probabilities = model.predict(remaining_x_train)
    predictions_int = np.argmax(probabilities, axis=1)   # predicted class per sample
    max_probabilities = np.max(probabilities, axis=1)    # confidence per sample

    # Correctness filter: set the confidence of every misclassified pool sample
    # to 0 so that those samples sort to the front. reshape(-1) flattens the
    # label array so it compares element-wise with the 1-D predictions.
    # NOTE(review): this consults the true labels of pool samples before they
    # are "labeled" -- confirm that this oracle access is intended.
    misclassified = predictions_int != remaining_y_train1.reshape(-1)
    max_probabilities[misclassified] = 0

    # Lowest scores first: misclassified samples, then the least confident ones.
    additional_indices = np.argsort(max_probabilities)[:num_additional_samples]

    # Add the additional samples to the selected dataset
    selected_x_train = np.concatenate((selected_x_train, remaining_x_train[additional_indices]))
    selected_y_train = np.concatenate((selected_y_train, remaining_y_train[additional_indices]))

    # Remove the additional samples from the remaining dataset; the three
    # arrays must be trimmed together so their rows stay aligned.
    remaining_x_train = np.delete(remaining_x_train, additional_indices, axis=0)
    remaining_y_train = np.delete(remaining_y_train, additional_indices, axis=0)
    remaining_y_train1 = np.delete(remaining_y_train1, additional_indices, axis=0)

    print("Iteration:", iteration + 1)
    print("Number of labeled samples:", len(selected_x_train))
    print("Number of remaining samples:", len(remaining_x_train))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.4999
Test accuracy: 0.4709
Iteration: 1
Number of labeled samples: 7000
Number of remaining samples: 43000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.3263
Test accuracy: 0.5414
Iteration: 2
Number of labeled samples: 9000
Number of remaining samples: 41000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.2927
Test accuracy: 0.5789
Iteration: 3
Number of labeled samples: 11000
Number of remaining samples: 39000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.2495
Test accuracy: 0.6062
Iteration: 4
Number of labeled samples: 13000
Number of remaining samples: 37000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/

In [None]:
# Train one more fresh model on the labeled set produced by the loop above.
# Layers are appended one at a time; the resulting stack is the same
# conv/pool/dropout x2 -> dense network used elsewhere in this notebook.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Disabled alternative (single conv stage, no dropout):
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax')
# ])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(selected_x_train, selected_y_train, batch_size=64, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc9bc0ec820>

In [None]:
# Report how the model trained on the selected samples does on the test split.
final_metrics = model.evaluate(x_test, y_test)
loss, accuracy = final_metrics
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')

Test loss: 1.2045
Test accuracy: 0.5968


Active learning without the correctness filter

In [None]:
# Size of the initial labeled set: 5000 samples, capped at the dataset size.
num_entries = min(5000, len(y_train))
max_index = len(x_train)

# Seeded draw without replacement. The unseeded np.random.choice used before
# gave a different starting set on every run, so results were not reproducible.
rng = np.random.default_rng(42)
random_indices = rng.choice(max_index, size=num_entries, replace=False)

# Select random entries: the initial labeled set.
y_train = np.array(y_train)
selected_x_train = x_train[random_indices]
selected_y_train = y_train[random_indices]

# Every training index that was not drawn forms the remaining pool.
remaining_indices = np.setdiff1d(np.arange(max_index), random_indices)
remaining_x_train = x_train[remaining_indices]
remaining_y_train = y_train[remaining_indices]

# Fresh copy of CIFAR-10 that is not passed through preprocessing, so
# y_train1 keeps the labels exactly as load_data() returns them; restrict it
# to the same pool indices.
(x_train1, y_train1), (x_test1, y_test1) = cifar10.load_data()
remaining_x_train1 = x_train1[remaining_indices]
remaining_y_train1 = y_train1[remaining_indices]

In [None]:
# Active learning loop without the correctness filter.
# Each round: train a fresh model on the labeled set, score the remaining
# pool, and move the least confident pool samples into the labeled set.
num_iterations = 10
num_additional_samples = 2000  # pool samples moved to the labeled set per round

for iteration in range(num_iterations):
    # A new, untrained model every round, so each round trains from scratch.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(selected_x_train, selected_y_train, batch_size=64, epochs=10)

    # NOTE: these metrics are measured on the remaining pool, not on the
    # CIFAR-10 test split, even though the printed label says "Test".
    loss, accuracy = model.evaluate(remaining_x_train, remaining_y_train)
    print(f'Test loss: {loss:.4f}')
    print(f'Test accuracy: {accuracy:.4f}')

    # Score the current pool with the model that was just trained. This step
    # was missing: the selection below used whatever `max_probabilities` array
    # an earlier cell had left in the kernel, so it ignored the current model
    # and picked the same stale positions in every round.
    probabilities = model.predict(remaining_x_train)
    max_probabilities = np.max(probabilities, axis=1)   # confidence per sample

    # Least confident samples first.
    additional_indices = np.argsort(max_probabilities)[:num_additional_samples]

    # Add the additional samples to the selected dataset
    selected_x_train = np.concatenate((selected_x_train, remaining_x_train[additional_indices]))
    selected_y_train = np.concatenate((selected_y_train, remaining_y_train[additional_indices]))

    # Remove the additional samples from the remaining dataset; the three
    # arrays must be trimmed together so their rows stay aligned.
    remaining_x_train = np.delete(remaining_x_train, additional_indices, axis=0)
    remaining_y_train = np.delete(remaining_y_train, additional_indices, axis=0)
    remaining_y_train1 = np.delete(remaining_y_train1, additional_indices, axis=0)

    print("Iteration:", iteration + 1)
    print("Number of labeled samples:", len(selected_x_train))
    print("Number of remaining samples:", len(remaining_x_train))

In [None]:
# Build the network from a named layer list, then train it from scratch on the
# labeled set left behind by the preceding loop.
cnn_layers = [
    tf.keras.layers.Conv2D(16, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(rate=0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(rate=0.5),
    tf.keras.layers.Dense(units=10, activation='softmax'),
]
model = tf.keras.Sequential(cnn_layers)

# Disabled alternative (single conv stage, no dropout):
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax')
# ])

model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
model.fit(selected_x_train, selected_y_train, epochs=10, batch_size=64)

In [None]:
# Test-split metrics for the model trained in the previous cell.
loss, accuracy = model.evaluate(x=x_test, y=y_test)
print(f'Test loss: {loss:.4f}', f'Test accuracy: {accuracy:.4f}', sep='\n')

Different baseline: 10,000 initial labeled samples, 1,000 added per iteration

In [None]:
# Size of the initial labeled set: 10000 samples, capped at the dataset size.
num_entries = min(10000, len(y_train))
max_index = len(x_train)

# Seeded draw without replacement. The unseeded np.random.choice used before
# gave a different starting set on every run, so results were not reproducible.
rng = np.random.default_rng(42)
random_indices = rng.choice(max_index, size=num_entries, replace=False)

# Select random entries: the initial labeled set.
y_train = np.array(y_train)
selected_x_train = x_train[random_indices]
selected_y_train = y_train[random_indices]

# Every training index that was not drawn forms the remaining pool.
remaining_indices = np.setdiff1d(np.arange(max_index), random_indices)
remaining_x_train = x_train[remaining_indices]
remaining_y_train = y_train[remaining_indices]

# Fresh copy of CIFAR-10 that is not passed through preprocessing, so
# y_train1 keeps the labels exactly as load_data() returns them; restrict it
# to the same pool indices.
(x_train1, y_train1), (x_test1, y_test1) = cifar10.load_data()
remaining_x_train1 = x_train1[remaining_indices]
remaining_y_train1 = y_train1[remaining_indices]

In [None]:
# Active learning loop (with correctness filter), adding 1000 samples per round.
# Each round: train a fresh model on the labeled set, score the remaining
# pool, and move the lowest-scoring pool samples into the labeled set.
num_iterations = 10
num_additional_samples = 1000  # pool samples moved to the labeled set per round

for iteration in range(num_iterations):
    # A new, untrained model every round, so each round trains from scratch.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(selected_x_train, selected_y_train, batch_size=64, epochs=10)

    # NOTE: these metrics are measured on the remaining pool, not on the
    # CIFAR-10 test split, even though the printed label says "Test".
    loss, accuracy = model.evaluate(remaining_x_train, remaining_y_train)
    print(f'Test loss: {loss:.4f}')
    print(f'Test accuracy: {accuracy:.4f}')

    # Score the remaining pool once (the original ran model.predict twice on
    # the same data every round).
    probabilities = model.predict(remaining_x_train)
    predictions_int = np.argmax(probabilities, axis=1)   # predicted class per sample
    max_probabilities = np.max(probabilities, axis=1)    # confidence per sample

    # Correctness filter: set the confidence of every misclassified pool sample
    # to 0 so that those samples sort to the front. reshape(-1) flattens the
    # label array so it compares element-wise with the 1-D predictions.
    # NOTE(review): this consults the true labels of pool samples before they
    # are "labeled" -- confirm that this oracle access is intended.
    misclassified = predictions_int != remaining_y_train1.reshape(-1)
    max_probabilities[misclassified] = 0

    # Lowest scores first: misclassified samples, then the least confident ones.
    additional_indices = np.argsort(max_probabilities)[:num_additional_samples]

    # Add the additional samples to the selected dataset
    selected_x_train = np.concatenate((selected_x_train, remaining_x_train[additional_indices]))
    selected_y_train = np.concatenate((selected_y_train, remaining_y_train[additional_indices]))

    # Remove the additional samples from the remaining dataset; the three
    # arrays must be trimmed together so their rows stay aligned.
    remaining_x_train = np.delete(remaining_x_train, additional_indices, axis=0)
    remaining_y_train = np.delete(remaining_y_train, additional_indices, axis=0)
    remaining_y_train1 = np.delete(remaining_y_train1, additional_indices, axis=0)

    print("Iteration:", iteration + 1)
    print("Number of labeled samples:", len(selected_x_train))
    print("Number of remaining samples:", len(remaining_x_train))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.2628
Test accuracy: 0.5534
Iteration: 1
Number of labeled samples: 11000
Number of remaining samples: 39000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.1812
Test accuracy: 0.5867
Iteration: 2
Number of labeled samples: 12000
Number of remaining samples: 38000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.2427
Test accuracy: 0.5691
Iteration: 3
Number of labeled samples: 13000
Number of remaining samples: 37000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.1898
Test accuracy: 0.6002
Iteration: 4
Number of labeled samples: 14000
Number of remaining samples: 36000
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 

In [None]:
# Fresh model for the final training run on the labeled set built by the
# preceding loop. `layers` is a short alias for the Keras layers module.
layers = tf.keras.layers
model = tf.keras.Sequential([
    # Feature extractor: two conv -> max-pool -> dropout stages.
    layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    # Classifier head ending in a 10-way softmax.
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])

# Disabled alternative (single conv stage, no dropout):
# model = tf.keras.Sequential([
#     tf.keras.layers.Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)),
#     tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax')
# ])

model.compile(metrics=['accuracy'], loss='categorical_crossentropy', optimizer='adam')
model.fit(x=selected_x_train, y=selected_y_train, batch_size=64, epochs=10)

In [None]:
# Evaluate the final model of this experiment on the test split.
evaluation = model.evaluate(x_test, y_test)
loss = evaluation[0]
accuracy = evaluation[1]
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')