In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.wrappers.scikit_learn import KerasClassifier

from modAL.models import ActiveLearner

import numpy as np
from keras.datasets import mnist
from sklearn.datasets import load_digits
from keras import backend as K
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
# load the file
def load_data(filename, nb_instance, all_instance):
  n = all_instance #number of records in file
  s = nb_instance #sample size
  skip = sorted(random.sample(range(1,n+1),n-s))

  #df = pd.read_csv(filename,sep="\t",nrows=nb_instance)
  df = pd.read_csv(filename,sep="\t",skiprows=skip)

  df.label_image = pd.Categorical(df.label_image)
  df['label_image_code'] = df.label_image.cat.codes

  df[['image','label_image','label_image_code']]

  df.describe()
  return df

# read, resize and flatten into 1-dimension array
def pre_process_image(data):
  img_arr = []
  for index,row in data.iterrows():
    read_img = cv2.imread(row['image'],1)
    img_resize = cv2.resize(read_img,(224,224))
    #img_ravel = img_resize.ravel()
    img_arr.append(img_resize)

  img_np = np.array(img_arr)

  return img_np

In [None]:
# Load CrisisMMD dataset

start_time = time.time()

#train_instance = 9601
#test_instance = 1534

all_train_instance = 9601
all_test_instance = 1534

# Use 10 % amount of data for initial phase
train_instance = 9601
test_instance = 1534

train_filename = "CrisisMMD/data/task_data/task_informative_text_img_agreed_lab_train.tsv"
test_filename = "CrisisMMD/data/task_data/task_informative_text_img_agreed_lab_test.tsv"

data_train = load_data(train_filename,train_instance, all_train_instance)
data_images_train = pre_process_image(data_train)

data_test = load_data(test_filename,test_instance, all_test_instance)
data_image_test = pre_process_image(data_test)

#X_train = data_images_train.reshape(train_instance,-1)
X_train = data_images_train
y_train = data_train['label_image_code'].values

#X_test = data_image_test.reshape(test_instance,-1)
X_test = data_image_test
y_test = data_test['label_image_code'].values

final_time = time.time() - start_time

In [2]:
# build function for the Keras' scikit-learn API

def create_keras_model():
    """
    This function compiles and returns a Keras model.
    Should be passed to KerasClassifier in the Keras scikit-learn API.
    """

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(8, 8, 1)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    
    opt = Adam(learning_rate=0.000001)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model


In [23]:
# create the classifier
classifier = KerasClassifier(create_keras_model)

In [22]:
K.clear_session()

In [24]:
# read training data
# (X_train, y_train), (X_test, y_test) = mnist.load_data()
# X_train = X_train.reshape(60000, 28, 28, 1).astype('float32') / 255
# X_test = X_test.reshape(10000, 28, 28, 1).astype('float32') / 255

digits = load_digits()
n_samples = len(digits.images)

X = digits.images.reshape(n_samples,-1)
y = digits.target

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.25,shuffle=True)

X_train = X_train.reshape(1347, 8, 8, 1)
X_test = X_test.reshape(450, 8, 8, 1)
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)



# assemble initial data
n_initial = 500
initial_idx = np.random.choice(range(len(X_train)), size=n_initial, replace=False)


X_initial = X_train[initial_idx]
y_initial = y_train[initial_idx]

# generate the pool
# remove the initial data from the training dataset
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)

In [25]:
# initialize ActiveLearner
learner = ActiveLearner(
    estimator=classifier,
    X_training=X_initial, y_training=y_initial,
    verbose=0
)

In [26]:
# the active learning loop
n_queries = 100
for idx in range(n_queries):
    print('Query no. %d' % (idx + 1))
    query_idx, query_instance = learner.query(X_pool, n_instances=100)
    learner.teach(
        X=X_pool[query_idx], y=y_pool[query_idx], epochs=10,batch_size=8,
        verbose=0
    )
    # remove queried instance from pool
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx, axis=0)
    model_accuracy = learner.score(X_test,y_test,verbose=0)
    print('Accuracy after query {n}: {acc:0.4f}'.format(n=idx + 1, acc=model_accuracy))

Query no. 1
Accuracy after query 1: 0.0956
Query no. 2
Accuracy after query 2: 0.0489
Query no. 3
Accuracy after query 3: 0.1556
Query no. 4
Accuracy after query 4: 0.1267
Query no. 5
Accuracy after query 5: 0.1022
Query no. 6
Accuracy after query 6: 0.1311
Query no. 7
Accuracy after query 7: 0.0467
Query no. 8
Accuracy after query 8: 0.0956
Query no. 9


AssertionError: n_instances must be less or equal than the size of utility