# Train a Patch Classifier via Weak Labeling

This notebook takes strongly labeled data, trains an ensemble of patch classifiers and an SVM on it, and then combines their predictions with those of a previously trained pixel classifier to generate weak labels for a large set of unlabeled data. The weakly labeled data is then combined with the strongly labeled data to train a new model.

In [None]:
#system and IO
import sys
import os
import pickle

#Math and display tools
import matplotlib.pyplot as plt
import numpy as np
import random
from tqdm import tqdm

#Machine learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn import svm
from sklearn.metrics import classification_report

ENSEMBLE_SIZE = 32 #number of patch classifiers trained for / loaded into the ensemble
BATCH_SIZE = 128
EPOCHS = 256

data_root = "spectrogram_patches_3mo-mosaics_2x-int"

#find all data files in the directory
#Each sample set is stored as "<stem>_patch_arrays.pkl"; match on the full
#suffix and cut exactly that suffix off, so that unrelated files which merely
#contain "arrays.pkl" are not turned into bogus stems.
_ARRAYS_SUFFIX = "_patch_arrays.pkl"
#sorted() because os.listdir returns entries in arbitrary order
data_paths = sorted(
    f[:-len(_ARRAYS_SUFFIX)]
    for f in os.listdir(data_root)
    if f.endswith(_ARRAYS_SUFFIX)
)

#Means and standard deviations used for normalization
#(mean, standard deviation) of each of the 12 input channels, in channel order
_CHANNEL_STATS = (
    (1367.8407, 249.14418),
    (1104.4116, 317.69983),
    (1026.8099, 340.8048),
    (856.1295, 467.8019),
    (1072.1476, 390.11594),
    (1880.3287, 529.972),
    (2288.875, 699.90826),
    (2104.5999, 680.56006),
    (2508.7764, 798.34937),
    (305.3795, 108.10846),
    (1686.0194, 651.8683),
    (946.1319, 568.5347),
)
MEANS = [mean for mean, _ in _CHANNEL_STATS]
STDEVS = [stdev for _, stdev in _CHANNEL_STATS]

#class to one-hot (keras has a function for this, but it has issues on my machine)
def to_onehot(array, num_classes):
    """Encode a sequence of class indices as float32 one-hot rows.

    Args:
        array: iterable of class indices (anything int() accepts).
        num_classes: width of every one-hot row.

    Returns:
        float32 array holding one row per entry of `array`.
    """
    rows = []
    for label in array:
        row = [0] * num_classes
        row[int(label)] = 1 #mark the position of this sample's class
        rows.append(row)
    return np.float32(rows)

#takes a random crop of an image
def random_crop2D(img, out_dim):
    """Return a randomly positioned crop of a channels-last image.

    Args:
        img: array indexed as (rows, cols, channels).
        out_dim: (rows, cols) of the crop.

    Returns:
        A copy of the selected window, shaped (out_dim[0], out_dim[1], channels).

    Raises:
        ValueError: if the requested crop is larger than the image.
    """
    max_row = img.shape[0] - out_dim[0]
    max_col = img.shape[1] - out_dim[1]
    if max_row < 0 or max_col < 0:
        raise ValueError("crop size " + str(tuple(out_dim)) + " exceeds image size "
                         + str((img.shape[0], img.shape[1])))
    #randrange excludes its stop value, so +1 keeps the last valid offset
    #reachable and lets a crop of the full image size succeed
    row = random.randrange(0, max_row + 1)
    col = random.randrange(0, max_col + 1)
    return img[row:(row + out_dim[0]), col:(col + out_dim[1]), :].copy()


# Load the strongly labeled data

In [None]:
raw_data = []
labels = []

#read the patches and the labels of every sample set
for stem in data_paths:
    arrays_file = os.path.join(data_root, stem + "_patch_arrays.pkl")
    labels_file = os.path.join(data_root, stem + "_patch_array_labels.pkl")
    with open(arrays_file, "rb") as handle:
        #naive upper left crop: keep only the first 48x48 pixels of each patch
        raw_data.extend(np.asarray(img)[:, :48, :48, :] for img in pickle.load(handle))
    with open(labels_file, "rb") as handle:
        labels.extend(pickle.load(handle))
raw_data = np.float32(raw_data) #convert to np array
labels = np.float32(to_onehot(labels, 2)) #one-hot labels as np array

#Normalize each of the 12 channels with its global mean and standard deviation
normalized_data = raw_data.copy()
for channel in range(12):
    normalized_data[..., channel] = (raw_data[..., channel] - MEANS[channel]) / STDEVS[channel]

#stack the two entries of axis 1 along the channel axis -> 48x48x24 dataset
flattened_data = np.concatenate((normalized_data[:, 0], normalized_data[:, 1]), 3)

#take four random 28x28 crops of every 48x48x24 patch for the SVM training dataset
X_cropped = []
Y_cropped = []
for idx, patch in enumerate(flattened_data):
    for _ in range(4):
        X_cropped.append(random_crop2D(patch, (28, 28)))
        Y_cropped.append(labels[idx])

#shuffle cropped data (crops and labels are shuffled together)
zipped = list(zip(X_cropped, Y_cropped)) #inefficient, but it works
random.shuffle(zipped)
X_cropped, Y_cropped = (np.float32(column) for column in zip(*zipped))

#Create SVM data: every crop flattened to one dimension
X_flat = X_cropped.reshape(X_cropped.shape[0], -1)
Y_flat = Y_cropped

#shuffle 2D-data
zipped = list(zip(flattened_data, labels))
random.shuffle(zipped)
flattened_data, labels = (np.float32(column) for column in zip(*zipped))

# Train ensemble of patch classifiers

In [None]:
def crop_generator(batch_size):
    """Endlessly yield augmented, randomly cropped training batches.

    Batches are drawn from the module-level `flattened_data` / `labels`
    through a Keras ImageDataGenerator (random rotation and flips, reflect
    fill); every augmented image is then reduced to a random 28x28 crop.

    Args:
        batch_size: batch size requested from the Keras iterator.

    Yields:
        (crops, labels) tuples, with the crops stacked into one array.
    """
    augmenter = ImageDataGenerator(
        featurewise_center=False,
        rotation_range=360,
        vertical_flip=True,
        horizontal_flip=True,
        fill_mode='reflect',
    )
    batches = augmenter.flow((flattened_data, labels), batch_size=batch_size)

    while True:
        batch_X, batch_Y = batches.next()
        crops = [random_crop2D(sample, (28, 28)) for sample in batch_X] #random crop
        yield (np.asarray(crops), batch_Y)

for member in range(ENSEMBLE_SIZE):
    crop_gen = crop_generator(BATCH_SIZE)

    #define model as determined from the hyperparameter optimization experiment:
    #three stages of three same-padded 32-filter convolutions (5x5, 4x4, 3x3 kernels),
    #each stage closed by a 2x2 max pooling
    input_img = keras.Input(shape=(28, 28, 24))
    x = input_img
    for kernel in (5, 4, 3):
        for _ in range(3):
            x = layers.Conv2D(32, (kernel, kernel), padding='same')(x)
            x = layers.LeakyReLU()(x)
        x = layers.MaxPooling2D(2)(x)

    #classification head: two dropout-regularized dense layers and a 2-way softmax
    x = layers.Flatten()(x)
    for _ in range(2):
        x = layers.Dropout(0.5)(x)
        x = layers.Dense(512, activation=layers.ELU())(x)
    x = layers.Dropout(0.5)(x)
    out = layers.Dense(2, activation="softmax")(x)

    model = keras.Model(input_img, out, name="classifier")

    model.compile(
        loss="binary_crossentropy", #binary_crossentropy is default for two classes
        optimizer=keras.optimizers.Adam(epsilon=0.001), #haven't tried other optimizers, might be a good idea, but Adam is quite good usually
        metrics=["accuracy"],
    )

    #keep only the checkpoint with the best training accuracy of this member
    mcp_save = keras.callbacks.ModelCheckpoint(
        "patch_ensemble/model_" + str(member) + ".hdf5",
        save_best_only=True,
        monitor='accuracy',
        verbose=1,
        mode='max',
    )
    #cut the learning rate by 10x when the training loss stalls for 7 epochs
    reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(
        monitor='loss',
        factor=0.1,
        patience=7,
        verbose=1,
        min_delta=1e-4,
        mode='min',
    )

    #train the model
    model.fit(
        crop_gen,
        epochs=EPOCHS,
        steps_per_epoch=flattened_data.shape[0]//BATCH_SIZE,
        callbacks=[mcp_save, reduce_lr_loss],
        verbose=1,
    )
    #model.save("patch_ensemble/model_" + str(i) + ".hdf5")

# Train SVM

In [None]:
#Train SVM on the flattened crops; the target is the second one-hot column
svm_classifier = svm.SVC()
svm_classifier.fit(X_flat, Y_flat[:,1]) #this will take a loooong time!

#save SVM (the with-block closes the file so the pickle is flushed to disk)
with open("SVM.pkl", "wb") as f:
    pickle.dump(svm_classifier, f)

# Generate Weak Labels

In [None]:
#Generate weak labels
def predict_ensemble(X, models, return_disagreement=False):
    """Majority-vote prediction of an ensemble of classifiers.

    Each model's soft output is first turned into a hard one-hot vote; the
    class with the most votes wins (ties go to the lowest class index).

    Args:
        X: input batch handed unchanged to every model's predict().
        models: non-empty sequence of objects exposing predict(X), each
            returning an array of shape (len(X), num_classes).
        return_disagreement: when True, also return the per-sample
            disagreement of the ensemble.

    Returns:
        One-hot majority decision of shape (len(X), num_classes). With
        return_disagreement=True, a tuple of that array and the standard
        deviation, across models, of each sample's vote for class 0.

    Raises:
        ValueError: if `models` is empty.
    """
    if len(models) == 0:
        raise ValueError("predict_ensemble needs at least one model")

    #run ensemble and convert soft predictions to absolute (one-hot) votes
    votes = list()
    for model in models:
        soft = np.asarray(model.predict(X))
        hard = np.zeros_like(soft)
        hard[np.arange(soft.shape[0]), np.argmax(soft, axis=1)] = 1.0
        votes.append(hard)
    votes = np.stack(votes) #(len(models), len(X), num_classes)

    #aggregate the votes into a fresh array so the individual votes stay
    #intact for the disagreement computed below
    tally = votes.sum(axis=0)

    #pick the prediction with the highest votes
    majority = np.zeros_like(tally)
    majority[np.arange(tally.shape[0]), np.argmax(tally, axis=1)] = 1.0

    #compute the std of the votes (a measure of disagreement)
    if return_disagreement:
        return majority, np.std(votes[:, :, 0], 0)

    return majority

def create_weak_labels(X, patch_ensemble, pixel_model, SVM, weights, return_all=False):
    """Blend patch ensemble, pixel classifier and SVM into weighted soft labels.

    Args:
        X: normalized patches; the reshapes below expect (N, 28, 28, 24).
        patch_ensemble: models handed to predict_ensemble.
        pixel_model: model whose predict() receives the per-pixel array built
            below and whose output is regrouped into 28*28 pixels per image.
        SVM: classifier whose predict() is called on one flattened patch at a time.
        weights: three factors applied to the ensemble, pixel and SVM label
            (in that order) before the sum is renormalized.
        return_all: also return the intermediate predictions.

    Returns:
        Array of weak labels; with return_all=True a tuple
        (weak_labels, patch_preds, patch_disagreement, per_image_out, SVM_preds).
    """
    print("Running ensemble")
    patch_preds, patch_disagreement = predict_ensemble(X, patch_ensemble, True)

    print("Running pixel classifier")
    #regroup the data for the px classifier: one row per pixel, the 24 channels
    #split into two halves of 12 -> (pixels, 12, 2, 1)
    per_pixel = np.reshape(X, (-1, 24))
    per_pixel = np.asarray([per_pixel[:, :12], per_pixel[:, 12:]]) #this copy doubles the RAM requirement
    per_pixel = np.transpose(per_pixel, (1, 2, 0))
    per_pixel.shape = per_pixel.shape + (1,)
    for band in range(12):
        #undo the original normalization, then scale by 1/3000
        per_pixel[:, band, :, :] = (per_pixel[:, band, :, :]*STDEVS[band] + MEANS[band])/3000.0

    #Run the 1 px classifier and reduce its output to one vote per image
    pixel_out = pixel_model.predict(per_pixel)
    pixel_out.shape = (-1, 28*28, 2)
    mean_scores = np.mean(pixel_out, axis=1)
    #threshold at 0.02, a predetermined value
    per_image_out = to_onehot(np.int32(mean_scores[:, 1] > 0.02), 2)

    print("Running SVM")
    SVM_preds = np.asarray([
        to_onehot(list(SVM.predict(X[i].reshape((1, -1)))), 2)[0]
        for i in tqdm(range(0, X.shape[0]))
    ])

    weak_labels = []
    for i in range(X.shape[0]):
        #ensemble vote, scaled down by the disagreement of its members
        blended = patch_preds[i]*(1 - 2*patch_disagreement[i])

        #give the remaining probability mass to the other class
        if blended[0] == 0:
            blended[0] = 1 - blended[1]
        else:
            blended[1] = 1 - blended[0]

        blended = blended*weights[0] + per_image_out[i]*weights[1] + SVM_preds[i]*weights[2]

        #renormalize so that the label sums to 1.0
        total = np.sum(blended)
        if total != 1:
            blended /= total
        weak_labels.append(blended)

    weak_labels = np.asarray(weak_labels)
    if not return_all:
        return weak_labels
    return weak_labels, patch_preds, patch_disagreement, per_image_out, SVM_preds

def create_weak_labels_bayesian(X, patch_ensemble, pixel_model, SVM, return_all=False):
    """Create soft weak labels by Bayesian fusion of the three classifiers.

    The patch ensemble supplies the prior: its majority vote scaled by
    (1 - 2*disagreement), with the remaining probability mass given to the
    other class. The prior is then updated with Bayes' rule twice, first with
    the thresholded per-image vote of the pixel classifier and then with the
    SVM vote, using the hard-coded recall figures listed in the body.

    Args:
        X: normalized patches; the reshapes below expect (N, 28, 28, 24).
        patch_ensemble: models handed to predict_ensemble.
        pixel_model: model whose predict() receives the per-pixel array built
            below and whose output is regrouped into 28*28 pixels per image.
        SVM: classifier whose predict() is called on one flattened patch at a
            time and is expected to answer 0 or 1.
        return_all: also return the intermediate predictions.

    Returns:
        Array of weak labels; with return_all=True a tuple
        (weak_labels, patch_preds, patch_disagreement, per_image_out, SVM_preds).
    """
    #1 px classifier stats: recall of the positive / negative class and the
    #matching false negative / false positive rates
    PX_TPR, PX_FNR = 0.83, 0.17
    PX_TNR, PX_FPR = 0.89, 0.11
    #SVM classifier stats, same meaning
    SVM_TPR, SVM_FNR = 0.78, 0.22
    SVM_TNR, SVM_FPR = 0.90, 0.10

    def bayes_update(prior, p_vote_if_true, p_vote_if_false):
        #posterior of a hypothesis after observing a classifier vote
        return prior*p_vote_if_true/(prior*p_vote_if_true + (1-prior)*p_vote_if_false)

    print("Running ensemble")
    patch_preds, patch_disagreement = predict_ensemble(X, patch_ensemble, True) #patch predictions
    SVM_preds = list()
    print("Running pixel classifier")

    #we need to do some pretty massive adjustments to the data to work with the px classifier:
    #one row per pixel, the 24 channels split into two halves of 12 -> (pixels, 12, 2, 1)
    px_2D = np.reshape(X, (-1,24))
    px_2D = np.asarray([px_2D[:,:12], px_2D[:,12:]]) #this essentially doubles the RAM requirement, not optimal
    px_2D = np.transpose(px_2D, (1,2,0))
    px_2D = px_2D[..., np.newaxis] #trailing axis added as a view instead of assigning to .shape
    for i in range(0, 12):
        px_2D[:,i,:,:] = (px_2D[:,i,:,:]*STDEVS[i]+MEANS[i])/3000.0 #undo the original normalization

    #Run the 1 px classifier and reduce its output to one 0/1 vote per image
    pixel_out = np.reshape(pixel_model.predict(px_2D), (-1, 28*28, 2))
    per_image_out = np.mean(pixel_out, axis=1)
    per_image_out = np.int32(per_image_out[:,1] > 0.02) #threshold at 0.02, a predetermined value

    print("Running SVM")
    for i in tqdm(range(0, X.shape[0])):
        SVM_preds.append(SVM.predict(X[i].reshape((1,-1))))
    SVM_preds = np.asarray(SVM_preds)

    weak_labels = list()
    for i in range(0, X.shape[0]):
        #prior: ensemble vote, scaled down by the disagreement of its members
        weak_label = patch_preds[i]*(1-2*patch_disagreement[i])

        #ensure weak label total adds to 1.0
        if weak_label[0] == 0:
            weak_label[0] = 1 - weak_label[1]
        else:
            weak_label[1] = 1 - weak_label[0]

        #update hypothesis for 1px classifier
        if per_image_out[i] == 0: #No plastic detected
            weak_label[0] = bayes_update(weak_label[0], PX_TNR, PX_FNR)
            weak_label[1] = 1 - weak_label[0]
        elif per_image_out[i] == 1: #plastic detected
            weak_label[1] = bayes_update(weak_label[1], PX_TPR, PX_FPR)
            weak_label[0] = 1 - weak_label[1]
        else:
            print("Error in 1px Bayesian update, this should not be reached!")

        #update hypothesis for SVM classifier
        if SVM_preds[i] == 0: #No plastic detected
            weak_label[0] = bayes_update(weak_label[0], SVM_TNR, SVM_FNR)
            weak_label[1] = 1 - weak_label[0]
        elif SVM_preds[i] == 1: #plastic detected
            weak_label[1] = bayes_update(weak_label[1], SVM_TPR, SVM_FPR)
            weak_label[0] = 1 - weak_label[1]
        else:
            print("Error in SVM Bayesian update, this should not be reached!")

        weak_labels.append(weak_label)
    if return_all:
        return np.asarray(weak_labels), patch_preds, patch_disagreement, per_image_out, SVM_preds
    return np.asarray(weak_labels)

In [None]:
#Load unlabeled data
unlabeled_norm = np.load("unlabeled_28x28x24_99.npy")

#Load ensemble models
#NOTE(review): the training cell saves to "patch_ensemble/", this cell reads
#"patch_ensemble_v1.0/" - confirm that this is the intended model set
models = [
    keras.models.load_model("patch_ensemble_v1.0/model_" + str(i) + ".hdf5")
    for i in range(ENSEMBLE_SIZE)
]

#Load 1px model
pixel_model = keras.models.load_model("spectrogram_v0.0.11_2021-07-13.h5")

#Load SVM; the with-block closes the file handle again
#NOTE: unpickling can execute arbitrary code - only load an SVM.pkl you created yourself
with open("SVM.pkl", "rb") as f:
    SVM = pickle.load(f)

In [None]:
#create weak labels for the unlabeled patches with the Bayesian fusion
weak_labels = create_weak_labels_bayesian(
    X=unlabeled_norm,
    patch_ensemble=models,
    pixel_model=pixel_model,
    SVM=SVM,
)

In [None]:
#save new data: strongly labeled crops first, weakly labeled patches appended
for filename, parts in (
    ('weak_data.npy', (X_cropped, unlabeled_norm)),
    ('weak_labels.npy', (Y_cropped, weak_labels)),
):
    with open(filename, 'wb') as f:
        np.save(f, np.concatenate(parts))

# Train new model on weakly labeled data

In [None]:
#train new model on weak data
X_weak = np.load("../Weak Data/weak_data_v2.0.npy")
Y_weak = np.load("../Weak Data/weak_labels_v2.0.npy")

#shuffle data (samples and labels are shuffled together)
zipped = list(zip(X_weak, Y_weak)) #inefficient, but it works
random.shuffle(zipped)
X_weak, Y_weak = (np.float32(column) for column in zip(*zipped))

#define data augmentation parameters
augmentation_parameters = dict(
    featurewise_center=False,
    rotation_range=360,
    vertical_flip=True,
    horizontal_flip=True,
    fill_mode='reflect',
)

datagen = ImageDataGenerator(**augmentation_parameters) #create the datagenerator for augmentation

#define model as determined from the hyperparameter optimization experiment:
#three stages of three same-padded 32-filter convolutions (5x5, 4x4, 3x3 kernels),
#each stage closed by a 2x2 max pooling
input_img = keras.Input(shape=(28, 28, 24))
x = input_img
for kernel in (5, 4, 3):
    for _ in range(3):
        x = layers.Conv2D(32, (kernel, kernel), padding='same')(x)
        x = layers.LeakyReLU()(x)
    x = layers.MaxPooling2D(2)(x)

#classification head: two dropout-regularized dense layers and a 2-way softmax
x = layers.Flatten()(x)
for _ in range(2):
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(512, activation=layers.ELU())(x)
x = layers.Dropout(0.5)(x)
out = layers.Dense(2, activation="softmax")(x)

model = keras.Model(input_img, out, name="classifier")

model.compile(
    loss="binary_crossentropy", #binary_crossentropy is default for two classes
    optimizer=keras.optimizers.Adam(epsilon=0.001), #haven't tried other optimizers, might be a good idea, but Adam is quite good usually
    metrics=["accuracy"],
)

#keep only the checkpoint with the lowest training loss
mcp_save = keras.callbacks.ModelCheckpoint(
    'deep_plastic_model.hdf5',
    save_best_only=True,
    monitor='loss',
    verbose=1,
    mode='min',
)
#cut the learning rate by 10x when the training loss stalls for 7 epochs
reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.1,
    patience=7,
    verbose=1,
    min_delta=1e-4,
    mode='min',
)

#train the model
model.fit(
    datagen.flow(X_weak, Y_weak, batch_size=BATCH_SIZE),
    epochs=EPOCHS,
    callbacks=[mcp_save, reduce_lr_loss],
    verbose=1,
)

In [None]:
#Disabled variant of the weak-data training cell (keras.Sequential model, default "adam"
#optimizer, checkpoint written to model.hdf5). It is wrapped in a string literal, so none
#of the code below is executed.
'''
#train new model on weak data
X_weak = np.load("weak_data.npy")
Y_weak = np.load("weak_labels.npy")

#shuffle data
zipped = list(zip(X_weak, Y_weak)) #inefficient, but it works
random.shuffle(zipped)
X_weak, Y_weak = zip(*zipped)
X_weak = np.float32(X_weak)
Y_weak = np.float32(Y_weak)

#define data augmentation parameters
augmentation_parameters = {
    'featurewise_center': False,
    'rotation_range': 360,
    'vertical_flip': True,
    'horizontal_flip': True,
    'fill_mode': 'reflect'
}

datagen = ImageDataGenerator(**augmentation_parameters) #create the datagenerator for augmentation
#define model as determined from the hyperparameter optimization experiment
model = keras.Sequential([
            keras.Input(shape=(28,28,24)),
            layers.Conv2D(32, kernel_size=(3), padding='same'),
            layers.LeakyReLU(),
            layers.Conv2D(32, kernel_size=(3), padding='same'),
            layers.LeakyReLU(),
            layers.MaxPooling2D(2),
        
            layers.Conv2D(32, kernel_size=(4), padding='same'),
            layers.LeakyReLU(),
            layers.Conv2D(32, kernel_size=(4), padding='same'),
            layers.LeakyReLU(),
            layers.MaxPooling2D(2),
        
            layers.Conv2D(32, kernel_size=(2), padding='same'),
            layers.LeakyReLU(),
            layers.Conv2D(32, kernel_size=(2), padding='same'),
            layers.LeakyReLU(),
            layers.MaxPooling2D(2),
        
            layers.Flatten(),
            layers.Dropout(0.5),
            layers.Dense(512, activation=layers.ELU()),
            layers.Dropout(0.5),
            layers.Dense(512, activation=layers.ELU()),
            layers.Dropout(0.5),
            layers.Dense(2, activation="softmax")])
model.summary()

model.compile(loss="binary_crossentropy", #binary_crossentropy is default for two classes
              optimizer="adam", #haven't tried other optimizers, might be a good idea, but Adam is quite good usually
              metrics=["accuracy"])

mcp_save = keras.callbacks.ModelCheckpoint('model.hdf5', save_best_only=True, 
                                           monitor='loss', verbose=1, mode='min')
reduce_lr_loss = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, patience=7, verbose=1, min_delta=1e-4, mode='min')

#train the model
model.fit(datagen.flow(X_weak, Y_weak, batch_size=BATCH_SIZE), 
          epochs=EPOCHS,
          callbacks=[mcp_save, reduce_lr_loss],
          verbose = 1
         )
'''

In [None]:
#Test ensemble performance
def predict_ensemble(X, models, return_disagreement=False):
    """Majority-vote prediction of an ensemble of classifiers.

    Each model's soft output is first turned into a hard one-hot vote; the
    class with the most votes wins (ties go to the lowest class index).

    Args:
        X: input batch handed unchanged to every model's predict().
        models: non-empty sequence of objects exposing predict(X), each
            returning an array of shape (len(X), num_classes).
        return_disagreement: when True, also return the per-sample
            disagreement of the ensemble.

    Returns:
        One-hot majority decision of shape (len(X), num_classes). With
        return_disagreement=True, a tuple of that array and the standard
        deviation, across models, of each sample's vote for class 0.

    Raises:
        ValueError: if `models` is empty.
    """
    if len(models) == 0:
        raise ValueError("predict_ensemble needs at least one model")

    #run ensemble and convert soft predictions to absolute (one-hot) votes
    votes = list()
    for model in models:
        soft = np.asarray(model.predict(X))
        hard = np.zeros_like(soft)
        hard[np.arange(soft.shape[0]), np.argmax(soft, axis=1)] = 1.0
        votes.append(hard)
    votes = np.stack(votes) #(len(models), len(X), num_classes)

    #aggregate the votes into a fresh array so the individual votes stay
    #intact for the disagreement computed below
    tally = votes.sum(axis=0)

    #pick the prediction with the highest votes
    majority = np.zeros_like(tally)
    majority[np.arange(tally.shape[0]), np.argmax(tally, axis=1)] = 1.0

    #compute the std of the votes (a measure of disagreement)
    if return_disagreement:
        return majority, np.std(votes[:, :, 0], 0)

    return majority

#Load ensemble models
models = [
    keras.models.load_model("patch_ensemble_large/model_" + str(member) + ".hdf5")
    for member in range(ENSEMBLE_SIZE)
]

#score the majority vote of the ensemble against the labels of the cropped data
ensemble_votes = predict_ensemble(X_cropped, models)
print(classification_report(Y_cropped[:,1], ensemble_votes[:,1] > 0.5, target_names=['No TPA', 'TPA']))

In [None]:
#Test weak labeled model performance on the strongly labeled crops
model = keras.models.load_model("deep_plastic_model.hdf5")
positive_scores = model.predict(X_cropped)[:,1] #second softmax output of every crop
print(classification_report(Y_cropped[:,1], positive_scores > 0.5, target_names=['No TPA', 'TPA']))

In [None]:
#Test SVM Performance
#the with-block closes the file handle again
#NOTE: unpickling can execute arbitrary code - only load an SVM.pkl you created yourself
with open("SVM.pkl", "rb") as f:
    SVM = pickle.load(f)
#predict one flattened crop at a time so that tqdm can show the progress
SVM_preds = [SVM.predict([X_flat[i]])[0] for i in tqdm(range(0, X_flat.shape[0]))]
print(classification_report(Y_flat[:,1], SVM_preds, target_names=['No TPA', 'TPA']))

In [None]:
#Load ensemble models
models = [
    keras.models.load_model("patch_ensemble/model_" + str(member) + ".hdf5")
    for member in range(ENSEMBLE_SIZE)
]

#print a separate classification report for every member of the ensemble
for i, member_model in enumerate(models):
    member_scores = member_model.predict(X_cropped)[:,1]
    print("=======================MODEL " + str(i) + "===========================")
    print(classification_report(Y_cropped[:,1], member_scores > 0.5, target_names=['No TPA', 'TPA']))
    print("=========================================================")

In [None]:
#Test weak labeled model performance (uses whatever `model` currently refers to)
positive_scores = model.predict(X_cropped)[:,1] #second softmax output of every crop
print(classification_report(Y_cropped[:,1], positive_scores > 0.5, target_names=['No TPA', 'TPA']))

In [None]:
#Load ensemble models
models = [
    keras.models.load_model("patch_ensemble_large/model_" + str(i) + ".hdf5")
    for i in range(ENSEMBLE_SIZE)
]

#Load 1px model
pixel_model = keras.models.load_model("spectrogram_v0.0.11_2021-07-13.h5")

#Load SVM; the with-block closes the file handle again
#NOTE: unpickling can execute arbitrary code - only load an SVM.pkl you created yourself
with open("SVM.pkl", "rb") as f:
    SVM = pickle.load(f)

#weak labels for the strongly labeled crops
asdf = create_weak_labels_bayesian(X_cropped, models, pixel_model, SVM)

In [None]:
#training loss per epoch, as recorded in the History object attached to `model`
loss_per_epoch = model.history.history['loss']
plt.plot(loss_per_epoch)