In [1]:
!pip install wandb
import os



In [2]:
import wandb
from wandb.keras import WandbCallback
# Prevents W&B from logging for this session; comment this line out to re-enable logging.
os.environ["WANDB_DISABLED"] = "true"
# Authenticate with W&B.
wandb.login()

2024-03-05 10:54:35.665325: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-05 10:54:35.710244: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-05 10:54:35.849587: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 10:54:35.849614: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 10:54:35.850632: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

False

In [3]:
import datetime, os
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import Callback
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import random

from sklearn.metrics import classification_report, confusion_matrix

## Parameter Setup

### Parameters

In [4]:
# Name used when building model save/load paths and recorded in the W&B config.
MODEL_NAME = 'Resnet50-P'
# W&B project that runs are logged to.
PROJECT_NAME = 'Road-Deformations'
# Identifier of this run, recorded in the W&B config.
RUN_ID = "ResNetP-2"
# Seed shared by the data generators and the random number generators.
seed = 10
image_width = 1920
# NOTE(review): 1090 is unusual next to a width of 1920 (1080 is the common value) — confirm this is intended.
image_height = 1090
# NOTE(review): this tuple is passed as `target_size` to flow_from_directory, which Keras documents as
# (height, width); here it is (width, height). The model's input_shape uses the same order, so the two
# agree with each other — confirm the intended orientation before changing either one.
im_shape = (image_width, image_height)
# True: load a saved model to resume training. False: build a new model.
continue_previous_training = False
# Directory prefix for saved models (already ends with a slash).
MODEL_SAVE_DIR = './models/'

### Pipeline Parameters

In [5]:
# If the data was uploaded as zip files, the unzip cell below extracts them.
data_is_zipped = True
# Directory that holds the dataset (or the zip files containing it).
DATA_DIR = '/home/christian/Desktop/Creates/CREATEs Rework/Rework V2/Dataset'
# False: classes come from the directory layout. True: the label-driven cells below
# (merge sub-folders, move files according to the labels file) are executed.
LABELED_DATA = False
# Labels file read by the "Make Directory from Labels" cell; only used when LABELED_DATA is True.
labels_path = './pothole_labels.txt'

### HyperParameters

In [6]:
# Number of training epochs passed to model.fit.
EPOCHS = 2
# Batch size used by both the training and validation generators.
BATCH_SIZE = 3
# Exponentially decaying learning rate: starts at 1e-2 with a decay rate of 0.9 per 10000 steps.
# NOTE(review): with the step counts printed later in this notebook (194 steps per epoch, 2 epochs)
# the schedule barely moves away from its initial value — confirm decay_steps is intended.
LEARNING_RATE = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)
# Adam optimizer driven by the schedule above; reused by every model.compile call in this notebook.
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

2024-03-05 10:54:42.096463: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [7]:
# Keep the original dataset path: the unzip cell rewrites DATA_DIR and restores it from this copy when re-run.
DATA_DIR_BACKUP = DATA_DIR

## Logging Setup

In [8]:
# Run metadata and hyperparameters recorded with the W&B run.
run_config = {
    "name": MODEL_NAME,
    "id": RUN_ID,
    "output_activation": "softmax",
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metric": ["accuracy", "precision", "recall"],
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "image_width": image_width,
    "image_height": image_height,
    "num_log_batches": 15,
    "lr": LEARNING_RATE,
    "seed": seed,
}
wandb.init(project=PROJECT_NAME, entity="hechtc87", config=run_config)

# Seed TensorFlow, NumPy and Python's `random` from the value stored in the run config.
run_seed = wandb.config.seed
tf.random.set_seed(run_seed)
np.random.seed(run_seed)
random.seed(run_seed)
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so setting it here
# presumably only affects child processes — confirm whether that is sufficient.
os.environ['PYTHONHASHSEED'] = str(run_seed)

# Data Pipeline

In [9]:
# Shared image generator: multiplies pixel values by 1/255, applies light augmentation
# (zoom, rotation, horizontal flip) and reserves 20% of the images for the "validation" subset.
# NOTE(review): the augmentation settings apply to every subset drawn from this generator,
# including subset="validation".
data_generator = ImageDataGenerator(
        validation_split=0.2,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        rescale=1./255,
        # shear_range=0.2,
        zoom_range=0.1,
        rotation_range=20,
        horizontal_flip=True,
        fill_mode='nearest'
)

## Unzip Folders
Run the below code if the data is uploaded in zipped folders
TODO: Right now the code will throw an error if anything other than zip files are in the dataset directory. Make it so that this does not happen. Should the code simply ignore other files or tell the user? Idk yet

In [10]:
from os import listdir
from zipfile import ZipFile, is_zipfile

unzipped_folder_name = 'Extracted_Data'

# Re-running this cell must not append the extraction folder twice,
# so start again from the original dataset directory.
if DATA_DIR.find(unzipped_folder_name) != -1:
    DATA_DIR = DATA_DIR_BACKUP

filenames = listdir(DATA_DIR)

if unzipped_folder_name in filenames:
    print("Extracted Data folder already exists. Skipping this process.")
elif data_is_zipped:
    for f in filenames:
        path_to_zip = os.path.join(DATA_DIR, f)
        # Ignore anything that is not a zip archive (sub-folders, stray files) instead of crashing on it.
        if not is_zipfile(path_to_zip):
            print(f"Skipping {f}: not a zip archive.")
            continue

        # Each archive is extracted into its own sub-folder named after the archive
        # with the extension removed (e.g. '0.zip' -> 'Extracted_Data/0').
        directory_to_extract_to = os.path.join(
            DATA_DIR, unzipped_folder_name, os.path.splitext(f)[0])
        with ZipFile(path_to_zip, 'r') as zip_ref:
            zip_ref.extractall(directory_to_extract_to)

# Point DATA_DIR at the extraction location for the cells that follow.
if DATA_DIR.find(unzipped_folder_name) == -1:
    DATA_DIR = DATA_DIR + '/' + unzipped_folder_name

Extracted Data folder already exists. Skipping this process.


## Merge Subfolders into 'Data'
It is usually easiest to split a dataset into multiple zip files before uploading it. The code below merges the extracted data into a single folder.

#### _YOU NEED LABELS IN ORDER TO DO THIS_
Do not run this if your data is uploaded with each class in a single zip file.

In [11]:
# Only runs when classes are given by a labels file rather than by the directory layout.
if LABELED_DATA == True:
    from functions import merge_subfolders
    # NOTE(review): the unzip cell has already appended unzipped_folder_name to DATA_DIR, so
    # source_folder ends in '.../Extracted_Data/Extracted_Data' — confirm this is the intended location.
    source_folder = DATA_DIR + '/' + unzipped_folder_name
    destination_folder = DATA_DIR + '/merged_dataset'
    
    merge_subfolders(source_folder, destination_folder)

### Make Directory from Labels

In [12]:
if LABELED_DATA == True:
    import shutil

    def extract_base_filename(file_entry):
        """Return the text before the first comma of a label entry."""
        return file_entry.split(',')[0]

    def move_files_to_folders(file_list, source_dir="./Data_2", target_root="./Data"):
        """Move each labelled file into a per-label sub-folder.

        Args:
            file_list: iterable of "filename,label" strings (e.g. lines of the labels file).
            source_dir: directory the files are currently in.
            target_root: directory under which one sub-folder per label is created.

        Blank or malformed entries are reported and skipped. A file that cannot be
        moved is reported and the remaining entries are still processed.
        """
        for file_entry in file_list:
            entry = file_entry.strip()
            if not entry:
                continue  # blank line, e.g. a trailing newline at the end of the file

            # Split on the LAST comma so file names that contain commas still parse.
            filename, separator, folder_num = entry.rpartition(',')
            if not separator:
                print(f"Skipping malformed label entry: {entry!r}")
                continue
            filename = filename.strip()

            folder_path = os.path.join(target_root, folder_num.strip())
            os.makedirs(folder_path, exist_ok=True)

            src_path = os.path.join(source_dir, filename)
            dst_path = os.path.join(folder_path, filename)
            try:
                shutil.move(src_path, dst_path)
            except FileNotFoundError:
                print("Img not found")
            except OSError as err:
                # Other filesystem problems (permissions, ...) are reported, not hidden.
                print(f"Could not move {filename}: {err}")
            else:
                print(f"Moved {filename} to {folder_path}")

    with open(labels_path, "r") as f:
        file_list = f.readlines()

    move_files_to_folders(file_list)

In [13]:
#!rm -rf ./Data/.ipynb_checkpoints #deletes checkpoint artifact from dataset folder

In [14]:
print(DATA_DIR)

/home/christian/Desktop/Creates/CREATEs Rework/Rework V2/Dataset/Extracted_Data


In [15]:
train_generator = data_generator.flow_from_directory(
    DATA_DIR, target_size=im_shape, shuffle=True, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE, subset="training")

# Validation images must only be rescaled, never randomly zoomed/rotated/flipped; otherwise the
# validation metrics and the test arrays built below change from run to run. The split and the
# rescale factor have to mirror `data_generator` so both subsets partition the same files.
validation_data_generator = ImageDataGenerator(validation_split=0.2, rescale=1./255)
validation_generator = validation_data_generator.flow_from_directory(
    DATA_DIR, target_size=im_shape, shuffle=False, seed=seed,
    class_mode='categorical', batch_size=BATCH_SIZE, subset="validation")


nb_train_samples = train_generator.samples
nb_validation_samples = validation_generator.samples
classes = list(train_generator.class_indices.keys())
print('Classes: '+str(classes))
num_classes  = len(classes)

# Materialise the whole validation subset as arrays for the evaluation cells further down.
test_data_list = []
test_labels_list = []

for i in range(len(validation_generator)):
    data_batch, labels_batch = validation_generator[i]
    test_data_list.append(data_batch)
    test_labels_list.append(labels_batch)

test_data = np.vstack(test_data_list)
test_labels = np.vstack(test_labels_list)


Found 583 images belonging to 2 classes.
Found 145 images belonging to 2 classes.
Classes: ['0', '1']


In [16]:
from collections import Counter

# Number of training images per class index.
counter = Counter(train_generator.classes)
max_val = float(max(counter.values()))

# Weight of a class = total samples / (samples in that class * number of classes),
# so classes with fewer images receive a larger weight.
class_weights = dict(
    (class_id, nb_train_samples / (num_images * num_classes))
    for class_id, num_images in counter.items()
)

In [17]:
# Whole batches per epoch. Floor division drops any final partial batch
# (e.g. 583 // 3 = 194 leaves one training image out of each epoch).
steps_per_epoch = nb_train_samples // BATCH_SIZE
validation_steps = nb_validation_samples // BATCH_SIZE
print("Steps per epoch", steps_per_epoch)
print("Validation Steps", validation_steps)

Steps per epoch 194
Validation Steps 48


In [18]:
print(class_weights)

{0: 0.9024767801857585, 1: 1.1211538461538462}


# Create New Model

In [19]:
if not continue_previous_training:
  # ImageNet-pretrained ResNet50 backbone without its classification head; every layer is frozen.
  transfer_model = ResNet50(weights='imagenet', include_top=False, input_shape=(image_width,image_height,3))
  for layer in transfer_model.layers:
    layer.trainable = False

  # New classification head: pooled features -> two Dense(128)+Dropout(0.2) stages -> softmax over the classes.
  head = transfer_model.output
  head = GlobalAveragePooling2D()(head)
  head = Dense(128, activation='relu')(head)
  head = Dropout(rate=0.2)(head)
  head = Dense(128, activation='relu')(head)
  head = Dropout(rate=0.2)(head)
  head = Dense(num_classes, activation='softmax')(head)
  model = Model(inputs=transfer_model.input, outputs=head)

# Or Load a Previously Saved Model

In [20]:
# Resume from a previously saved model instead of building a new one.
# NOTE(review): MODEL_SAVE_DIR already ends in 'models/', so this resolves to
# './models//models/<MODEL_NAME>_last_10.h5', which is not a path written by the checkpoint
# or save cells in this notebook — confirm where the file actually lives.
if continue_previous_training == True:
  model = keras.models.load_model(f"{MODEL_SAVE_DIR}/models/{MODEL_NAME}_last_10.h5")

In [21]:
# Set to True to print the layer-by-layer model summary.
show_summary = False
if show_summary:
  model.summary()

In [22]:
# Metric objects shared by every model.compile call in this notebook.
# NOTE(review): the training log captured below reports identical accuracy, precision and recall
# values; presumably a consequence of evaluating a 2-class softmax output with these metrics'
# default settings — confirm whether per-class precision/recall was intended.
precision_metrics = tf.keras.metrics.Precision(name="precision")
recall_metrics = tf.keras.metrics.Recall(name="recall")
accuracy_metrics = tf.keras.metrics.CategoricalAccuracy(name='accuracy')

In [23]:
# Compile with the loss named in the W&B config, the scheduled Adam optimizer and the shared metrics.
tracked_metrics = [accuracy_metrics, precision_metrics, recall_metrics]
model.compile(
    loss=wandb.config.loss,
    optimizer=optimizer,
    metrics=tracked_metrics,
)

### TODO: Redo the cell below

In [24]:
# Path suffix appended to MODEL_SAVE_DIR + MODEL_NAME; the {epoch} and {val_accuracy} placeholders
# are filled in when a checkpoint is written (e.g. './models/Resnet50-P/saved-model-01-0.56').
filepath = "/saved-model-{epoch:02d}-{val_accuracy:.2f}"

# Writes a checkpoint after every epoch (save_best_only=False).
checkpoint = ModelCheckpoint(MODEL_SAVE_DIR+MODEL_NAME+ filepath , monitor="val_accuracy",save_best_only=False, mode="auto", verbose=0)
# Writes a checkpoint only when val_accuracy improves; uses the same path template as `checkpoint`.
checkpoint2 = ModelCheckpoint(MODEL_SAVE_DIR+MODEL_NAME+filepath , monitor="val_accuracy",save_best_only=True, mode="auto", verbose=1)
# NOTE(review): the optimizer in this notebook is built with an ExponentialDecay schedule;
# ReduceLROnPlateau may not be able to override a scheduled learning rate — confirm before enabling.
reduce_lr = ReduceLROnPlateau(monitor = 'val_accuracy', factor = 0.3, patience = 2, min_delta = 0.001,
                              mode='auto',verbose=1)
# Stops training once val_accuracy has not improved for 3 consecutive epochs.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=3,verbose=1)

In [25]:
class PRMetrics(Callback):
  """Custom callback that logs a confusion matrix and ROC curve to W&B when training ends.

  Args:
    generator: indexable batch generator exposing `class_indices`; batch `i` must yield
      `(images, one_hot_labels)`.
    num_log_batches: number of batches taken from `generator` for the plots.
  """
  def __init__(self, generator=None, num_log_batches=1):
    # Let the Keras base class set up its own attributes.
    super().__init__()
    self.generator = generator
    self.num_batches = num_log_batches
    # store full names of classes
    self.flat_class_names = list(generator.class_indices.keys())

  def on_train_end(self, logs=None):
    # Never request more batches than the generator can provide.
    num_batches = min(self.num_batches, len(self.generator))
    if num_batches <= 0:
      return

    # collect validation data and ground truth labels from generator
    val_data, val_labels = zip(*(self.generator[i] for i in range(num_batches)))
    val_data, val_labels = np.vstack(val_data), np.vstack(val_labels)

    # Load the best checkpoint into the model this callback is attached to
    # (not whatever global `model` happens to exist).
    # NOTE(review): this path is not one the ModelCheckpoint callbacks in this notebook write to —
    # confirm the checkpoint location before enabling this callback.
    self.model.load_weights(MODEL_SAVE_DIR+'/models/60_Epochs_Experiments/'+MODEL_NAME+ '/best_val_acc_model.h5')

    # predictions for the collected validation batches
    val_predictions = self.model.predict(val_data)
    ground_truth_class_ids = val_labels.argmax(axis=1)
    # class id of the highest-confidence prediction for each sample
    top_pred_ids = val_predictions.argmax(axis=1)

    # Log confusion matrix
    # the key "conf_mat" is the id of the plot--do not change
    # this if you want subsequent runs to show up on the same plot
    wandb.log({"conf_mat" : wandb.plot.confusion_matrix(probs=None,
                            preds=top_pred_ids, y_true=ground_truth_class_ids,
                            class_names=self.flat_class_names)})
    wandb.log({"roc_curve" : wandb.plot.roc_curve(ground_truth_class_ids, val_predictions, labels=self.flat_class_names)})

GradCam Callback
Ref: https://www.kaggle.com/ayuraj/gradcam-implementation-visualization-in-tf-w-b

In [26]:
class UnfreezeCallback(keras.callbacks.Callback):
    """Makes every layer of the model trainable at the end of a chosen epoch and recompiles it.

    NOTE(review): the callbacks cell marks this callback as "may be causing issues"; recompiling a
    model while fit() is still running is a likely suspect — confirm before re-enabling.
    """

    def __init__(self, unfreeze_epoch):
        super().__init__()
        # Epoch index (as passed to on_epoch_end) after which the layers are unfrozen.
        self.unfreeze_epoch = unfreeze_epoch

    def on_epoch_end(self, epoch, logs=None):
        # Only act on the configured epoch.
        if epoch != self.unfreeze_epoch:
            return

        print("Unfreezing the whole model...")
        attached_model = self.model
        for current_layer in attached_model.layers:
            current_layer.trainable = True
        # Recompile with the same loss, optimizer and metric objects used for the initial compile.
        attached_model.compile(
            loss=wandb.config.loss,
            optimizer=optimizer,
            metrics=[accuracy_metrics, precision_metrics, recall_metrics],
        )

In [27]:

# Callbacks passed to model.fit: W&B logging plus the best-only checkpoint.
# The commented-out entries are currently disabled.
callbacks = [
    WandbCallback(input_type='image', training_data=train_generator),
    checkpoint2
    
    # tensorboard,
    #UnfreezeCallback(10), #May be causing issues
    #PRMetrics(validation_generator, wandb.config.num_log_batches),
    # GRADCamLogger(test_generator, layer_name='stem_conv1'),
    # checkpoint,
    # reduce_lr,
    # early_stopping,
]



In [None]:

# Train the model; `history` is used by the plotting cells further down.
history = model.fit(
    train_generator,
    callbacks=callbacks,
    steps_per_epoch = steps_per_epoch,
    epochs = EPOCHS,
    validation_data = validation_generator,
    verbose = 1,
    validation_steps = validation_steps,
    class_weight=class_weights
  )
# TODO: pass a generator to the wandb callback.
# TODO: find a way to save the model as a SavedModel instead of an h5 file.

Epoch 1/2

  saving_api.save_model(


INFO:tensorflow:Assets written to: /home/christian/Desktop/Creates/CREATEs Rework/Rework V2/wandb/offline-run-20240305_105442-asoxpy1a/files/model-best/assets


INFO:tensorflow:Assets written to: /home/christian/Desktop/Creates/CREATEs Rework/Rework V2/wandb/offline-run-20240305_105442-asoxpy1a/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/home/christian/Desktop/Creates/CREATEs Rework/Rework V2/wandb/offline-run-20240305_105442-asoxpy1a/files/model-best)... Done. 0.2s



Epoch 1: val_accuracy improved from -inf to 0.55556, saving model to ./models/Resnet50-P/saved-model-01-0.56
INFO:tensorflow:Assets written to: ./models/Resnet50-P/saved-model-01-0.56/assets


INFO:tensorflow:Assets written to: ./models/Resnet50-P/saved-model-01-0.56/assets


Epoch 2/2
 12/194 [>.............................] - ETA: 20:23 - loss: 0.7226 - accuracy: 0.4706 - precision: 0.4706 - recall: 0.4706

In [None]:
#save the last model
# Set to True to save the model as it is after the last epoch.
model_save = False
if model_save == True:
    model.save(f'{MODEL_SAVE_DIR}{MODEL_NAME}_{EPOCHS}')

In [None]:
wandb.finish()

In [None]:
# Star import: presumably provides the Grad-CAM helpers called unqualified below
# (get_img_array, gradcam_heatmap, display_gradcam) — verify against plotting.py.
from plotting import *
import matplotlib.cm as cm

In [None]:
# Extracting False Negatives
def evaluate_model(model, data, labels, sample_every=20):
  """Collect prediction records for every `sample_every`-th sample.

  NOTE(review): despite the name of the returned list, samples are selected by position only,
  not by whether the prediction is wrong — confirm whether filtering on
  `true_label != predicted_label` was intended.

  Args:
    model: object with a Keras-style `predict(batch)` method returning per-class scores.
    data: iterable of single images (no batch dimension).
    labels: iterable of one-hot label vectors aligned with `data`.
    sample_every: keep the 20th, 40th, ... sample by default; must be >= 1.

  Returns:
    List of `[img, true_label, predicted_label, confidence, predicted]` records, where
    `predicted` is the raw model output for the single-image batch.
  """
  if sample_every < 1:
    raise ValueError("sample_every must be a positive integer")

  false_predictions = []
  for count, (img, label) in enumerate(zip(data, labels), start=1):
    # Only the retained samples need a forward pass.
    if count % sample_every != 0:
      continue

    batch = np.expand_dims(img, axis=0)  # add the batch dimension expected by predict()
    predicted = model.predict(batch)
    true_label = np.argmax(label)
    predicted_label = np.argmax(predicted)
    confidence = predicted[0][predicted_label]
    false_predictions.append([img, true_label, predicted_label, confidence, predicted])
  return false_predictions

In [None]:
def sove_plots(false_predictions, model, last_conv_layer_name):
  """Show a Grad-CAM overlay, the input image and the raw attention map for each sample.

  Args:
    false_predictions: records as produced by `evaluate_model`
      (`[img, true_label, predicted_label, confidence, predicted]`).
    model: the Keras model to explain. Its last layer's activation is set to None
      (and stays that way) as soon as the first sample is processed.
    last_conv_layer_name: name of the convolutional layer whose output feeds Grad-CAM.
  """
  for sample in false_predictions:
    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(20, 10))
    plt.subplots_adjust(bottom=0)
    img, true_label, predicted_label, trust = sample[:4]

    true_label_class = r"True Label: $\bf{" + str(classes[true_label]) + "}$"
    predicted_label_class = r"Predicted Label: $\it{"+ str(classes[predicted_label]) + "}$"

    confidence = "Confidence: " + str(trust)

    img_array = get_img_array(img)
    # Remove last layer's softmax
    model.layers[-1].activation = None
    preds = model.predict(img_array)
    title = "{} \n {} \n {} \n".format(
        true_label_class, predicted_label_class, confidence)
    plt.axis('off')

    # Use the layer name that was passed in, not a notebook-level global.
    heatmap = gradcam_heatmap(img_array, model, last_conv_layer_name)
    # The heatmap has the spatial size of the chosen layer's output; that is 7x7 only for
    # 224x224 inputs, so take it from the layer instead of hard-coding it.
    heatmap_shape = tuple(model.get_layer(last_conv_layer_name).output.shape[1:3])
    heatmap = np.reshape(heatmap, heatmap_shape)

    display_gradcam(img, heatmap, preds=preds[0], plot=ax1)
    _ = ax2.imshow(img)
    _ = ax3.imshow(heatmap)
    ax1.set_title("GradCam")
    ax2.set_title(title)
    ax3.set_title('Attention Map')
    plt.show()
    plt.close(fig)
    print('------------')


In [None]:
import plotting
import importlib
importlib.reload(plotting)

In [None]:
plotting.plot_history_acc_loss(history)

In [None]:
# Call through the module (as the accuracy/loss cell does) so the version refreshed by
# importlib.reload(plotting) is the one that runs.
plotting.plot_history_precision_recall(history)

In [None]:
def print_confusion_matrix(confusion_matrix, class_names, figsize = (12, 12), fontsize=14):
    """Draw a confusion matrix as an annotated heatmap and return the figure.

    Each cell is annotated with its count and with its share of all samples.

    Args:
        confusion_matrix: square array of counts with one row/column per class.
        class_names: class labels, used for both axes.
        figsize: size of the created matplotlib figure.
        fontsize: font size of the tick labels.

    Returns:
        The matplotlib figure. When this is the last expression of a notebook cell the figure
        is displayed twice; append `;` to the call to avoid that.
    """
    flat_counts = confusion_matrix.flatten()
    flat_shares = flat_counts / np.sum(confusion_matrix)
    n_classes = len(class_names)

    # One "count\npercentage" annotation per cell, laid out like the matrix itself.
    cell_texts = [
        "{0:0.0f}".format(count) + "\n" + "{0:.2%}".format(share)
        for count, share in zip(flat_counts, flat_shares)
    ]
    labels = np.asarray(cell_texts).reshape(n_classes, n_classes)

    df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names)
    fig = plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=labels, fmt='', cmap='Blues')
    except ValueError:
        raise ValueError("Confusion matrix values must be integers.")

    # Horizontal tick labels on the y axis, slanted ones on the x axis.
    for axis, angle in ((heatmap.yaxis, 0), (heatmap.xaxis, 45)):
        axis.set_ticklabels(axis.get_ticklabels(), rotation=angle, ha='right', fontsize=fontsize)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    return fig

In [None]:
# Predict on the arrays collected from the validation generator and compare with the true classes.
test_predictions = model.predict(test_data)
ground_truth_class_ids = test_labels.argmax(axis=1)

# take the argmax for each set of prediction scores
# to return the class id of the highest confidence prediction
top_pred_ids = test_predictions.argmax(axis=1)
conf_mat = confusion_matrix(ground_truth_class_ids, top_pred_ids)
figure = print_confusion_matrix(conf_mat, classes)
plt.title('Confusion Matrix')
plt.show()



In [None]:
predictions = evaluate_model(model, test_data, test_labels)

In [None]:
# Name of the layer handed to sove_plots for the Grad-CAM heatmaps.
last_layer_name = 'conv5_block3_out'

In [None]:
sove_plots(predictions, model, last_layer_name)