# Base imports and functions

In [None]:
!pip install --upgrade tensorflow_hub

In [None]:
!pip install --upgrade tensorflow_addons

In [None]:
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from keras.models import Sequential
from keras import layers
from keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPool2D, Activation
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import numpy as np
from sklearn.utils import class_weight
import pandas as pd
import datetime
import tensorflow_hub as hub
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import classification_report
from PIL import Image

In [None]:
from keras import mixed_precision

# Set the global Keras dtype policy to "mixed_float16" for the rest of the notebook.
mixed_precision.set_global_policy("mixed_float16")

In [None]:
def view_random_image(target_dir, target_class):
  """
  Show one randomly chosen image from a class folder and return it.

  Args:
    target_dir: Path prefix of the dataset directory. It is concatenated
      directly with `target_class`, so it must already end with a separator.
    target_class: Name of the class sub-folder; also used as the plot title.

  Returns:
    The image array loaded with matplotlib.image.imread.
  """
  # `os` is not imported at the top of this file, so bring it in locally;
  # without it the os.listdir call below raises NameError.
  import os

  target_folder = target_dir + target_class
  random_img = random.sample(os.listdir(target_folder), 1)[0]
  img = mpimg.imread(os.path.join(target_folder, random_img))
  plt.imshow(img)
  plt.title(target_class)
  plt.axis("off")

  print(f"Image shape: {img.shape}")
  return img


In [None]:
def create_tensorboard_callback(dir_name, experiment_name):
  """
  Build a TensorBoard callback that logs into a timestamped run directory.

  The log directory is "<dir_name>/<experiment_name>/<YYYYmmdd-HHMMSS>",
  where the timestamp is taken from datetime.datetime.now() at call time.
  The chosen path is printed before the callback is returned.
  """
  timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  log_dir = f"{dir_name}/{experiment_name}/{timestamp}"
  callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
  print(f"Saving to: {log_dir}")
  return callback

In [None]:
def generate_class_weights(class_series, multi_class=True, one_hot_encoded=False):
  """
  Generate "balanced" class weights for multi-class or multi-label labels, one-hot encoded or not.

  Some examples of different formats of class_series and their outputs are:
    - generate_class_weights(['mango', 'lemon', 'banana', 'mango'], multi_class=True, one_hot_encoded=False)
    {'banana': 1.3333333333333333, 'lemon': 1.3333333333333333, 'mango': 0.6666666666666666}
    - generate_class_weights([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], multi_class=True, one_hot_encoded=True)
    {0: 0.6666666666666666, 1: 1.3333333333333333, 2: 1.3333333333333333}
    - generate_class_weights([['mango', 'lemon'], ['mango'], ['lemon', 'banana'], ['lemon']], multi_class=False, one_hot_encoded=False)
    {'banana': 1.3333333333333333, 'lemon': 0.4444444444444444, 'mango': 0.6666666666666666}
    - generate_class_weights([[0, 1, 1], [0, 0, 1], [1, 1, 0], [0, 1, 0]], multi_class=False, one_hot_encoded=True)
    {0: 1.3333333333333333, 1: 0.4444444444444444, 2: 0.6666666666666666}

  Args:
    class_series: The labels, in one of the four formats shown above.
    multi_class: True for single-label (multi-class) data, False for
      multi-label data.
    one_hot_encoded: Whether class_series is already one-hot / multi-hot
      encoded.

  Returns:
    A dictionary in the format { class_label: class_weight }. If the input is
    one-hot encoded, class_label is the column index of the label. Otherwise
    the labels are np.unique(class_series) in the multi-class case and the
    classes found by MultiLabelBinarizer in the multi-label case. In the
    multi-label case an empty class_series yields an empty dictionary, and a
    class that never occurs gets weight 1.

  Author: Angel Igareta (angel@igareta.com)
  """
  if multi_class:
    # If the labels are one-hot encoded, convert them to class indices so that
    # compute_class_weight can be used.
    if one_hot_encoded:
      class_series = np.argmax(class_series, axis=1)

    # Compute class weights with the sklearn helper
    class_labels = np.unique(class_series)
    class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=class_series)
    return dict(zip(class_labels, class_weights))

  # Multi-label: the values must be multi-hot encoded before counting
  mlb = None
  if not one_hot_encoded:
    mlb = MultiLabelBinarizer()
    class_series = mlb.fit_transform(class_series)

  label_matrix = np.asarray(class_series)
  n_samples = len(label_matrix)
  if n_samples == 0:
    # Nothing to weight; previously this case crashed on class_series[0].
    return {}
  n_classes = label_matrix.shape[1]

  # Number of samples in which each class is present (non-zero entry).
  # tolist() keeps the frequencies, and therefore the weights, plain Python numbers.
  class_count = np.count_nonzero(label_matrix, axis=0).tolist()

  # Balanced weighting: n_samples / (n_classes * frequency)
  class_weights = [n_samples / (n_classes * freq) if freq > 0 else 1 for freq in class_count]
  class_labels = range(n_classes) if mlb is None else mlb.classes_
  return dict(zip(class_labels, class_weights))

In [None]:
def plot_loss_curves(history):
  """
  Plot training vs. validation loss and accuracy, one figure each.

  Args:
    history: Object whose `.history` dict holds the per-epoch lists under
      'loss', 'val_loss', 'accuracy' and 'val_accuracy'.

  The loss curves are drawn on the currently active figure; a new figure is
  opened for the accuracy curves.
  """
  records = history.history
  # Read every series up front so a missing key fails before anything is drawn.
  loss, val_loss = records['loss'], records['val_loss']
  accuracy, val_accuracy = records['accuracy'], records['val_accuracy']
  epochs = range(len(records['loss']))

  def _draw(title, train_values, train_label, val_values, val_label):
    # One pair of curves with a title, x label and legend.
    plt.plot(epochs, train_values, label=train_label)
    plt.plot(epochs, val_values, label=val_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.legend()

  _draw('Loss', loss, 'training_loss', val_loss, 'val_loss')

  plt.figure()
  _draw('Accuracy', accuracy, 'training_accuracy', val_accuracy, 'val_accuracy')

In [None]:
# Colours of the active matplotlib property cycle; plot_metrics uses the first one.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
def plot_metrics(history):
  """
  Plot loss, PRC, precision and recall on a 2x2 grid of subplots.

  For each metric the training curve is a solid line and the validation curve
  (key 'val_<metric>') a dashed line, both in the first colour of `colors`.

  Args:
    history: Object exposing `.epoch` and a `.history` dict with the keys
      'loss', 'prc', 'precision', 'recall' and their 'val_' counterparts.
  """
  for position, metric in enumerate(('loss', 'prc', 'precision', 'recall'), start=1):
    plt.subplot(2, 2, position)
    plt.plot(history.epoch, history.history[metric],
             color=colors[0], label='Train')
    plt.plot(history.epoch, history.history['val_' + metric],
             color=colors[0], linestyle="--", label='Val')
    plt.xlabel('Epoch')
    plt.ylabel(metric.replace("_", " ").capitalize())

    if metric == 'loss':
      # Anchor the loss axis at zero but keep the automatic upper bound.
      y_limits = [0, plt.ylim()[1]]
    elif metric == 'auc':
      # Not reached with the metric list above ('auc' is not plotted).
      y_limits = [0.8, 1]
    else:
      y_limits = [0, 1]
    plt.ylim(y_limits)

    plt.legend()

In [None]:
import sklearn
from sklearn.metrics import confusion_matrix
import seaborn as sns

def plot_roc(name, labels, predictions, **kwargs):
  """
  Draw a ROC curve, in percent, zoomed on the low-FP / high-TP corner.

  Args:
    name: Legend label for the curve.
    labels: True labels passed to sklearn.metrics.roc_curve.
    predictions: Scores passed to sklearn.metrics.roc_curve.
    **kwargs: Extra keyword arguments forwarded to plt.plot.
  """
  false_pos, true_pos, _thresholds = sklearn.metrics.roc_curve(labels, predictions)

  plt.plot(100 * false_pos, 100 * true_pos, label=name, linewidth=2, **kwargs)
  plt.xlabel('False positives [%]')
  plt.ylabel('True positives [%]')
  # Show only false positives up to 20% and true positives from 80%.
  plt.xlim([-0.5, 20])
  plt.ylim([80, 100.5])
  plt.grid(True)
  plt.gca().set_aspect('equal')

In [None]:
def create_model(model_url, num_classes=8):
  """
  Build a frozen TF-Hub feature extractor topped with a sigmoid output layer.

  Args:
    model_url: Model handle passed to hub.KerasLayer.
    num_classes: Number of units in the sigmoid output layer (default 8).

  Returns:
    An uncompiled tf.keras.Sequential model taking (384, 384, 3) inputs.
  """
  feature_extractor_layer = hub.KerasLayer(model_url,
                                           trainable=False,
                                           dtype=tf.float32,
                                           name="feature_extraction_layer",
                                           input_shape=(384, 384, 3))
  model = tf.keras.Sequential([
      feature_extractor_layer,
      # This notebook sets the global policy to "mixed_float16"; keep the
      # output layer in float32 so the sigmoid outputs (and the loss computed
      # from them) are not produced in float16.
      tf.keras.layers.Dense(num_classes,
                            activation="sigmoid",
                            dtype=tf.float32,
                            name="output_layer")
  ])
  return model

In [None]:
def pred_and_plot(model, filename, true_label, class_names):
  """
  Predict on a single image, print the per-class scores and show the image.

  Despite its name, `filename` is used directly as the image data: it is
  given a batch axis with tf.expand_dims, passed to model.predict and handed
  to plt.imshow. No file is read and no preprocessing happens here.

  Args:
    model: Model exposing `predict`.
    filename: The image data itself (see above).
    true_label: Indexable ground-truth values, one per entry of class_names.
    class_names: Class names, in the order of the model outputs.
  """
  img = filename

  # Add the batch axis, predict, and keep the scores of the single sample.
  pred = model.predict(tf.expand_dims(img, axis=0))
  sample_scores = pred[0]

  for idx, class_name in enumerate(class_names):
    print(f"Prediction: {class_name} {sample_scores[idx]*100:.2f}%, True: {true_label[idx]}")

  # Show the image without axes
  plt.imshow(img)
  plt.axis(False)

In [None]:
def plot_multi_label_confusion_matrix(true_labels, predicted_labels, class_labels):
    """
    Plot one 2x2 confusion matrix per class on a grid with four columns.

    Args:
        true_labels: Ground-truth multi-label indicator array.
        predicted_labels: Predicted multi-label indicator array.
        class_labels: Class names; entry i titles the i-th matrix returned by
            sklearn's multilabel_confusion_matrix.
    """
    conf_matrix = multilabel_confusion_matrix(true_labels, predicted_labels)

    num_cols = 4
    num_classes = len(class_labels)
    num_rows = (num_classes + num_cols - 1) // num_cols  # ceil(num_classes / num_cols)

    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # row; without it plt.subplots returns a 1-D array for <= 4 classes and
    # two-index access fails.
    fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols,
                             figsize=(5 * min(num_classes, 4), 5 * num_rows),
                             squeeze=False)

    # axes.flat walks the grid row by row, matching the class order.
    for class_index, ax in enumerate(axes.flat):
        if class_index >= num_classes:
            ax.axis('off')  # Turn off empty subplots
            continue

        matrix = conf_matrix[class_index]
        ax.imshow(matrix, cmap='Blues', interpolation='nearest')
        ax.set_title('Confusion Matrix for {}'.format(class_labels[class_index]))
        ax.set_xticks([0, 1])
        ax.set_yticks([0, 1])
        ax.set_xticklabels(['Predicted: 0', 'Predicted: 1'])
        ax.set_yticklabels(['True: 0', 'True: 1'])

        # Write every count into its cell (x = predicted column, y = true row).
        for row, row_counts in enumerate(matrix):
            for col, count in enumerate(row_counts):
                ax.text(col, row, str(count), ha='center', va='center', color='red')

    plt.tight_layout()
    plt.show()

# Data import and preprocessing

In [None]:
import zipfile

# Unpack the dataset archive into the current working directory. The context
# manager closes the archive even if extraction raises.
with zipfile.ZipFile("/content/drive/MyDrive/data_one_hot.zip") as my_files:
  my_files.extractall()

In [None]:
# Load the image metadata table from the extracted dataset.
df = pd.read_csv("/content/data_one_hot/Zebrametadata.csv", sep=",")
# Label columns handed to the data generators as y_col.
columns = [
    "Dead",
    "Edema",
    "Kyphosis",
    "Lordosis",
    "Normal",
    "Scoliosis",
    "Unhatched",
    "Yolk deformation",
]

In [None]:
# Preview the first rows of the metadata table.
print(df.head())

In [None]:
# Per-class totals: sum every column after the first one.
class_counts = df.iloc[:, 1:].sum()

plt.figure(figsize=(14, 6))

# Left panel: bar chart of the class totals.
plt.subplot(1, 2, 1)
bars = class_counts.plot.bar(color='skyblue')
plt.title('Distribution of Classes (Bar Plot)')
plt.xlabel('Classes')
plt.ylabel('Count')
plt.xticks(rotation=45)

# Write each bar's value just above it, centred horizontally.
for bar in bars.patches:
  yval = bar.get_height()
  x_center = bar.get_x() + bar.get_width()/2
  plt.text(x_center, yval, round(yval, 2), ha='center', va='bottom')

# Right panel: the same totals as percentage shares.
plt.subplot(1, 2, 2)
class_counts.plot.pie(autopct='%1.1f%%', colors=['lightcoral', 'lightgreen'])
plt.title('Distribution of Classes (Pie Chart)')
plt.ylabel('')

plt.tight_layout()
plt.show()

In [None]:
img_height = img_width = 384
batch_size = 32

# Plain generator: only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255.)

# Augmenting generator: rescaling plus random rotation, zoom and flips.
# NOTE(review): rotation_range is documented as a range in degrees, so 0.5
# means at most half a degree of rotation - confirm this is intended.
datagen_augmented = ImageDataGenerator(
    rescale=1.0/255.,
    rotation_range=0.5,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Arguments shared by every flow_from_dataframe call below.
flow_options = dict(
    directory="/content/data_one_hot/images",
    x_col="Filename",
    y_col=columns,
    seed=42,
    class_mode="raw",
    target_size=(img_height, img_width),
)

# Row-position split of the metadata table: [0, 3387) train,
# [3387, 3763) validation, [3763, end) test.
train_data = datagen.flow_from_dataframe(
    dataframe=df[:3387], batch_size=batch_size, shuffle=True, **flow_options)

train_data_augmented = datagen_augmented.flow_from_dataframe(
    dataframe=df[:3387], batch_size=batch_size, shuffle=True, **flow_options)

validation_gen = datagen.flow_from_dataframe(
    dataframe=df[3387:3763], batch_size=batch_size, shuffle=True, **flow_options)

# The test iterator is not shuffled and passes no batch_size, so it uses the
# flow_from_dataframe default.
test_gen = datagen.flow_from_dataframe(
    dataframe=df[3763:], shuffle=False, **flow_options)

In [None]:
# Pull one batch of images and labels from the augmented training iterator.
augmented_images, augmented_labels = train_data_augmented.next()
# Class names, listed in the same order as the label columns in `columns`.
class_names = ["Dead", "Edema", "Kyphosis", "Lordosis", "Normal", "Scoliosis", "Unhatched", "Yolk deformation"]

In [None]:
# Pull one batch of images and labels from the non-augmented training iterator.
train_images, train_labels = train_data.next()

In [None]:
# Pull one batch of images and labels from the test iterator.
test_images, test_labels = test_gen.next()

In [None]:
nobs = 8   # Maximum number of images to display
ncols = 4  # Number of columns in display
nrows = nobs // ncols  # Number of rows in display

# Show the first nrows*ncols images of the training batch, each titled with
# its label vector.
plt.figure(figsize=(12, 4 * nrows))
for i in range(nrows * ncols):
    ax = plt.subplot(nrows, ncols, i + 1)
    ax.imshow(train_images[i])
    ax.set_title(f"{train_labels[i]}", size=10)
    ax.axis('off')

In [None]:
# Metrics for the 8-label sigmoid classifier.
METRICS = [
      keras.metrics.TruePositives(name='tp'),
      keras.metrics.FalsePositives(name='fp'),
      keras.metrics.TrueNegatives(name='tn'),
      keras.metrics.FalseNegatives(name='fn'),
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'),
      # Without a threshold, tfa's F1Score converts only the argmax of each
      # prediction to 1, which is a single-label rule. The labels here are
      # multi-label (one sigmoid per class), so threshold each output at 0.5.
      tfa.metrics.F1Score(num_classes=8, average='macro', threshold=0.5)
]

In [None]:
# Reduced metric set for the 8-label sigmoid classifier.
METRICS2 = [
      # Without a threshold, tfa's F1Score converts only the argmax of each
      # prediction to 1, which is a single-label rule. The labels here are
      # multi-label (one sigmoid per class), so threshold each output at 0.5.
      tfa.metrics.F1Score(num_classes=8, average='macro', threshold=0.5),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR')
]

In [None]:
# Compute the multi-label class weights from the labels of the whole training
# set. `train_labels` only holds the single batch drawn from train_data, which
# is too small a sample to estimate class frequencies.
class_weights_calc = generate_class_weights(train_data.labels, multi_class=False, one_hot_encoded=True)
print(class_weights_calc)

In [None]:
# Show the shape of the augmented image batch.
print(augmented_images.shape)

In [None]:
# Pick a random position inside the batch that was actually returned, rather
# than assuming it holds exactly 32 images.
random_number = random.randrange(len(augmented_images))

plt.figure()
plt.imshow(augmented_images[random_number])
plt.title("Augmented image")
plt.axis(False)

# Label vector of the displayed image.
print(augmented_labels[random_number])

# Experiment Kyphosis vs Scoliosis


In [None]:
# Metric set for a two-class experiment (replaces the 8-class METRICS2).
# NOTE(review): METRICS2 is not referenced by any other cell visible in this notebook.
METRICS2 = [
      tfa.metrics.F1Score(num_classes=2, average='macro'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
]

In [None]:
import zipfile

# Unpack the dataset archive into the current working directory. The context
# manager closes the archive even if extraction raises.
with zipfile.ZipFile("/content/drive/MyDrive/data_one_hot.zip") as my_files:
  my_files.extractall()

In [None]:
# Metadata table for the Kyphosis-vs-Scoliosis experiment.
df = pd.read_csv("/content/drive/MyDrive/Zebrametadata_KvsS.csv", sep=",")
# Label columns handed to the data generators as y_col.
columns = ["Kyphosis", "Scoliosis"]

In [None]:
img_height = img_width = 384
batch_size = 32

# Plain generator: only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255.)

# Augmenting generator: rescaling plus random rotation, zoom and flips.
# NOTE(review): rotation_range is documented as a range in degrees, so 0.5
# means at most half a degree of rotation - confirm this is intended.
datagen_augmented = ImageDataGenerator(
    rescale=1.0/255.,
    rotation_range=0.5,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Arguments shared by every flow_from_dataframe call below.
flow_options = dict(
    directory="/content/data_one_hot/images",
    x_col="Filename",
    y_col=columns,
    seed=42,
    class_mode="raw",
    target_size=(img_height, img_width),
)

# Row-position split of the metadata table: [0, 319) train,
# [319, 355) validation, [355, end) test.
train_data = datagen.flow_from_dataframe(
    dataframe=df[:319], batch_size=batch_size, shuffle=True, **flow_options)

train_data_augmented = datagen_augmented.flow_from_dataframe(
    dataframe=df[:319], batch_size=batch_size, shuffle=True, **flow_options)

validation_gen = datagen.flow_from_dataframe(
    dataframe=df[319:355], batch_size=batch_size, shuffle=True, **flow_options)

# The test iterator is not shuffled and passes no batch_size, so it uses the
# flow_from_dataframe default.
test_gen = datagen.flow_from_dataframe(
    dataframe=df[355:], shuffle=False, **flow_options)

In [None]:
# Pull one batch of images and labels from the training iterator.
train_images, train_labels = train_data.next()

In [None]:
# Display the shape of the label batch.
train_labels.shape

In [None]:
# Small CNN: two blocks of two 3x3 convolutions (10 filters each) followed by
# max pooling, then a single sigmoid unit.
# NOTE(review): the generators of this experiment are built with y_col=columns
# (two label columns) while this model has one output unit - confirm the loss
# sees the intended target (the evaluation cell uses label column 1 only).
model_experiment = Sequential([
    Conv2D(10, 3, input_shape=(384, 384, 3)),
    Activation(activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Conv2D(10, 3, activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    # The notebook sets the global policy to "mixed_float16"; keep the output
    # layer in float32 so the sigmoid and the loss are not computed in float16.
    Dense(1, activation="sigmoid", dtype="float32")
])


model_experiment.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

In [None]:
# Train for 5 epochs; the step counts are the lengths of the two iterators.
train_steps = len(train_data)
val_steps = len(validation_gen)
history_kyphosis_scoliosis = model_experiment.fit(
    train_data,
    epochs=5,
    steps_per_epoch=train_steps,
    validation_data=validation_gen,
    validation_steps=val_steps,
)

In [None]:
# Evaluate the trained model on the test iterator.
model_experiment.evaluate(test_gen)

In [None]:
# Plot the loss and accuracy curves of the Kyphosis-vs-Scoliosis training run.
plot_loss_curves(history_kyphosis_scoliosis)

In [None]:
# Reset the test iterator before drawing a batch from it in the next cell.
test_gen.reset()

In [None]:
# Pull one batch of images and labels from the test iterator.
test_images,test_labels = test_gen.next()

In [None]:
# NOTE(review): random_number is not used by the lookup below, which reads a fixed position.
random_number = random.randint(0, 31)

# Second label value of the second sample in the test batch.
test_labels[1][1]

In [None]:
# Predict on the test iterator and binarise the outputs: below 0.5 -> 0, otherwise 1.
# NOTE(review): the next cell starts by repeating exactly these statements.
pred=model_experiment.predict(test_gen,
verbose=1)
preds = np.where(pred<0.5,0,1)

In [None]:
# Predict on the full test iterator and binarise the outputs at 0.5.
pred = model_experiment.predict(test_gen, verbose=1)
preds = np.where(pred < 0.5, 0, 1)
# Binary ground truth: 1 where the second label column equals 1, else 0.
true_labels_modified = np.where(test_gen.labels[:, 1] == 1, 1, 0)

classification_report_dict = classification_report(
    true_labels_modified, preds, output_dict=True
)
print(classification_report_dict)

# Gather the per-class F1 scores. This relies on the per-class entries coming
# before the "accuracy" key, where the loop stops.
class_f1_scores = {}
for k, v in classification_report_dict.items():
  if k == "accuracy":
    break
  class_f1_scores[columns[int(k)]] = v["f1-score"]

f1_scores = pd.DataFrame({
    "class_name": list(class_f1_scores.keys()),
    "f1-score": list(class_f1_scores.values()),
})
f1_scores = f1_scores.sort_values("f1-score", ascending=False)

# Horizontal bar chart with the best-scoring class at the top.
fig, ax = plt.subplots(figsize=(5, 5))
bar_positions = range(len(f1_scores))
scores = ax.barh(bar_positions, f1_scores["f1-score"].values)
ax.set_yticks(bar_positions)
ax.set_yticklabels(list(f1_scores["class_name"]))
ax.set_xlabel("f1-score")
ax.set_title("F1-Scores for 2 Different Classes")
ax.invert_yaxis()  # reverse the order

# Modified version of: https://matplotlib.org/examples/api/barchart_demo.html
def autolabel(rects):
  """
  Write each bar's width (two decimals) just beyond the end of the bar.

  Draws on the module-level `ax`.
  """
  for rect in rects:
    width = rect.get_width()
    x_pos = 1.03*width
    y_pos = rect.get_y() + rect.get_height()/1.5
    ax.text(x_pos, y_pos, f"{width:.2f}", ha='center', va='bottom')

autolabel(scores)


# Experiment Kyphosis vs Lordosis


In [None]:
# Metric set for a two-class experiment.
# NOTE(review): METRICS2 is not referenced by any other cell visible in this notebook.
METRICS2 = [
      tfa.metrics.F1Score(num_classes=2, average='macro'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
]

In [None]:
# Metadata table for the Kyphosis-vs-Lordosis experiment.
df = pd.read_csv("/content/drive/MyDrive/Zebrametadata_KvsL.csv", sep=",")
# Label columns handed to the data generators as y_col.
columns = ["Kyphosis", "Lordosis"]

In [None]:
img_height = img_width = 384
batch_size = 32

# Plain generator: only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255.)

# Augmenting generator: rescaling plus random rotation, zoom and flips.
# NOTE(review): rotation_range is documented as a range in degrees, so 0.5
# means at most half a degree of rotation - confirm this is intended.
datagen_augmented = ImageDataGenerator(
    rescale=1.0/255.,
    rotation_range=0.5,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Arguments shared by every flow_from_dataframe call below.
flow_options = dict(
    directory="/content/data_one_hot/images",
    x_col="Filename",
    y_col=columns,
    seed=42,
    class_mode="raw",
    target_size=(img_height, img_width),
)

# Row-position split of the metadata table: [0, 472) train,
# [472, 525) validation, [525, end) test.
train_data = datagen.flow_from_dataframe(
    dataframe=df[:472], batch_size=batch_size, shuffle=True, **flow_options)

train_data_augmented = datagen_augmented.flow_from_dataframe(
    dataframe=df[:472], batch_size=batch_size, shuffle=True, **flow_options)

validation_gen = datagen.flow_from_dataframe(
    dataframe=df[472:525], batch_size=batch_size, shuffle=True, **flow_options)

# The test iterator is not shuffled and passes no batch_size, so it uses the
# flow_from_dataframe default.
test_gen = datagen.flow_from_dataframe(
    dataframe=df[525:], shuffle=False, **flow_options)

In [None]:
# Pull one batch of images and labels from the training iterator.
train_images, train_labels = train_data.next()

In [None]:
# Display the shape of the label batch.
train_labels.shape

In [None]:
from PIL import ImageFile
# Tell Pillow to tolerate truncated image files when loading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# Small CNN: two blocks of two 3x3 convolutions (10 filters each) followed by
# max pooling, then a single sigmoid unit.
# NOTE(review): the generators of this experiment are built with y_col=columns
# (two label columns) while this model has one output unit - confirm the loss
# sees the intended target (the evaluation cell uses label column 1 only).
model_experiment = Sequential([
    Conv2D(10, 3, input_shape=(384, 384, 3)),
    Activation(activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Conv2D(10, 3, activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    # The notebook sets the global policy to "mixed_float16"; keep the output
    # layer in float32 so the sigmoid and the loss are not computed in float16.
    Dense(1, activation="sigmoid", dtype="float32")
])


model_experiment.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

In [None]:
# Train for 5 epochs; the step counts are the lengths of the two iterators.
train_steps = len(train_data)
val_steps = len(validation_gen)
history_kyphosis_lordosis = model_experiment.fit(
    train_data,
    epochs=5,
    steps_per_epoch=train_steps,
    validation_data=validation_gen,
    validation_steps=val_steps,
)

In [None]:
# Evaluate the trained model on the test iterator.
model_experiment.evaluate(test_gen)

In [None]:
# Plot the curves of THIS experiment's training run (Kyphosis vs Lordosis);
# the previous code re-plotted the Kyphosis-vs-Scoliosis history.
plot_loss_curves(history_kyphosis_lordosis)

In [None]:
# Reset the test iterator before drawing a batch from it in the next cell.
test_gen.reset()

In [None]:
# Pull one batch of images and labels from the test iterator.
test_images,test_labels = test_gen.next()

In [None]:
# NOTE(review): random_number is not used by the lookup below, which reads a fixed position.
random_number = random.randint(0, 31)

# Second label value of the second sample in the test batch. A stray literal
# `1` (pasted cell output) used to follow this line and, as the cell's last
# expression, hid the real value; it has been removed.
test_labels[1][1]

In [None]:
# Predict on the test iterator and binarise the outputs: below 0.5 -> 0, otherwise 1.
# NOTE(review): the next cell starts by repeating exactly these statements.
pred=model_experiment.predict(test_gen,
verbose=1)
preds = np.where(pred<0.5,0,1)



In [None]:
# Predict on the full test iterator and binarise the outputs at 0.5.
pred = model_experiment.predict(test_gen, verbose=1)
preds = np.where(pred < 0.5, 0, 1)
# Binary ground truth: 1 where the second label column equals 1, else 0.
true_labels_modified = np.where(test_gen.labels[:, 1] == 1, 1, 0)

classification_report_dict = classification_report(
    true_labels_modified, preds, output_dict=True
)
print(classification_report_dict)

# Gather the per-class F1 scores. This relies on the per-class entries coming
# before the "accuracy" key, where the loop stops.
class_f1_scores = {}
for k, v in classification_report_dict.items():
  if k == "accuracy":
    break
  class_f1_scores[columns[int(k)]] = v["f1-score"]

f1_scores = pd.DataFrame({
    "class_name": list(class_f1_scores.keys()),
    "f1-score": list(class_f1_scores.values()),
})
f1_scores = f1_scores.sort_values("f1-score", ascending=False)

# Horizontal bar chart with the best-scoring class at the top.
fig, ax = plt.subplots(figsize=(5, 5))
bar_positions = range(len(f1_scores))
scores = ax.barh(bar_positions, f1_scores["f1-score"].values)
ax.set_yticks(bar_positions)
ax.set_yticklabels(list(f1_scores["class_name"]))
ax.set_xlabel("f1-score")
ax.set_title("F1-Scores for 2 Different Classes")
ax.invert_yaxis()  # reverse the order

# Modified version of: https://matplotlib.org/examples/api/barchart_demo.html
def autolabel(rects):
  """
  Write each bar's width (two decimals) just beyond the end of the bar.

  Draws on the module-level `ax`.
  """
  for rect in rects:
    width = rect.get_width()
    x_pos = 1.03*width
    y_pos = rect.get_y() + rect.get_height()/1.5
    ax.text(x_pos, y_pos, f"{width:.2f}", ha='center', va='bottom')

autolabel(scores)


# Experiment Lordosis vs Scoliosis


In [None]:
# Metric set for a two-class experiment.
# NOTE(review): METRICS2 is not referenced by any other cell visible in this notebook.
METRICS2 = [
      tfa.metrics.F1Score(num_classes=2, average='macro'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
]

In [None]:
# Metadata table for the Lordosis-vs-Scoliosis experiment.
df = pd.read_csv("/content/drive/MyDrive/Zebrametadata_LvsS.csv", sep=",")
# Label columns handed to the data generators as y_col.
columns = ["Lordosis", "Scoliosis"]

In [None]:
img_height = img_width = 384
batch_size = 32

# Plain generator: only rescales pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255.)

# Augmenting generator: rescaling plus random rotation, zoom and flips.
# NOTE(review): rotation_range is documented as a range in degrees, so 0.5
# means at most half a degree of rotation - confirm this is intended.
datagen_augmented = ImageDataGenerator(
    rescale=1.0/255.,
    rotation_range=0.5,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Arguments shared by every flow_from_dataframe call below.
flow_options = dict(
    directory="/content/data_one_hot/images",
    x_col="Filename",
    y_col=columns,
    seed=42,
    class_mode="raw",
    target_size=(img_height, img_width),
)

# Row-position split of the metadata table: [0, 430) train,
# [430, 478) validation, [478, end) test.
train_data = datagen.flow_from_dataframe(
    dataframe=df[:430], batch_size=batch_size, shuffle=True, **flow_options)

train_data_augmented = datagen_augmented.flow_from_dataframe(
    dataframe=df[:430], batch_size=batch_size, shuffle=True, **flow_options)

validation_gen = datagen.flow_from_dataframe(
    dataframe=df[430:478], batch_size=batch_size, shuffle=True, **flow_options)

# The test iterator is not shuffled and passes no batch_size, so it uses the
# flow_from_dataframe default.
test_gen = datagen.flow_from_dataframe(
    dataframe=df[478:], shuffle=False, **flow_options)

In [None]:
# Pull one batch of images and labels from the training iterator.
train_images, train_labels = train_data.next()

In [None]:
# Display the shape of the label batch.
train_labels.shape

In [None]:
# Display the length of the training iterator (used as steps_per_epoch when fitting).
len(train_data)

In [None]:
from PIL import ImageFile
# Tell Pillow to tolerate truncated image files when loading them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# Small CNN: two blocks of two 3x3 convolutions (10 filters each) followed by
# max pooling, then a single sigmoid unit.
# NOTE(review): the generators of this experiment are built with y_col=columns
# (two label columns) while this model has one output unit - confirm the loss
# sees the intended target (the evaluation cell uses label column 1 only).
model_experiment = Sequential([
    Conv2D(10, 3, input_shape=(384, 384, 3)),
    Activation(activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Conv2D(10, 3, activation="relu"),
    Conv2D(10, 3, activation="relu"),
    MaxPool2D(),
    Flatten(),
    # The notebook sets the global policy to "mixed_float16"; keep the output
    # layer in float32 so the sigmoid and the loss are not computed in float16.
    Dense(1, activation="sigmoid", dtype="float32")
])


model_experiment.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                metrics=["accuracy"])

In [None]:
# Train for 5 epochs; the step counts are the lengths of the two iterators.
train_steps = len(train_data)
val_steps = len(validation_gen)
history_lordosis_scoliosis = model_experiment.fit(
    train_data,
    epochs=5,
    steps_per_epoch=train_steps,
    validation_data=validation_gen,
    validation_steps=val_steps,
)

In [None]:
# Evaluate the trained model on the test iterator.
model_experiment.evaluate(test_gen)

In [None]:
# Plot the curves of THIS experiment's training run (Lordosis vs Scoliosis);
# the previous code re-plotted the Kyphosis-vs-Scoliosis history.
plot_loss_curves(history_lordosis_scoliosis)

In [None]:
# Reset the test iterator before drawing a batch from it in the next cell.
test_gen.reset()

In [None]:
# Pull one batch of images and labels from the test iterator.
test_images,test_labels = test_gen.next()

In [None]:
# NOTE(review): random_number is not used by the lookup below, which reads a fixed position.
random_number = random.randint(0, 31)

# Second label value of the second sample in the test batch.
test_labels[1][1]

In [None]:
# Predict on the test iterator and binarise the outputs: below 0.5 -> 0, otherwise 1.
# NOTE(review): the next cell starts by repeating exactly these statements.
pred=model_experiment.predict(test_gen,
verbose=1)
preds = np.where(pred<0.5,0,1)

In [None]:
# Predict on the full test iterator and binarise the outputs at 0.5.
pred = model_experiment.predict(test_gen, verbose=1)
preds = np.where(pred < 0.5, 0, 1)
# Binary ground truth: 1 where the second label column equals 1, else 0.
true_labels_modified = np.where(test_gen.labels[:, 1] == 1, 1, 0)

classification_report_dict = classification_report(
    true_labels_modified, preds, output_dict=True
)
print(classification_report_dict)

# Gather the per-class F1 scores. This relies on the per-class entries coming
# before the "accuracy" key, where the loop stops.
class_f1_scores = {}
for k, v in classification_report_dict.items():
  if k == "accuracy":
    break
  class_f1_scores[columns[int(k)]] = v["f1-score"]

f1_scores = pd.DataFrame({
    "class_name": list(class_f1_scores.keys()),
    "f1-score": list(class_f1_scores.values()),
})
f1_scores = f1_scores.sort_values("f1-score", ascending=False)

# Horizontal bar chart with the best-scoring class at the top.
fig, ax = plt.subplots(figsize=(5, 5))
bar_positions = range(len(f1_scores))
scores = ax.barh(bar_positions, f1_scores["f1-score"].values)
ax.set_yticks(bar_positions)
ax.set_yticklabels(list(f1_scores["class_name"]))
ax.set_xlabel("f1-score")
ax.set_title("F1-Scores for 2 Different Classes")
ax.invert_yaxis()  # reverse the order

# Modified version of: https://matplotlib.org/examples/api/barchart_demo.html
def autolabel(rects):
  """
  Write each bar's width (two decimals) just beyond the end of the bar.

  Draws on the module-level `ax`.
  """
  for rect in rects:
    width = rect.get_width()
    x_pos = 1.03*width
    y_pos = rect.get_y() + rect.get_height()/1.5
    ax.text(x_pos, y_pos, f"{width:.2f}", ha='center', va='bottom')

autolabel(scores)
