In [None]:
# Importing essential libraries
import os  # Operating system related functions
import numpy as nu  # Library for numerical computing 
import pandas as ps  # Library for manipulation 
import seaborn as sb  # Library of visualizating the data
import tensorflow as tnf  # framework deep learning 
import itertools  # Utility functions to iterate
from tensorflow.keras.applications.densenet import DenseNet201  # Importing pre-trained model
from tensorflow.keras.preprocessing import image  #  Preprocessing image
from tensorflow.keras.applications.densenet import preprocess_input  # Preprocessing function for DenseNet201
from tensorflow.keras.layers import Dense, Dropout  # Neural network
from tensorflow.keras.models import Sequential  # Sequential neural network model
from tensorflow.keras.regularizers import l2  # Regularization l2
from keras.utils.vis_utils import plot_model  # Visualization of Model 
from sklearn.model_selection import train_test_split  # Splitting the dataset
from sklearn.metrics import classification_report  # classification report importing 
from sklearn.metrics import confusion_matrix  # Confusion matrix importing
from sklearn.metrics import matthews_corrcoef as MTC  # Matthews correlation coefficient importing
from sklearn.metrics import balanced_accuracy_score as BS  # Balanced accuracy score importing
from sklearn.preprocessing import OneHotEncoder  # One-hot encoding
from sklearn.utils import shuffle  # Shuffling data
import tensorflow_addons as tnfa  # Additional TensorFlow addons
import matplotlib.pyplot as mplt  # Plotting library
import matplotlib.gridspec as gridspec  # Subplots grid specification 
from distutils.dir_util import copy_tree, remove_tree  # Directory manipulation
from tensorflow.keras.preprocessing.image import ImageDataGenerator as IDGN  # Importing image data generator for data augmentation

In [None]:
# Class names; each one is also the name of a sub-directory under h_pth.
# The position in this list is used as the integer label of the class.
h_class = ["damage", "no_damage"]

# Root directory of the hurricane-damage satellite image set
h_pth = "/kaggle/input/satellite-images-of-hurricane-damage/train_another"

# One example image per class, expressed relative to h_pth and then joined
h_fpth = [
    os.path.join(h_pth, relative_name)
    for relative_name in (
        "damage/-93.55964_30.895018.jpeg",       # a damaged example
        "no_damage/-95.061275_29.831535.jpeg",   # an undamaged example
    )
]

In [None]:
# Build DenseNet201 for 224x224 RGB input, keeping its fully connected top
# (include_top=True). No `weights` argument is given, so the Keras default applies.
h_modl = DenseNet201(include_top=True, input_shape=(224, 224, 3))

# Evaluation metrics. The names 'acc' and 'auc' become the keys under which
# Keras reports these metrics.
h_metri = [
    tnf.keras.metrics.CategoricalAccuracy(name='acc'),  # categorical accuracy
    tnf.keras.metrics.AUC(name='auc'),                  # area under the ROC curve
    tnfa.metrics.F1Score(num_classes=2)                 # F1 score over the two classes
]

# Compile with the Adam optimizer, categorical cross-entropy loss and the
# metrics defined above.
h_modl.compile(loss=tnf.losses.CategoricalCrossentropy(),
               metrics=h_metri,
               optimizer='adam')

# Print the layer-by-layer architecture and parameter counts
h_modl.summary()

In [None]:
# Output tensor of the layer named "avg_pool" in h_modl; it is used as the
# feature vector of an image.
vector = h_modl.get_layer("avg_pool").output

# Model that maps h_modl's input to that "avg_pool" output, i.e. the feature extractor
feature_extractor = tnf.keras.Model(inputs=h_modl.input, outputs=vector)

In [None]:
# Accumulators: one feature vector (P_list) and one integer class label
# (Q_list) per image.
P_list = []
Q_list = []

# The label of a class is its position in h_class (0: "damage", 1: "no_damage").
# Iterating over h_class itself keeps the loop correct if the list changes.
for f, h_class_name in enumerate(h_class):
    # Directory that holds every image of this class
    h_folder_path = os.path.join(h_pth, h_class_name)

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order identical from one run to the next.
    for h_phile in sorted(os.listdir(h_folder_path)):
        # Only ".jpeg" files are treated as images; skip anything else
        if not h_phile.endswith(".jpeg"):
            continue

        # Full path of the image file
        h_fpath = os.path.join(h_folder_path, h_phile)

        # Load the image resized to 224x224 and convert it to an array
        h_imj = image.load_img(h_fpath, target_size=(224, 224))
        h_imj_arr = image.img_to_array(h_imj)

        # Add a leading axis so the single image forms a batch of one
        imj_arr_b = nu.expand_dims(h_imj_arr, axis=0)

        # Apply the DenseNet preprocessing function
        h_input_imj = preprocess_input(imj_arr_b)

        # verbose=0: predict is called once per image, so the default progress
        # output would otherwise be printed once per image.
        feature_vec = feature_extractor.predict(h_input_imj, verbose=0)

        # Store the flattened feature vector and the class label
        P_list.append(feature_vec.ravel())
        Q_list.append(f)

# Convert the Python lists to NumPy arrays with data type float32
P_list = nu.asarray(P_list, dtype=nu.float32)
Q_list = nu.asarray(Q_list, dtype=nu.float32)

In [None]:
# Feature matrix P and label vector Q as float32 NumPy arrays
P = nu.asarray(P_list, dtype=nu.float32)
Q = nu.asarray(Q_list, dtype=nu.float32)

# Shuffle features and labels together. One pass is enough (repeating a uniform
# shuffle does not make it more random), and the fixed seed makes the order
# reproducible, in line with the seeded train/test split that follows.
P, Q = shuffle(P, Q, random_state=0)

# Print the shapes of the feature matrix P and label vector Q
print("The shapes of feature matrix P")
print(P.shape)
print("\nThe shapes of label matrix Q")
print(Q.shape)

# Unique class labels in Q and the number of samples carrying each one
class_types, conts = nu.unique(Q, return_counts=True)

# Print the unique class labels and their counts
print("\nClass labels")
print(class_types)
print("\nClass counts")
print(conts)

In [None]:
# Hold out 20% of the samples for testing.
#   stratify=Q      -> the split is stratified on the labels in Q
#   random_state=0  -> fixed seed, so the same split is produced on every run
train_P, test_P, train_Q, test_Q = train_test_split(
    P, Q, test_size=0.2, random_state=0, stratify=Q)

# Report the shape of the training and testing feature matrices
print("train_P  is", train_P.shape, sep="\n")
print("\ntest_P  is", test_P.shape, sep="\n")

In [None]:
# Import the necessary libraries for XGBoost
import xgboost as xgb
from xgboost import XGBClassifier

# Create an XGBoost classifier with its default hyperparameters.
# The previous arguments C=1.75 and kernel="linear" are SVM hyperparameters;
# XGBoost does not define them, so they never configured the model and are
# dropped here. The variable name is kept because later code may refer to it.
h_xgb_lin = XGBClassifier()

# Fit the classifier on the training features and labels
h_xgb_lin.fit(train_P, train_Q)

# Predict the class of every test sample
q_pred = h_xgb_lin.predict(test_P)

# Per-class precision, recall and F1-score; 'target_names' labels the
# classes in the report in the order given by h_class
print(classification_report(test_Q, q_pred, target_names=h_class))

# Confusion matrix of true labels against predictions
print(confusion_matrix(test_Q, q_pred))

In [None]:
def the_matrix(value,
               the_name,
               title='Confusion Matrix',
               cmap=None,
               normalize=True):
    """
    Plot a confusion matrix as an annotated heat map.

    Parameters:
    - value: The confusion matrix (rows are drawn on the 'ACTUAL' axis, columns
      on the 'PREDICTED' axis). Anything accepted by numpy.asarray.
    - the_name: Labels for the classes, used as tick labels on both axes;
      pass None to keep the default ticks.
    - title: The title for the plot (default is 'Confusion Matrix').
    - cmap: Colormap for the plot (default is None, which uses 'Blues').
    - normalize: If True, every row is divided by its sum and cells are
      annotated with four decimals; if False, cells are annotated with the
      raw values using thousands separators.

    Returns:
    - None
    """
    value = nu.asarray(value)

    # Normalize BEFORE drawing so that the heat map, the color bar, the cell
    # annotations and the text-color threshold all refer to the same numbers.
    if normalize:
        row_totals = value.sum(axis=1, keepdims=True)
        # A row without samples would divide by zero; dividing by 1 instead
        # leaves such a row as all zeros.
        value = value.astype('float') / nu.where(row_totals == 0, 1, row_totals)

    # Set the colormap to 'Blues' if not specified
    if cmap is None:
        cmap = mplt.get_cmap('Blues')

    # Draw the matrix as an image with title and color bar
    mplt.figure(figsize=(8, 6))
    mplt.imshow(value, interpolation='nearest', cmap=cmap)
    mplt.title(title)
    mplt.colorbar()

    # Customize x and y tick labels if class labels are provided
    if the_name is not None:
        the_marking = nu.arange(len(the_name))
        mplt.xticks(the_marking, the_name, rotation=90)
        mplt.yticks(the_marking, the_name)

    # Cells above this threshold get white text, the others black text
    h_boundary = value.max() / 1.5 if normalize else value.max() / 2

    # Fractions are printed with four decimals, raw values with separators
    cell_format = "{:0.4f}" if normalize else "{:,}"

    # Annotate every cell; imshow puts columns on x and rows on y, hence (t, s)
    for s, t in itertools.product(range(value.shape[0]), range(value.shape[1])):
        mplt.text(t, s, cell_format.format(value[s, t]),
                  horizontalalignment="center",
                  color="white" if value[s, t] > h_boundary else "black")

    # Set labels for y-axis and x-axis
    mplt.ylabel('ACTUAL', fontsize=20)
    mplt.xlabel('PREDICTED', fontsize=14)

    # Called after the labels exist so that they are part of the layout
    mplt.tight_layout()

    # Show the plot
    mplt.show()

In [None]:
# Font properties handed to matplotlib's rc mechanism: bold serif, 20 points
font = dict(family='serif', weight='bold', size=20)

# Install them as the global font defaults
mplt.rc('font', **font)

# These two settings are applied after mplt.rc and therefore take precedence:
# the family stays "serif" and the default size ends up at 14 points (not 20).
mplt.rcParams.update({"font.family": "serif", "font.size": 14})

In [None]:
# Plot the confusion matrix of the XGBoost classifier.
# The matrix is computed from test_Q and q_pred instead of being typed in by
# hand, so the figure always reflects the current run. At this point q_pred
# holds the XGBoost predictions, provided the cells are run in order.
the_matrix(value=confusion_matrix(test_Q, q_pred),  # Confusion matrix values
           normalize=False,                         # Show raw counts
           the_name=["Damage", "NonDamage"],        # Class labels
           title="XGBoost")                         # Title for the plot


In [None]:
# Build a one-hot encoder that returns dense arrays.
# The keyword for dense output was renamed from `sparse` to `sparse_output`
# in newer scikit-learn releases; try the new name first and fall back to the
# old one so the cell runs on either.
try:
    n_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    n_encoder = OneHotEncoder(sparse=False)

# Fit the encoder on the training labels (train_Q), reshaped to a single
# column, and transform them into one-hot encoded format in the same step
e_train_Q = n_encoder.fit_transform(train_Q.reshape(-1, 1))

# Transform the testing labels (test_Q) with the encoder fitted on train_Q
e_test_Q = n_encoder.transform(test_Q.reshape(-1, 1))

In [None]:
# Define a function for creating the dense classifier network
def create_model(input_dim=1920, learning_rate=0.0001):
    """
    Build and compile the dense classifier network.

    Architecture: Dense(1000, ReLU) -> Dropout(0.3) -> Dense(2, linear, with
    L2 kernel regularization of 0.1). The model is compiled with the Adam
    optimizer, the categorical hinge loss and the module-level metric list
    `h_metri`.

    Parameters:
    - input_dim: Length of one input feature vector (default 1920).
    - learning_rate: Learning rate of the Adam optimizer (default 0.0001).

    Returns:
    - The compiled Sequential model.
    """
    # Local name differs from the notebook-level `h_modl` to avoid shadowing it
    net = Sequential()
    # Dense layer with 1000 units and ReLU activation on the input features
    net.add(Dense(1000, input_dim=input_dim, activation="relu"))
    # Dropout regularization with a rate of 0.3
    net.add(Dropout(0.3))
    # Output layer: 2 units, L2 regularization of 0.1, linear activation
    net.add(Dense(2, kernel_regularizer=l2(0.1), activation="linear"))
    # `learning_rate` is the current keyword; `lr` is its deprecated alias
    net.compile(optimizer=tnf.keras.optimizers.Adam(learning_rate=learning_rate),
                loss="categorical_hinge", metrics=h_metri)
    return net

# Number of training epochs
epoch = 1000

# Build a fresh network; note that this rebinds h_modl to the new model
h_modl = create_model()

# Train on the training features and one-hot labels. 15% of the training data
# is held back for validation; batches of 64 samples; progress is printed.
history = h_modl.fit(x=train_P,
                     y=e_train_Q,
                     batch_size=64,
                     epochs=epoch,
                     validation_split=0.15,
                     verbose=1)

In [None]:
# Font properties for the plots: bold serif, 12 points.
# They are set BEFORE the figure is created: text properties are taken from
# the rc settings when the text is created, so applying them after plotting
# (as was done before) left this figure unaffected.
font = {'family': 'serif',  # Font family (serif)
        'weight': 'bold',    # Font weight (bold)
        'size': 12}          # Font size (12 points)
mplt.rc('font', **font)

# Setting the font family to "serif" for all plots
mplt.rcParams["font.family"] = "serif"

# Create a figure with 3 subplots in a 1x3 grid with a specified figsize
fig, aex = mplt.subplots(1, 3, figsize=(20, 10))
aex = aex.ravel()

# One panel per metric key recorded in the training history
for s, the_metri in enumerate(["acc", "auc", "loss"]):
    # Training curve followed by the matching validation curve
    aex[s].plot(history.history[the_metri])
    aex[s].plot(history.history["val_" + the_metri])

    # Title and axis labels of the panel
    aex[s].set_title("Model {}".format(the_metri))
    aex[s].set_xlabel("Epochs")
    aex[s].set_ylabel(the_metri)

    # Legend entries follow the order in which the curves were plotted
    aex[s].legend(["train", "val"])

In [None]:
# Epoch numbers 1..N, where N is the number of epochs actually recorded in
# the history (so x and y always have the same length)
e = nu.arange(1, len(history.history["loss"]) + 1)

# Create a figure with a single axes for the Seaborn (imported as 'sb') plots
fig, aexes = mplt.subplots(nrows=1, ncols=1, figsize=(8, 6))

# Plotting training loss over epochs.
# The keyword that selects the target axes is `ax`; the previous `aex=` is
# not a seaborn argument and was forwarded to matplotlib instead.
sb.lineplot(x=e, y=history.history["loss"], ax=aexes, label="train")

# Plotting validation loss over epochs on the same axes
sb.lineplot(x=e, y=history.history["val_loss"], ax=aexes, label="val")

# Setting y-axis label for the plot
aexes.set_ylabel("Categorical Hinge Loss")

# Setting x-axis label for the plot
aexes.set_xlabel("Epoch")


In [None]:
# Predict a class for every sample in test_P: the network outputs one score
# per class and argmax picks the index of the highest score
q_pred = nu.argmax(h_modl.predict(test_P), axis=-1)

# Print a classification report to evaluate the model's performance.
# The keyword is `target_names` (the previous `targeted_names` is not a
# parameter of classification_report and raised a TypeError); it labels the
# classes in the report in the order given by h_class.
print(classification_report(test_Q, q_pred, target_names=h_class))

# Print a confusion matrix to visualize the model's performance
print(confusion_matrix(test_Q, q_pred))

In [None]:
# Create a figure with 2 subplots in a 2x1 grid: loss on top, accuracy below
fig, aex = mplt.subplots(2, 1)

# First subplot: training loss (blue) and validation loss (red).
# 'val_loss' is measured on the validation split taken from the training data,
# not on the test set, so it is labelled as validation loss.
aex[0].plot(history.history['loss'], color='b', label="Training loss")
aex[0].plot(history.history['val_loss'], color='r', label="Validation loss")
legend = aex[0].legend(loc='best', shadow=True)

# Second subplot: training and validation accuracy (history keys 'acc' and
# 'val_acc'); previously this panel was created but left empty
aex[1].plot(history.history['acc'], color='b', label="Training accuracy")
aex[1].plot(history.history['val_acc'], color='r', label="Validation accuracy")
legend = aex[1].legend(loc='best', shadow=True)

# Keep the two panels from overlapping
fig.tight_layout()

# Display the plot
mplt.show()

In [None]:
def the_matrix(value,
               the_name,
               title='Confusion matrix',
               cmap=None,
               normalize=True):
    """
    Plot a confusion matrix as an annotated heat map.

    This definition replaces any function named `the_matrix` defined earlier
    in the notebook.

    Parameters:
    - value: The confusion matrix (rows are drawn on the 'ACTUAL' axis, columns
      on the 'PREDICTED' axis). Anything accepted by numpy.asarray.
    - the_name: Labels for the classes, used as tick labels on both axes;
      pass None to keep the default ticks.
    - title: The title for the plot (default is 'Confusion matrix').
    - cmap: Colormap for the plot (default is None, which uses 'Blues').
    - normalize: If True, every row is divided by its sum and cells are
      annotated with four decimals; if False, cells are annotated with the
      raw values using thousands separators.

    Returns:
    - None
    """
    value = nu.asarray(value)

    # Normalize first, so the image, the color bar, the annotations and the
    # text-color threshold are all based on the same numbers
    if normalize:
        row_totals = value.sum(axis=1, keepdims=True)
        # Guard against rows without samples (sum 0): divide those by 1,
        # which leaves the row as all zeros instead of dividing by zero
        value = value.astype('float') / nu.where(row_totals == 0, 1, row_totals)

    # Setting colormap to 'Blues' if not specified
    if cmap is None:
        cmap = mplt.get_cmap('Blues')

    # Create the figure and draw the matrix with its title and color bar
    mplt.figure(figsize=(8, 6))
    mplt.imshow(value, interpolation='nearest', cmap=cmap)
    mplt.title(title)
    mplt.colorbar()

    # Customize x and y tick labels if class labels are provided
    if the_name is not None:
        the_marking = nu.arange(len(the_name))
        mplt.xticks(the_marking, the_name, rotation=90)
        mplt.yticks(the_marking, the_name)

    # Threshold deciding between white and black annotation text
    h_boundary = value.max() / 1.5 if normalize else value.max() / 2

    # Four decimals for fractions, thousands separators for raw values
    cell_format = "{:0.4f}" if normalize else "{:,}"

    # Write the value of every cell at column t (x) and row s (y)
    for s, t in itertools.product(range(value.shape[0]), range(value.shape[1])):
        mplt.text(t, s, cell_format.format(value[s, t]),
                  horizontalalignment="center",
                  color="white" if value[s, t] > h_boundary else "black")

    # Axis labels (the unused local font dictionaries were removed: they were
    # assigned but never passed to matplotlib)
    mplt.ylabel('ACTUAL', fontsize=20)
    mplt.xlabel('PREDICTED', fontsize=14)

    # Run the layout after the labels exist so they are taken into account
    mplt.tight_layout()

    # Display the plot
    mplt.show()

In [None]:
# Global font defaults for the following figures: bold serif, 20 points
font = dict(family='serif', weight='bold', size=20)
mplt.rc('font', **font)

# Applied after mplt.rc, so these values are the ones in effect afterwards:
# family "serif" and a default font size of 14 points
mplt.rcParams.update({"font.family": "serif", "font.size": 14})

In [None]:
# Draw the second confusion-matrix figure with raw (non-normalized) counts.
# NOTE(review): the four counts are hard-coded rather than computed from
# test_Q and q_pred, and the most recent q_pred above comes from the dense
# network while the title says XGBoost - confirm which model these numbers
# belong to before relying on the figure.
the_matrix(
    title="DenseNet201_XGBoost",             # Title for the plot
    the_name=["Damage", "NonDamage"],        # Class labels
    normalize=False,                         # Not normalizing the values
    value=nu.array([[976, 24], [28, 972]]),  # Confusion matrix values
)