### Aircraft Damage Classification and Captioning Using Pretrained Deep Learning Models


Aircraft damage inspection is a critical task for ensuring flight safety and maintaining aircraft structural integrity. Traditional manual inspection methods are time-consuming, costly, and susceptible to human error. This project presents an automated deep learning approach to classify aircraft damage and generate descriptive captions for damaged aircraft images.

The project focuses on two main tasks:

1. Damage Classification – Classifying aircraft damage into two categories:
   Dent and Crack, using feature extraction with a pre-trained VGG16 convolutional neural network.

2. Damage Captioning and Summarization – Generating natural-language descriptions and summaries of aircraft damage images using a pre-trained Transformer-based model, enabling explainable and interpretable outputs.

By combining computer vision and natural language processing techniques, this project demonstrates how deep learning can be applied to automate aircraft inspection workflows, reduce manual effort, and improve maintenance efficiency in the aviation industry.


### Import Libraries


In [None]:
import zipfile
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.applications import VGG16
from keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image
import random

# One shared seed for Python's `random`, NumPy and TensorFlow, applied in
# that order, to make repeated runs of the notebook more reproducible.
seed_value = 42
for _seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_rng(seed_value)

### Configuration for data loading


In [None]:
# Training hyper-parameters.
batch_size = 32   # images per batch produced by the data generators
n_epochs = 10     # number of epochs passed to model.fit

# Image geometry: height and width in pixels, followed by 3 colour channels.
img_rows = 224
img_cols = 224
input_shape = (img_rows, img_cols, 3)

### Download, extract, and prepare the dataset


In [None]:
import tarfile
import urllib.request
import os
import shutil

url = "DATASET_URL"

# Define the path to save the file
tar_filename = "aircraft_damage_dataset_v1.tar"
extracted_folder = "aircraft_damage_dataset_v1"  # Folder where contents will be extracted

# Download the tar file
urllib.request.urlretrieve(url, tar_filename)
print(f"Downloaded {tar_filename}. Extraction will begin now.")

# Start from a clean slate: remove any earlier extraction so files from a
# previous run cannot mix with the freshly extracted ones.
if os.path.exists(extracted_folder):
    print(f"The folder '{extracted_folder}' already exists. Removing the existing folder.")
    shutil.rmtree(extracted_folder)
    print(f"Removed the existing folder: {extracted_folder}")

# Extract the contents of the tar file into the current directory.
with tarfile.open(tar_filename, "r") as tar_ref:
    if hasattr(tarfile, "data_filter"):
        # The archive was downloaded from the network, so treat it as
        # untrusted: the "data" filter rejects members that would be written
        # outside the destination (absolute paths, "..", unsafe links).
        tar_ref.extractall(filter="data")
    else:
        # NOTE(review): this interpreter predates tarfile extraction filters,
        # so members are extracted without path-traversal protection.
        tar_ref.extractall()
    print(f"Extracted {tar_filename} successfully.")


# Directories of the three dataset splits inside the extracted folder.
extract_path = "aircraft_damage_dataset_v1"
train_dir, test_dir, valid_dir = (
    os.path.join(extract_path, split_name)
    for split_name in ('train', 'test', 'valid')
)

# One generator factory per split; each only multiplies pixel values by
# 1/255 (no augmentation is configured).
train_datagen, valid_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Options shared by every split.
_flow_options = dict(
    target_size=(img_rows, img_cols),  # Resize images to the size VGG16 expects
    batch_size=batch_size,
    seed=seed_value,
    class_mode='binary',  # Binary classification: dent vs crack
)

# Only the training split is shuffled; validation and test keep directory
# order.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    shuffle=True,
    **_flow_options,
)

valid_generator = valid_datagen.flow_from_directory(
    directory=valid_dir,
    shuffle=False,
    **_flow_options,
)

test_generator = test_datagen.flow_from_directory(
    directory=test_dir,
    shuffle=False,
    **_flow_options,
)


### Model definition using a pretrained model


In [None]:
# Convolutional backbone: VGG16 with ImageNet weights and without its
# original fully connected classifier (include_top=False).
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(img_rows, img_cols, 3),
)

# Freeze the backbone so that only the layers added below are trained.
base_model.trainable = False

# Classification head: two 512-unit ReLU layers, each followed by dropout,
# ending in a single sigmoid unit for the binary decision.
_head_layers = [
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(512, activation="relu"),
    Dropout(0.3),
    Dense(1, activation="sigmoid"),
]
model = Sequential([base_model] + _head_layers)

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

### Model Training


In [None]:
# Train on the training generator for n_epochs epochs, validating on the
# validation generator.
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=n_epochs,
)

# Per-epoch metrics dictionary, taken from the History object returned by fit.
train_history = history.history

### Model Evaluation


In [None]:
# Evaluate on the complete test set. No `steps` argument is passed: the
# previous `samples // batch_size` value skipped the final partial batch and
# evaluated to 0 steps whenever the test set held fewer images than one
# batch. Without it, Keras iterates over every batch the generator provides.
test_loss, test_accuracy = model.evaluate(test_generator)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

### Display Image


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image

# Function to plot a single image and its prediction
def plot_image_with_title(image, model, true_label, predicted_label, class_names):
    """Display one image titled with its true and predicted class names.

    Args:
        image: Image data in a form accepted by ``plt.imshow``.
        model: Not used by this function; kept so existing callers that pass
            it keep working.
        true_label: Ground-truth class index. May be an ``int`` or any numeric
            scalar convertible with ``int()`` (e.g. a float label).
        predicted_label: Predicted class index, in the same accepted forms.
        class_names: Lookup from integer class index to class name (a dict
            keyed by ``int``, or a list/tuple).

    Raises:
        KeyError / IndexError: If a label is not present in ``class_names``.
    """
    # Labels coming from a batch may be float scalars; normalise them to plain
    # ints so the lookup works for dicts and sequences alike. Resolve the
    # names before creating the figure so a bad label leaves no empty plot.
    true_label_name = class_names[int(true_label)]
    pred_label_name = class_names[int(predicted_label)]

    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.title(f"True: {true_label_name}\nPred: {pred_label_name}")
    plt.axis('off')
    plt.show()

# Function to test the model with images from the test set
def test_model_on_image(test_generator, model, index_to_plot=0):
    """Predict on one batch from the generator and plot a single example.

    The generator is reset, its next batch is fetched, and the model's
    outputs for that batch are thresholded at 0.5 to obtain 0/1 classes.
    The image at ``index_to_plot`` within the batch is then shown together
    with its true and predicted class names.

    Args:
        test_generator: Iterator yielding ``(images, labels)`` batches and
            exposing ``reset()`` and ``class_indices``.
        model: Model whose ``predict`` returns one probability per image.
        index_to_plot: Position of the image inside the fetched batch.
    """
    test_generator.reset()
    batch_images, batch_labels = next(test_generator)

    # Probabilities above 0.5 count as class 1, everything else as class 0.
    probabilities = model.predict(batch_images)
    hard_predictions = (probabilities > 0.5).astype(int).flatten()

    # class_indices maps name -> index; flip it to look names up by index.
    index_to_name = {
        idx: name for name, idx in test_generator.class_indices.items()
    }

    plot_image_with_title(
        image=batch_images[index_to_plot],
        model=model,
        true_label=batch_labels[index_to_plot],
        predicted_label=hard_predictions[index_to_plot],
        class_names=index_to_name,
    )





### Image captioning and summarization using BLIP model


In [None]:
# NOTE:
# BLIP is a PyTorch-based Transformer model.
# TensorFlow's tf.py_function is used here to integrate
# PyTorch inference into a TensorFlow workflow for demonstration purposes.


import torch
import tensorflow as tf
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")



class BlipCaptionSummaryLayer(tf.keras.layers.Layer):
    """Keras layer that returns BLIP-generated text for an image file.

    The layer stores a processor and a generation model and, when called,
    runs ``process_image`` through ``tf.py_function``.
    """

    def __init__(self, processor, blip_model, **kwargs):
        """Store the processor and model; extra kwargs go to the base layer."""
        super().__init__(**kwargs)
        self.processor = processor
        self.model = blip_model

    def call(self, image_path, task):
        """Return the generated text for ``image_path`` as a string tensor.

        Args:
            image_path: String tensor holding the path of the image file.
            task: String tensor, "caption" or anything else for a summary.
        """
        # tf.py_function lets the Python-side processing run on the tensors.
        return tf.py_function(
            func=self.process_image,
            inp=[image_path, task],
            Tout=tf.string,
        )

    def process_image(self, image_path, task):
        """Load the image, build the prompt and generate the text.

        Args:
            image_path: Tensor whose value is the UTF-8 encoded file path.
            task: Tensor whose value is the UTF-8 encoded task name; the
                value "caption" selects the caption prompt, any other value
                selects the summary prompt.

        Returns:
            The decoded generated text, or the string
            "Error processing image" if any step raises an exception (the
            exception is printed).
        """
        try:
            # Tensor -> Python string, then load the file as an RGB image.
            path_text = image_path.numpy().decode("utf-8")
            rgb_image = Image.open(path_text).convert("RGB")

            # Pick the text prompt that the generation continues from.
            task_text = task.numpy().decode("utf-8")
            prompt = (
                "This is a picture of"
                if task_text == "caption"
                else "This is a detailed photo showing"
            )

            model_inputs = self.processor(
                images=rgb_image, text=prompt, return_tensors="pt"
            )
            generated_ids = self.model.generate(**model_inputs)

            # Turn the first generated sequence back into readable text.
            return self.processor.decode(
                generated_ids[0], skip_special_tokens=True
            )
        except Exception as e:
            # Best effort: report the problem and return a marker string.
            print(f"Error: {e}")
            return "Error processing image"


def generate_text(image_path, task):
    """Generate a caption or summary for an image with the BLIP layer.

    Args:
        image_path: String tensor holding the path of the image file.
        task: String tensor naming the task ("caption" or "summary").

    Returns:
        The string tensor produced by ``BlipCaptionSummaryLayer``.
    """
    blip_layer = BlipCaptionSummaryLayer(processor, blip_model)
    # The layer's call() takes image_path and task as two separate
    # arguments; passing them wrapped in a single list raised a TypeError
    # for the missing `task` argument.
    return blip_layer(image_path, task)


# Replace with a valid image path from your dataset
image_url = "aircraft_damage_dataset_v1/test/dent/example.jpg"

# Preview the image that is about to be described.
img = plt.imread(image_url)
plt.imshow(img)
plt.axis('off')
plt.show()

# The same path wrapped as a string tensor, as expected by generate_text.
image_path = tf.constant(image_url)

caption_task = tf.constant("caption")
summary_task = tf.constant("summary")

caption = generate_text(image_path, caption_task)
summary = generate_text(image_path, summary_task)

# The results are byte-string tensors; decode them for printing.
print("Caption:", caption.numpy().decode("utf-8"))
print("Summary:", summary.numpy().decode("utf-8"))




### Results Summary

- Binary damage classification achieved high accuracy using VGG16 feature extraction.
- The BLIP model generated coherent captions and summaries describing aircraft damage.
- The combined CV + NLP pipeline improves interpretability and inspection efficiency.
