<a href="https://www.kaggle.com/code/gabrielfcarvalho/report-generator?scriptVersionId=254436924" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Combined Automotive Damage and Tow Truck Report Generation Notebook

# 📌 **Notebook Overview**
  This notebook demonstrates how to:
# 1️⃣ **Install and import required libraries.**
# 2️⃣ **Load a fine‐tuned BLIP model for image captioning.**
# 3️⃣ **Load a fine‐tuned YOLO model for car damage detection.**
# 4️⃣ **Load a PaddleOCR model for license plate detection and retrieve vehicle data.**
# 5️⃣ **Generate PDF reports with ReportLab.**
# 6️⃣ **Load a LLaMA 3 model from Hugging Face for generating detailed tow truck reports.**
# 7️⃣ **Aggregate data from multiple images and produce an aggregated report.**

# 📌 1️⃣ **Install and import required libraries.**

In [None]:
# Install the notebook's runtime dependencies in one step; paddlepaddle is pinned to 2.6.1.
!pip install torch torchvision transformers ultralytics openai matplotlib opencv-python Pillow paddlepaddle==2.6.1 requests reportlab

In [None]:
# PaddleOCR is installed in its own step, pinned to 2.7.3.
!pip install paddleocr==2.7.3

In [None]:
import os
import torch
import cv2
import matplotlib.pyplot as plt
import requests
import re
from paddleocr import PaddleOCR
import xml.etree.ElementTree as ET
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer, pipeline
from ultralytics import YOLO
from PIL import Image
from google.colab.patches import cv2_imshow
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader

In [None]:
# Set device: use the GPU when torch reports CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 📌 2️⃣ **Load a fine‐tuned BLIP model for image captioning.**

In [None]:
###############################################
# 1) Load Fine-Tuned BLIP for Captioning
###############################################

# Path the BLIP processor and model are both loaded from.
blip_path = "/kaggle/input/tow-report-dataset/blip-fine-tuned-final"
# `processor` prepares images for the model and decodes generated token ids (see generate_caption).
processor = BlipProcessor.from_pretrained(blip_path)
# The captioning model is moved to the device chosen above.
model = BlipForConditionalGeneration.from_pretrained(blip_path).to(device)

def generate_caption(image_path):
    """Return the caption BLIP generates for the image at ``image_path``.

    The image is opened with PIL, converted to RGB, preprocessed by the
    module-level ``processor`` and fed to the module-level ``model``; the
    first generated sequence is decoded without special tokens.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = processor(images=rgb_image, return_tensors="pt")
    model_inputs = model_inputs.to(device)
    generated_ids = model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

# 📌 3️⃣ **Load a fine‐tuned YOLO model for car damage detection.**

In [None]:
###############################################
# 2) Load Fine-Tuned YOLO Model for Damage Detection
###############################################

# Weights file for the damage detector.
yolo_model_path = "/kaggle/input/tow-report-dataset/yolo11_best_weight.pt"
model_yolo = YOLO(yolo_model_path)

# Maps the integer class id reported by the detector to a human-readable label.
# NOTE(review): presumably mirrors the label order used when the weights were
# trained -- confirm against the training configuration.
CLASS_NAMES = {
    0: 'dent',
    1: 'scratch',
    2: 'crack',
    3: 'glass shatter',
    4: 'lamp broken',
    5: 'tire flat'
}

def detect_damage(image_path):
    """Run the YOLO damage detector on a single image.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        A tuple ``(img, bboxes)``. ``img`` is the array returned by
        ``cv2.imread``; ``bboxes`` is a list of
        ``[x1, y1, x2, y2, cls_id, conf, position]`` entries, where
        ``position`` is ``"left"`` or ``"right"`` depending on which half of
        the image contains the centre of the box.

    Raises:
        ValueError: If OpenCV cannot read the image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with a clear
        # message instead of an AttributeError further down.
        raise ValueError(f"Could not read image: {image_path}")

    half_width = img.shape[1] / 2
    results = model_yolo.predict(source=image_path, conf=0.25)
    bboxes = []
    for box in results[0].boxes:
        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().tolist()
        conf = float(box.conf[0].cpu().numpy())
        cls_id = int(box.cls[0].cpu().numpy())
        # Decide the side from the box centre: using only the left edge would
        # label a box that lies mostly in the right half as "left".
        position = "left" if (x1 + x2) / 2 < half_width else "right"
        bboxes.append([x1, y1, x2, y2, cls_id, conf, position])
    return img, bboxes

def draw_bboxes(image, bboxes, font_scale=5, thickness=5):
    """Draw the detected boxes and their labels onto ``image`` in place.

    Args:
        image: Image array to annotate (modified in place).
        bboxes: Iterable of ``[x1, y1, x2, y2, cls_id, conf, position]``
            entries as produced by ``detect_damage``.
        font_scale: Scale factor for the label text.
        thickness: Stroke thickness of the label text.

    Returns:
        The same ``image`` object, annotated.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    colour = (0, 0, 255)
    for x1, y1, x2, y2, cls_id, conf, _position in bboxes:
        left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)
        label = f"{CLASS_NAMES.get(cls_id, cls_id)} ({conf:.2f})"

        # putText anchors the text at its bottom-left corner. When the box
        # touches the top of the image there is no room above it, so the
        # label is drawn just inside the box instead of off-canvas.
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        baseline_y = top - 5
        if baseline_y < text_height:
            baseline_y = top + text_height + 5

        cv2.rectangle(image, (left, top), (right, bottom), colour, 4)
        cv2.putText(image, label, (left, baseline_y),
                    font, font_scale, colour, thickness)
    return image

# 📌 4️⃣ **Load a PaddleOCR model for license plate detection and retrieve vehicle data.**

In [None]:
###############################################
# 3) Load OCR Model for License Plate Detection
###############################################

# Shared OCR engine, created with the English language setting; used by extract_license_plate.
ocr = PaddleOCR(lang='en')

def is_valid_plate(text):
    """Check whether ``text`` looks like a Brazilian license plate.

    Two layouts are accepted, each of which must match the whole string:
    the old "ABC-1234" format (hyphen optional) and the new "ABC1D23"
    format. Returns the ``re.Match`` of the first layout that fits, or
    ``None`` when neither does.
    """
    plate_patterns = (
        r"[A-Z]{3}-?\d{4}",        # old format: three letters, four digits
        r"[A-Z]{3}\d[A-Z]\d{2}",   # new format: second digit replaced by a letter
    )
    for plate_pattern in plate_patterns:
        match = re.fullmatch(plate_pattern, text)
        if match:
            return match
    return None

# Extract license plate candidates from an image using the OCR model loaded above
def extract_license_plate(image_path):
    """Collect the plate-like strings that OCR reads from ``image_path``.

    Each recognised text has its spaces removed and is upper-cased; texts
    accepted by ``is_valid_plate`` are returned with any hyphen stripped.
    An empty list is returned when OCR produces no result.
    """
    ocr_output = ocr.ocr(image_path, cls=True)
    if ocr_output is None:
        return []

    found_plates = []
    for page in ocr_output:
        if page is None:
            continue
        for entry in page:
            # Skip entries that do not carry a (text, confidence) pair.
            if entry is None or len(entry) < 2:
                continue
            raw_text, _score = entry[1][0], entry[1][1]
            candidate = raw_text.replace(" ", "").upper()
            if not is_valid_plate(candidate):
                continue
            # The hyphen is dropped before the plate is handed to the lookup API.
            found_plates.append(candidate.replace("-", ""))
    return found_plates

def get_vehicle_info(plate, username, timeout=10):
    """Look up a plate on the RegCheck ``CheckBrazil`` endpoint.

    Args:
        plate: Plate string sent as ``RegistrationNumber``.
        username: Account name sent as ``username``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping the namespace-stripped tag of every child of the
        response's ``vehicleData`` element(s) to its text. ``None`` is
        returned when the request fails, the status code is not 200, or the
        body is not well-formed XML.
    """
    api_url = "https://www.regcheck.org.uk/api/reg.asmx/CheckBrazil"
    try:
        # `params` lets requests URL-encode the values; `timeout` keeps a
        # stalled server from hanging the whole notebook.
        response = requests.get(
            api_url,
            params={"RegistrationNumber": plate, "username": username},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Treat a network failure like any other unsuccessful lookup.
        print(f"Vehicle lookup failed for plate {plate}: {exc}")
        return None

    if response.status_code != 200:
        return None

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError:
        return None

    vehicle_data = {}
    for elem in root.iter():
        if not elem.tag.endswith("vehicleData"):
            continue
        for data in elem:
            # Tags look like "{namespace}Name"; keep only the local name.
            vehicle_data[data.tag.split("}")[-1]] = data.text
    return vehicle_data

# 📌 5️⃣ **Generate PDF reports with ReportLab.**

In [None]:
###############################################
# 4) Generate PDF Reports
###############################################

def generate_pdf_report(report_text, img_path, pdf_path):
    """Write a PDF with the annotated image on top and the report text below.

    Args:
        report_text: Report body; newlines start new lines in the PDF.
        img_path: Path of the image drawn at the top of every page.
        pdf_path: Destination path of the generated PDF.

    The text is wrapped at word boundaries to at most 90 characters per
    line. When a page runs out of room a new page is started, the image is
    drawn again and the text continues below it.
    """
    import textwrap  # local import: stdlib module not imported at file level

    c = canvas.Canvas(pdf_path, pagesize=letter)
    c.setFont("Helvetica", 12)

    # Image placement at the top of the page.
    img_x, img_y = 50, 500
    img_width, img_height = 333, 250
    image = ImageReader(img_path)  # read once, reused on every page
    c.drawImage(image, img_x, img_y, width=img_width, height=img_height)

    # Text layout below the image.
    text_x = 50
    first_line_y = img_y - 30  # space below the image
    line_height = 15
    max_text_y = 50  # lowest baseline allowed before a page break
    max_chars_per_line = 90

    # Wrap on word boundaries instead of slicing words in half; blank lines
    # are kept so paragraph spacing survives.
    wrapped_lines = []
    for line in report_text.split("\n"):
        wrapped_lines.extend(textwrap.wrap(line, width=max_chars_per_line) or [""])

    text_y = first_line_y
    for line in wrapped_lines:
        if text_y < max_text_y:
            c.showPage()
            c.setFont("Helvetica", 12)  # font state is reset by showPage
            c.drawImage(image, img_x, img_y, width=img_width, height=img_height)
            text_y = first_line_y

        c.drawString(text_x, text_y, line)
        text_y -= line_height

    c.save()

# 📌 6️⃣ **Load a LLaMA 3 model from Hugging Face for generating detailed tow truck reports.**

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face login.
# NOTE(review): presumably needed because the Llama checkpoint loaded below is gated -- confirm.
notebook_login()

In [None]:
#!huggingface-cli login

In [None]:
###############################################
# 5) Load LLaMA 3 Model from Hugging Face
###############################################

# Hub id of the instruction-tuned checkpoint used for report writing.
llama3_model_name = "meta-llama/Llama-3.1-8B-Instruct"
# NOTE(review): this tokenizer is not passed to the pipeline call below, which is
# given only the model name -- confirm whether it is needed elsewhere.
tokenizer = AutoTokenizer.from_pretrained(llama3_model_name)
# Text-generation pipeline requesting bfloat16 weights and automatic device placement.
llama3_pipeline = pipeline("text-generation", model=llama3_model_name, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="auto")

In [None]:
def generate_tow_report(image_caption, detected_damages, plate, vehicle_data):
    """Ask the Llama pipeline to write a tow truck report.

    Args:
        image_caption: Caption describing the vehicle image.
        detected_damages: List of ``[x1, y1, x2, y2, cls_id, conf, position]``
            entries as produced by ``detect_damage``.
        plate: License plate string, or a falsy value when none was found.
        vehicle_data: Dict with optional ``CarModel`` / ``RegistrationYear``
            keys, or a falsy value when no lookup data is available.

    Returns:
        A tuple ``(generated_text, prompt)``: the pipeline's
        ``generated_text`` for the first returned sequence and the prompt
        that was sent. Callers strip the prompt from the text themselves.
    """
    if detected_damages:
        # .get keeps an unknown class id from raising KeyError, matching
        # how draw_bboxes labels boxes.
        damage_text = "\n".join(
            f"- {CLASS_NAMES.get(d[4], d[4])} detected on the {d[6]} side (Confidence: {d[5]:.2f})"
            for d in detected_damages
        )
    else:
        # Say so explicitly instead of leaving the prompt section blank.
        damage_text = "- No damage detected."

    if vehicle_data:
        vehicle_info = f"Model: {vehicle_data.get('CarModel', 'N/A')}, Year: {vehicle_data.get('RegistrationYear', 'N/A')}"
    else:
        vehicle_info = "No additional vehicle data found."

    prompt = f"""You are a professional automotive damage assessor. Write a detailed tow truck report based on the following information:

    Vehicle Details:
    - Description: {image_caption}
    - License Plate: {plate if plate else 'No plate detected'}
    - {vehicle_info}

    Damage Assessment:
    {damage_text}

    Format your response with the following sections:
    1. Vehicle Details
    2. Initial Assessment
    3. Damage Details
    4. Repair Recommendations
    5. Severity Level (Scale 1-5)
    6. Towing Requirements

    Avoid repeating informations. If ended the report end the response.

    Keep the tone professional and factual. Use numerical points and dashs for clarity, avoiding using asterix"""

    response = llama3_pipeline(
        prompt,
        max_new_tokens=400,
        do_sample=True,
        num_return_sequences=1,
    )
    return response[0]["generated_text"], prompt

# 📌 7️⃣ **Aggregate data from multiple images and produce an aggregated report.**

In [None]:
import re

# Generate one annotated image and one PDF report per test image.
test_images_folder = "/kaggle/input/tow-report-dataset/CarDD_COCO/CarDD_COCO/test2017"
output_folder = "Reports"
os.makedirs(output_folder, exist_ok=True)

selected_image = ""  # Replace with the specific image name
if selected_image:
    test_images = [selected_image]
else:
    # Keep only image files so a stray non-image entry in the folder does not
    # break captioning or detection; then take the first five.
    test_images = [
        f for f in sorted(os.listdir(test_images_folder))
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    ][:5]

# The generated text contains the prompt; the marker locates the prompt's last
# sentence, and the pattern consumes everything up to the next period after it.
# A raw string avoids the invalid "\." escape of a normal f-string.
marker = "Keep the tone professional and factual."
pattern = re.escape(marker) + r"[^.]*\."

for img_name in test_images:
    img_path = os.path.join(test_images_folder, img_name)
    caption = generate_caption(img_path)
    img, damages = detect_damage(img_path)
    img_annotated = draw_bboxes(img, damages)
    plates = extract_license_plate(img_path)
    plate = plates[0] if plates else None
    vehicle_data = get_vehicle_info(plate, "plateOCRtest") if plate else None
    report_text, prompt = generate_tow_report(caption, damages, plate, vehicle_data)

    if marker in report_text:
        split_text = re.split(pattern, report_text, maxsplit=1)
        final_report_text = split_text[-1].strip() if len(split_text) > 1 else report_text
    else:
        # No marker found: drop the prompt by length instead.
        final_report_text = report_text[len(prompt):].strip()

    output_img_path = os.path.join(output_folder, f"annotated_{img_name}")
    pdf_path = os.path.join(output_folder, f"report_{img_name}_teste.pdf")
    cv2.imwrite(output_img_path, img_annotated)
    generate_pdf_report(final_report_text, output_img_path, pdf_path)

In [None]:
import os
import re
import cv2
import requests
import xml.etree.ElementTree as ET
from PIL import Image
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from paddleocr import PaddleOCR
# Import your BLIP and YOLO modules, e.g.:
from transformers import BlipProcessor, BlipForConditionalGeneration
# Assuming you have a YOLO wrapper already imported
from ultralytics import YOLO

# ----- 1) Load Fine-Tuned BLIP for Captioning -----
import torch  # imported here so the device check below works inside this cell

blip_path = "/kaggle/input/tow-report-dataset/blip-fine-tuned-final"
processor = BlipProcessor.from_pretrained(blip_path)
# Use the GPU when present and fall back to the CPU otherwise, instead of
# hard-coding "cuda" (which fails on a CPU-only runtime).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = BlipForConditionalGeneration.from_pretrained(blip_path).to(device)

def generate_caption(image_path):
    """Return the caption BLIP generates for the image at ``image_path``.

    The image is opened with PIL, converted to RGB, preprocessed by the
    module-level ``processor`` and fed to the module-level ``model``; the
    first generated sequence is decoded without special tokens.
    """
    image = Image.open(image_path).convert("RGB")
    # Follow the device the model actually lives on rather than assuming
    # "cuda", so inputs and weights are always on the same device.
    inputs = processor(images=image, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)

# ----- 2) Load Fine-Tuned YOLO Model for Damage Detection -----
# Weights file for the damage detector.
yolo_model_path = "/kaggle/input/tow-report-dataset/yolo11_best_weight.pt"
model_yolo = YOLO(yolo_model_path)

# Maps the integer class id reported by the detector to a human-readable label.
# NOTE(review): presumably mirrors the label order used when the weights were
# trained -- confirm against the training configuration.
CLASS_NAMES = {
    0: 'dent',
    1: 'scratch',
    2: 'crack',
    3: 'glass shatter',
    4: 'lamp broken',
    5: 'tire flat'
}

def detect_damage(image_path):
    """Run the YOLO damage detector on a single image.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        A tuple ``(img, bboxes)``. ``img`` is the array returned by
        ``cv2.imread``; ``bboxes`` is a list of
        ``[x1, y1, x2, y2, cls_id, conf, position]`` entries, where
        ``position`` is ``"left"`` or ``"right"`` depending on which half of
        the image contains the centre of the box.

    Raises:
        ValueError: If OpenCV cannot read the image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with a clear
        # message instead of an AttributeError further down.
        raise ValueError(f"Could not read image: {image_path}")

    half_width = img.shape[1] / 2
    results = model_yolo.predict(source=image_path, conf=0.25)
    bboxes = []
    for box in results[0].boxes:
        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().tolist()
        conf = float(box.conf[0].cpu().numpy())
        cls_id = int(box.cls[0].cpu().numpy())
        # Decide the side from the box centre: using only the left edge would
        # label a box that lies mostly in the right half as "left".
        position = "left" if (x1 + x2) / 2 < half_width else "right"
        bboxes.append([x1, y1, x2, y2, cls_id, conf, position])
    return img, bboxes

def draw_bboxes(image, bboxes, font_scale=4, thickness=4):
    """Draw the detected boxes and their labels onto ``image`` in place.

    Args:
        image: Image array to annotate (modified in place).
        bboxes: Iterable of ``[x1, y1, x2, y2, cls_id, conf, position]``
            entries as produced by ``detect_damage``.
        font_scale: Scale factor for the label text.
        thickness: Stroke thickness of the label text.

    Returns:
        The same ``image`` object, annotated.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    colour = (0, 0, 255)
    for x1, y1, x2, y2, cls_id, conf, _position in bboxes:
        left, top, right, bottom = int(x1), int(y1), int(x2), int(y2)
        label = f"{CLASS_NAMES.get(cls_id, cls_id)} ({conf:.2f})"

        # putText anchors the text at its bottom-left corner. When the box
        # touches the top of the image there is no room above it, so the
        # label is drawn just inside the box instead of off-canvas.
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        baseline_y = top - 5
        if baseline_y < text_height:
            baseline_y = top + text_height + 5

        cv2.rectangle(image, (left, top), (right, bottom), colour, 4)
        cv2.putText(image, label, (left, baseline_y),
                    font, font_scale, colour, thickness)
    return image

# ----- 3) Load OCR Model for License Plate Detection -----
# Shared OCR engine, created with the English language setting; used by extract_license_plate.
ocr = PaddleOCR(lang='en')

def is_valid_plate(text):
    """Check whether ``text`` looks like a Brazilian license plate.

    Two layouts are accepted, each of which must match the whole string:
    the old "ABC-1234" format (hyphen optional) and the new "ABC1D23"
    format. Returns the ``re.Match`` of the first layout that fits, or
    ``None`` when neither does.
    """
    plate_patterns = (
        r"[A-Z]{3}-?\d{4}",        # old format: three letters, four digits
        r"[A-Z]{3}\d[A-Z]\d{2}",   # new format: "ABC1D23"
    )
    for plate_pattern in plate_patterns:
        match = re.fullmatch(plate_pattern, text)
        if match:
            return match
    return None

def extract_license_plate(image_path):
    """Collect the plate-like strings that OCR reads from ``image_path``.

    Each recognised text has its spaces removed and is upper-cased; texts
    accepted by ``is_valid_plate`` are returned with any hyphen stripped.
    An empty list is returned when OCR produces no result.
    """
    ocr_output = ocr.ocr(image_path, cls=True)
    if ocr_output is None:
        return []

    found_plates = []
    for page in ocr_output:
        if page is None:
            continue
        for entry in page:
            # Skip entries that do not carry a (text, confidence) pair.
            if entry is None or len(entry) < 2:
                continue
            raw_text, _score = entry[1][0], entry[1][1]
            candidate = raw_text.replace(" ", "").upper()
            if not is_valid_plate(candidate):
                continue
            # Drop the hyphen, if any, before storing the plate.
            found_plates.append(candidate.replace("-", ""))
    return found_plates

def get_vehicle_info(plate, username, timeout=10):
    """Look up a plate on the RegCheck ``CheckBrazil`` endpoint.

    Args:
        plate: Plate string sent as ``RegistrationNumber``.
        username: Account name sent as ``username``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict mapping the namespace-stripped tag of every child of the
        response's ``vehicleData`` element(s) to its text. ``None`` is
        returned when the request fails, the status code is not 200, or the
        body is not well-formed XML.
    """
    api_url = "https://www.regcheck.org.uk/api/reg.asmx/CheckBrazil"
    try:
        # `params` lets requests URL-encode the values; `timeout` keeps a
        # stalled server from hanging the whole notebook.
        response = requests.get(
            api_url,
            params={"RegistrationNumber": plate, "username": username},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Treat a network failure like any other unsuccessful lookup.
        print(f"Vehicle lookup failed for plate {plate}: {exc}")
        return None

    if response.status_code != 200:
        return None

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError:
        return None

    vehicle_data = {}
    for elem in root.iter():
        if not elem.tag.endswith("vehicleData"):
            continue
        for data in elem:
            # Tags look like "{namespace}Name"; keep only the local name.
            vehicle_data[data.tag.split("}")[-1]] = data.text
    return vehicle_data

# ----- 4) Generate Tow Truck Report using Llama Pipeline -----
# (Assuming llama3_pipeline is defined/imported elsewhere in your project)
def generate_tow_report(image_caption, detected_damages, plate, vehicle_data):
    """Ask the Llama pipeline to write a tow truck report.

    Args:
        image_caption: Caption (or newline-joined captions) describing the
            vehicle.
        detected_damages: List of ``[x1, y1, x2, y2, cls_id, conf, position]``
            entries as produced by ``detect_damage``.
        plate: License plate string, or a falsy value when none was found.
        vehicle_data: Dict with optional ``CarModel`` / ``RegistrationYear``
            keys, or a falsy value when no lookup data is available.

    Returns:
        A tuple ``(generated_text, prompt)``: the pipeline's
        ``generated_text`` for the first returned sequence and the prompt
        that was sent. Callers strip the prompt from the text themselves.
    """
    if detected_damages:
        # .get keeps an unknown class id from raising KeyError, matching
        # how draw_bboxes labels boxes.
        damage_text = "\n".join(
            f"- {CLASS_NAMES.get(d[4], d[4])} detected on the {d[6]} side (Confidence: {d[5]:.2f})"
            for d in detected_damages
        )
    else:
        # Say so explicitly instead of leaving the prompt section blank.
        damage_text = "- No damage detected."

    if vehicle_data:
        vehicle_info = f"Model: {vehicle_data.get('CarModel', 'N/A')}, Year: {vehicle_data.get('RegistrationYear', 'N/A')}"
    else:
        vehicle_info = "No additional vehicle data found."

    prompt = f"""You are a professional automotive damage assessor. Write a detailed tow truck report based on the following information:

Vehicle Details:
- Description: {image_caption}
- License Plate: {plate if plate else 'No plate detected'}
- {vehicle_info}

Damage Assessment:
{damage_text}

Format your response with the following sections:
1. Vehicle Details
2. Initial Assessment
3. Damage Details
4. Repair Recommendations
5. Severity Level (Scale 1-5)
6. Towing Requirements

Avoid repeating informations. If ended the report end the response.

Keep the tone professional and factual. Use numerical points and dashs for clarity, avoiding using asterix"""

    response = llama3_pipeline(
        prompt,
        max_new_tokens=1024,
        do_sample=True,
        num_return_sequences=1,
    )
    return response[0]["generated_text"], prompt

# ----- 5) Generate PDF Reports -----
def generate_aggregated_pdf_report(report_text, annotated_image_paths, pdf_path):
    """Write a PDF with the aggregated report followed by one page per image.

    Args:
        report_text: Report body; newlines start new lines in the PDF.
        annotated_image_paths: Paths of the annotated images, each drawn on
            its own page with its file name as caption.
        pdf_path: Destination path of the generated PDF.

    The text is wrapped at word boundaries to at most 90 characters per
    line. An image that cannot be drawn is reported with ``print`` and its
    page still receives the caption.
    """
    import textwrap  # local import: stdlib module not imported at file level

    c = canvas.Canvas(pdf_path, pagesize=letter)
    c.setFont("Helvetica", 12)

    # First page(s): title and aggregated report text.
    c.drawString(50, 750, "Report Professor Car")
    text_x, text_y = 50, 720
    line_height = 15
    max_text_y = 50  # lowest baseline allowed before a page break
    max_chars_per_line = 90

    # Wrap on word boundaries instead of slicing words in half; blank lines
    # are kept so paragraph spacing survives.
    wrapped_lines = []
    for line in report_text.split("\n"):
        wrapped_lines.extend(textwrap.wrap(line, width=max_chars_per_line) or [""])

    for line in wrapped_lines:
        if text_y < max_text_y:
            c.showPage()
            c.setFont("Helvetica", 12)  # font state is reset by showPage
            text_y = 750
        c.drawString(text_x, text_y, line)
        text_y -= line_height
    c.showPage()

    # Subsequent pages: each annotated image with its file name as a caption.
    for img_path in annotated_image_paths:
        try:
            c.drawImage(ImageReader(img_path), 50, 300, width=333, height=250)
        except Exception as e:
            # Best effort: keep building the PDF even if one image is unusable.
            print(f"Error loading image {img_path}: {e}")
        c.drawString(50, 280, os.path.basename(img_path))
        c.showPage()

    c.save()

# ----- 6) Aggregating Information from Six Images -----
test_images_folder = "/kaggle/input/tow-report-dataset/Carro_Professor"
output_folder = "ReportsReal"
os.makedirs(output_folder, exist_ok=True)

# Take the first six image files of the folder, in sorted order.
all_files = sorted(os.listdir(test_images_folder))
test_images = [f for f in all_files if f.lower().endswith((".jpg", ".jpeg", ".png"))][:6]

# Lists to aggregate data across all images
all_captions = []
all_damages = []
all_plates = []
annotated_image_paths = []

for img_name in test_images:
    img_path = os.path.join(test_images_folder, img_name)

    # Generate a caption for each image and add it to the list
    caption = generate_caption(img_path)
    all_captions.append(caption)

    # Detect damages and draw bounding boxes
    img, damages = detect_damage(img_path)
    all_damages.extend(damages)
    img_annotated = draw_bboxes(img, damages)

    # Extract license plates if available
    plates = extract_license_plate(img_path)
    if plates:
        all_plates.extend(plates)

    # Save the annotated image and record its path
    annotated_img_file = os.path.join(output_folder, f"foto_{img_name}")
    cv2.imwrite(annotated_img_file, img_annotated)
    annotated_image_paths.append(annotated_img_file)

# Join the unique captions in the order they were produced. dict.fromkeys
# de-duplicates while preserving order; a set would reorder the captions
# differently from run to run and make the prompt nondeterministic.
aggregated_caption = "\n".join(dict.fromkeys(all_captions))
# For the license plate, pick the first detected (assuming same car)
plate = all_plates[0] if all_plates else None
vehicle_data = get_vehicle_info(plate, "plateOCRtest") if plate else None

# Generate the aggregated tow truck report using the aggregated information
report_text, prompt = generate_tow_report(aggregated_caption, all_damages, plate, vehicle_data)

# Remove the prompt from the generated text: cut everything up to the first
# period that follows the marker sentence, or fall back to dropping the
# prompt by length when the marker is absent.
marker = "Keep the tone professional and factual."
if marker in report_text:
    pattern = re.escape(marker) + r"[^.]*\."
    split_text = re.split(pattern, report_text, maxsplit=1)
    final_report_text = split_text[-1].strip() if len(split_text) > 1 else report_text
else:
    final_report_text = report_text[len(prompt):].strip()

# Generate the aggregated PDF report
pdf_path = os.path.join(output_folder, "aggregated_report.pdf")
generate_aggregated_pdf_report(final_report_text, annotated_image_paths, pdf_path)

print(f"Aggregated report generated at: {pdf_path}")
