# In [None]:
import os
import re
import shutil
import base64
import io
import csv
from openai import OpenAI
from dotenv import load_dotenv
from PIL import Image

# Load environment variables from a local .env file, if one is present
load_dotenv()

# OpenAI API key: prefer the OPENAI_API_KEY environment variable (which
# load_dotenv() may have just populated) so the secret does not need to be
# committed with the source. The inline placeholder remains as a fallback for
# users who paste their key directly here.
api_key = os.getenv("OPENAI_API_KEY") or 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'  # Your API key

# ==================== Part 1: Image encoding and description ====================

def encode_image(image_path, target_size):
    """Resize an image and return it as a Base64-encoded JPEG string.

    Args:
        image_path: Path of the image file to open.
        target_size: ``(width, height)`` tuple passed to ``Image.resize``.

    Returns:
        The resized image, serialized as JPEG and Base64-encoded, as ``str``.
    """
    # Modes the JPEG writer accepts directly; anything else (RGBA, LA, P, ...
    # which are common for PNG input) makes ``save(format="JPEG")`` raise
    # OSError, so those are converted to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as img:
        source = img if img.mode in jpeg_modes else img.convert("RGB")
        # resize() returns a new, fully loaded image, so it stays usable
        # after the underlying file is closed.
        img_resized = source.resize(target_size)

    with io.BytesIO() as buffer:
        img_resized.save(buffer, format="JPEG")
        return base64.b64encode(buffer.getvalue()).decode('utf-8')


def encode_images_in_folder(folder_path, target_size=(256, 256)):
    """Base64-encode every image file found directly inside a folder.

    File names are taken in ``os.listdir`` order and kept only when they end
    with one of the accepted suffixes (the match is case-sensitive).

    Args:
        folder_path: Directory to scan.
        target_size: ``(width, height)`` forwarded to ``encode_image``.

    Returns:
        List of Base64 strings, one per matching file, in listing order.
    """
    accepted_suffixes = ('.jpeg', '.jpg', '.png', '.JPG')
    return [
        encode_image(os.path.join(folder_path, entry), target_size)
        for entry in os.listdir(folder_path)
        if entry.endswith(accepted_suffixes)
    ]


# Initialize the module-level OpenAI API client; the image description and
# classification functions in this file send their requests through it.
client = OpenAI(api_key=api_key)
# client = OpenAI(api_key=api_key, base_url='https://www.xxxxxxx')  # Use base_url if using an API proxy


def describe_image_separately(encoded_images, image_files):
    """Generate a description for each image individually.

    Each image is sent to the chat-completions API in its own request.

    Args:
        encoded_images: Base64-encoded JPEG strings, one per image.
        image_files: File names paired with ``encoded_images`` by position.
            Extra trailing names are ignored.

    Returns:
        Dict mapping file name to the stripped model reply. Images whose
        reply could not be read are reported and omitted.

    Raises:
        IndexError: If ``image_files`` has fewer entries than
            ``encoded_images``.
    """
    # Fail before any API request is made instead of part-way through the loop.
    if len(image_files) < len(encoded_images):
        raise IndexError(
            f"image_files has {len(image_files)} entries but "
            f"{len(encoded_images)} encoded images were given"
        )

    descriptions = {}

    for image_file, image in zip(image_files, encoded_images):
        content = [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image}"}}
        ]

        response = client.chat.completions.create(
            messages=[{"role": "user", "content": content}],
            model="gpt-4o-mini",
            max_tokens=200,
        )

        # Print model response
        print(f"Response for image {image_file}:", response)

        try:
            descriptions[image_file] = response.choices[0].message.content.strip()
        except (AttributeError, IndexError, TypeError) as e:
            # AttributeError: content is None; IndexError/TypeError: the
            # response carries no usable choices.
            print(f"Error: Failed to extract image description - {e}")

    print("Current batch image descriptions:", descriptions)
    return descriptions


def describe_images_in_batches(encoded_images, image_files, batch_size=5):
    """Describe images in consecutive slices of ``batch_size`` items.

    Args:
        encoded_images: Base64-encoded images.
        image_files: File names paired with ``encoded_images`` by position.
        batch_size: Number of images handed to each
            ``describe_image_separately`` call.

    Returns:
        Dict merging the per-batch results (file name -> description).
    """
    total = len(encoded_images)
    merged = {}

    for start in range(0, total, batch_size):
        stop = start + batch_size
        # Slice both sequences with the same bounds so names stay aligned.
        merged.update(
            describe_image_separately(
                encoded_images[start:stop],
                image_files[start:stop],
            )
        )

    return merged


# ==================== Part 2: Image classification and relocation ====================

def _parse_classification(response_text):
    """Extract ``(category, description)`` from the model's reply text."""
    # [\w-]+ rather than \w+ so the hyphenated "Non-Defect" category matches;
    # otherwise every Non-Defect reply would lose its detailed description.
    match = re.search(r"Class:\s*([\w-]+)\s*Description:\s*(.+)", response_text, re.DOTALL)
    if match:
        return match.group(1).capitalize(), match.group(2).strip()

    # Fallback strategy: extract category only
    fallback_match = re.search(r"(Defect|Non-Defect)", response_text, re.IGNORECASE)
    if fallback_match:
        return (
            fallback_match.group(1).capitalize(),
            "Fallback description based on classification only.",
        )

    return "Uncategorized", "Failed to parse description"


def classify_and_describe_image(description):
    """Classify an image from its textual description.

    Args:
        description: Free-text description of the image.

    Returns:
        ``(category, detailed_description)``. The category is the model's
        answer passed through ``str.capitalize`` (e.g. ``"Defect"``,
        ``"Non-defect"``). ``("Uncategorized", "Failed to parse description")``
        is returned when the request fails or the reply cannot be parsed.
    """
    prompt = (
        "Please classify the image defect based on the following description.\n\n"
        f"Description: {description}\n\n"
        "You must select the most appropriate category from Defect and Non-Defect.\n"
        "Strictly output in the following format:\n"
        "Class: [category name]\n"
        "Description: [detailed description]"
    )

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert in analyzing building defects."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=150
        )

        response_text = response.choices[0].message.content.strip()
        print(f"Model classification output: {response_text}")

        return _parse_classification(response_text)

    except Exception as e:
        # Best-effort: any API or parsing failure yields the sentinel result
        # so one bad image does not abort the whole run.
        print(f"Error during classification: {e}")
        return "Uncategorized", "Failed to parse description"


def categorize_images_by_description(image_descriptions):
    """Classify every described image.

    Args:
        image_descriptions: Dict mapping image name to its description.

    Returns:
        A pair ``(categories, details)`` where ``categories`` maps image name
        to category and ``details`` maps image name to a dict with the keys
        ``"category"`` and ``"description"``.
    """
    print("Starting image classification...")

    category_by_image = {}
    details_by_image = {}
    for name, text in image_descriptions.items():
        label, refined = classify_and_describe_image(text)
        category_by_image[name] = label
        details_by_image[name] = dict(category=label, description=refined)

    print("Image classification completed.")
    return category_by_image, details_by_image


def clean_category_name(category):
    """Normalize a category name into a lowercase, underscore-separated token.

    Characters other than ``a-z``, ``0-9`` and whitespace are dropped (so a
    hyphen is removed, not replaced), surrounding whitespace is trimmed, and
    each remaining space becomes an underscore.

    Args:
        category: Raw category text.

    Returns:
        The normalized name, e.g. ``"  Non Defect "`` -> ``"non_defect"``.
    """
    category = category.lower()
    category = re.sub(r"[^a-z0-9\s]", "", category)  # Remove non-alphanumeric characters
    # Trim before replacing: once spaces are underscores, strip() can no
    # longer remove leading/trailing padding.
    category = category.strip()
    return category.replace(" ", "_")               # Replace spaces with underscores


def move_images_by_category(folder_path, categorized_images):
    """Move classified images into 'defect' / 'non-defect' subfolders.

    Both subfolders are created inside ``folder_path`` if missing. Images
    whose category is neither defect nor non-defect (for example
    ``"Uncategorized"`` after a failed classification) are left in place and
    reported, rather than being filed as non-defect.

    Args:
        folder_path: Directory that currently holds the images.
        categorized_images: Dict mapping image file name to category.
    """
    print("Starting image relocation...")

    defect_folder = os.path.join(folder_path, 'defect')
    non_defect_folder = os.path.join(folder_path, 'non-defect')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(defect_folder, exist_ok=True)
    os.makedirs(non_defect_folder, exist_ok=True)

    targets = {
        "defect": defect_folder,
        "non-defect": non_defect_folder,
        "nondefect": non_defect_folder,
    }

    for image_name, category in categorized_images.items():
        # Tolerate case and separator variations such as "Non_Defect".
        key = category.strip().lower().replace("_", "-").replace(" ", "-")
        target_folder = targets.get(key)
        if target_folder is None:
            print(f"Skipping {image_name}: unrecognized category '{category}'")
            continue

        source_path = os.path.join(folder_path, image_name)
        target_path = os.path.join(target_folder, image_name)

        try:
            shutil.move(source_path, target_path)
            print(f"Moved {image_name} to {target_folder}")
        except Exception as e:
            # Best-effort: report and continue with the remaining images.
            print(f"Error moving image {image_name}: {e}")

    print("Image relocation completed.")


def debug_stage(stage_name, function, *args, **kwargs):
    """Run one pipeline stage with banner output and error capture.

    Args:
        stage_name: Label used in the printed messages.
        function: Callable implementing the stage.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns, or ``None`` if it raised an
        ``Exception`` (the error is printed, not re-raised).
    """
    try:
        print(f"===== Debug Stage: {stage_name} =====")
        outcome = function(*args, **kwargs)
    except Exception as err:
        print(f"Error in stage '{stage_name}': {err}")
        return None

    if outcome is None:
        print(f"Warning: Stage '{stage_name}' returned None")
    print(f"===== Stage Completed: {stage_name} =====")
    return outcome


# ==================== Save descriptions and classifications to CSV ====================

def save_descriptions_to_csv(image_descriptions, descriptions_with_categories, output_file):
    """Write one CSV row per described image, preceded by a header row.

    Args:
        image_descriptions: Dict mapping image name to its description.
        descriptions_with_categories: Dict mapping image name to a dict with
            ``'category'`` and ``'description'`` keys; missing entries or keys
            fall back to ``'Uncategorized'`` / ``'Failed to parse description'``.
        output_file: Path of the CSV file to create or overwrite (UTF-8).
    """
    def build_row(name, text):
        info = descriptions_with_categories.get(name, {})
        return [
            name,
            text,
            info.get('description', 'Failed to parse description'),
            info.get('category', 'Uncategorized'),
        ]

    with open(output_file, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.writer(handle)
        out.writerow(['Image Name', 'Image Description', 'Classification Description', 'Category'])
        out.writerows(build_row(name, text) for name, text in image_descriptions.items())

    print(f"Image descriptions and classifications saved to {output_file}")


# ==================== Main entry point ====================

if __name__ == "__main__":
    # Pipeline: encode images -> describe -> classify -> save CSV -> move files.
    folder_path = r"D:\0 Desktop\Misc\GPT_Test\GPT-damage-copy\Balanced test\Test"  # Image folder path

    # Stage 1: Encode images
    encoded_images = debug_stage("Image Encoding", encode_images_in_folder, folder_path)

    if encoded_images:
        # Re-list the folder with the same suffix filter used during encoding;
        # names and encodings are paired purely by position.
        image_files = [
            f for f in os.listdir(folder_path)
            if f.endswith(('.jpeg', '.jpg', '.png', '.JPG'))
        ]

        if len(image_files) != len(encoded_images):
            # The folder changed between the two listings, so positional
            # pairing would attach descriptions to the wrong files.
            print(
                "Error: Number of image files does not match number of encoded images "
                f"({len(image_files)} vs {len(encoded_images)})."
            )
            image_descriptions = None
        else:
            # Stage 2: Generate descriptions
            image_descriptions = debug_stage(
                "Image Description",
                describe_images_in_batches,
                encoded_images,
                image_files,
                batch_size=5
            )

        if isinstance(image_descriptions, dict) and image_descriptions:
            # Stage 3: Classify images
            classification_result = debug_stage(
                "Image Classification",
                categorize_images_by_description,
                image_descriptions
            )

            # debug_stage returns None when the stage raised; unpacking that
            # directly would fail with a TypeError.
            if classification_result is None:
                categorized_images, descriptions_with_categories = {}, {}
                print("Error: Image classification failed; skipping CSV export and relocation.")
            else:
                categorized_images, descriptions_with_categories = classification_result

            if categorized_images:
                # Stage 4: Save results to CSV
                output_file = r"D:\0 Desktop\Misc\GPT_Test\image-GPT.csv"
                save_descriptions_to_csv(
                    image_descriptions,
                    descriptions_with_categories,
                    output_file
                )

                # Stage 5: Move images
                debug_stage(
                    "Move Images",
                    move_images_by_category,
                    folder_path,
                    categorized_images
                )
        else:
            print("Error: Image descriptions are empty or in an invalid format.")
