In [3]:
import os
import zipfile
import shutil
from pathlib import Path
from tqdm import tqdm

# Path configuration for the extraction step.
# The project root is taken to be two directory levels above the current
# working directory (NOTE(review): this assumes the notebook is launched from
# its own folder - confirm).
project_root = Path(os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)))

# Flat folder that receives every image pulled out of the archives;
# created up front (including missing parents) so later cells can write to it.
output_folder = project_root.joinpath('ml_app', 'images_data')
os.makedirs(output_folder, exist_ok=True)

# The two OneDrive exports to unpack.
zip_file_1 = Path(r"C:\Users\mohan\Downloads\OneDrive_2024-09-16.zip")
zip_file_2 = Path(r"C:\Users\mohan\Downloads\OneDrive_2024-10-03.zip")

print(f"Output folder: {output_folder}")

Output folder: c:\Users\mohan\Desktop\blister\ml_app\images_data


In [4]:
def _flat_image_name(member_name, used_names):
    """Return a collision-free file name for a zip member flattened into one folder.

    Args:
        member_name: Path of the member inside the archive.
        used_names: Set of lower-cased names already written during the current
            call; the chosen name is added to it.

    Returns:
        The member's base name, or a name derived from its folder path when the
        base name was already used by another member of the same archive.
    """
    # Zip members use '/' separators; tolerate archives written with '\\' too.
    parts = [part for part in member_name.replace('\\', '/').split('/')
             if part not in ('', '.', '..')]
    candidate = parts[-1]
    if candidate.lower() in used_names:
        # Deterministic disambiguation: fold the folder names into the file name,
        # so re-running the extraction produces the same names again.
        candidate = '_'.join(parts)

    stem, dot, extension = candidate.rpartition('.')
    counter = 1
    while candidate.lower() in used_names:
        candidate = f"{stem}_{counter}{dot}{extension}"
        counter += 1

    used_names.add(candidate.lower())
    return candidate


def extract_images_from_zip(zip_path, output_folder):
    """Copy every image stored in a zip archive into the root of ``output_folder``.

    Members are streamed straight from the archive to their flattened
    destination, so no intermediate directory tree is created and nothing has
    to be moved or cleaned up afterwards. Only the base name of each member is
    used for the destination, so a member can never be written outside
    ``output_folder``.

    Images that share a base name but live in different folders of the same
    archive are kept side by side (the later one gets its folder path folded
    into its name) instead of silently overwriting each other. Files left in
    ``output_folder`` by an earlier run are still overwritten, which keeps
    re-running the cell idempotent.

    Args:
        zip_path: Path of the archive to read (``str`` or ``Path``).
        output_folder: Destination directory (``str`` or ``Path``); created if
            it does not exist.

    Returns:
        None.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')
    destination = Path(output_folder)
    destination.mkdir(parents=True, exist_ok=True)

    # Names are tracked case-insensitively because the target filesystem may be.
    used_names = set()

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for member in tqdm(zip_ref.infolist(), desc=f"Extracting from {Path(zip_path).name}"):
            # Skip directory entries and anything that is not an image.
            if member.is_dir() or not member.filename.lower().endswith(image_extensions):
                continue

            target = destination / _flat_image_name(member.filename, used_names)
            with zip_ref.open(member) as source, open(target, 'wb') as sink:
                shutil.copyfileobj(source, sink)

In [None]:
# Extract images from both OneDrive archives into the shared output folder
for archive in (zip_file_1, zip_file_2):
    extract_images_from_zip(archive, output_folder)

# Count the extracted images.
# Only regular files with one of the extracted image extensions are counted,
# so stray sub-directories or non-image files in the folder do not inflate
# the total.
image_suffixes = {'.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif'}
image_count = sum(
    1
    for entry in output_folder.iterdir()
    if entry.is_file() and entry.suffix.lower() in image_suffixes
)
print(f"Total images extracted: {image_count}")

# Resize the images to the standard YOLO input size

In [None]:
import os
from pathlib import Path
from PIL import Image
from tqdm import tqdm

# Path configuration for the resize step.
# The project root is resolved as two directory levels above the working directory.
project_root = Path(os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir)))

# Source images come from the extraction folder; resized copies go to a sibling folder.
# NOTE: this rebinds `output_folder`, which the extraction cell also uses.
input_folder = project_root.joinpath('ml_app', 'images_data')
output_folder = project_root.joinpath('ml_app', 'resized_images')

# Make sure the destination exists (parents included) before writing to it
os.makedirs(output_folder, exist_ok=True)

# Target size passed to the resize step (YOLO typically uses multiples of 32);
# adjust to your needs.
target_size = (640, 640)

def resize_image(input_path, output_path, target_size):
    """Resize one image to ``target_size`` and save it to ``output_path``.

    The EXIF orientation tag is applied to the pixels before resizing. The
    resized copy is written without the original EXIF block, so without this
    step photos taken with a rotated camera would come out sideways.

    Args:
        input_path: Path of the image to read.
        output_path: Path of the resized image; the output format is inferred
            from its extension by ``Image.save``.
        target_size: ``(width, height)`` passed to ``Image.resize``. The image
            is stretched to exactly this size (aspect ratio is not preserved).

    Returns:
        None.
    """
    # Function-scope import: the notebook's import cell only pulls in `Image`.
    from PIL import ImageOps

    with Image.open(input_path) as img:
        # Bake the EXIF orientation into the pixel data (returns a new image)
        upright = ImageOps.exif_transpose(img)

        # Resize the image
        resized = upright.resize(target_size, Image.LANCZOS)

        # Save the resized image
        resized.save(output_path, quality=95)  # Adjust quality as needed

# Walk every entry of the input folder; only the supported image types are resized,
# each one written under the same file name in the output folder.
for img_path in tqdm(list(input_folder.glob('*')), desc="Resizing images"):
    if img_path.suffix.lower() not in ('.jpg', '.jpeg', '.png', '.bmp'):
        continue
    output_path = output_folder / img_path.name
    resize_image(img_path, output_path, target_size)

# Summary: destination folder and the number of entries it now contains
print(f"Resized images saved to: {output_folder}")
print(f"Total images resized: {sum(1 for _ in output_folder.glob('*'))}")

# Prepare synthetic data

In [9]:
import os
import shutil
from PIL import Image, ImageDraw, ImageFilter
import random
import numpy as np
import time

def add_texture(img, intensity=20):
    """Add uniform random per-pixel noise to an image.

    Args:
        img: Image (or array-like) convertible with ``np.array``.
        intensity: Maximum absolute noise added to each channel value. The
            noise is drawn uniformly from ``[-intensity, intensity]`` (both
            ends included); ``0`` returns an unchanged copy.

    Returns:
        A new image built with ``Image.fromarray`` from the noisy ``uint8`` data.
    """
    arr = np.array(img)
    # randint's upper bound is exclusive, hence the +1: this keeps the noise
    # symmetric around zero and makes intensity=0 valid instead of an error.
    noise = np.random.randint(-intensity, intensity + 1, arr.shape, dtype=np.int16)
    # The sum is computed in int16, so it cannot wrap around before clipping.
    arr = np.clip(arr + noise, 0, 255).astype(np.uint8)
    return Image.fromarray(arr)

def create_irregular_mask(size):
    """Build a soft-edged, irregular blob mask.

    A 12-vertex polygon is drawn around the centre of a ``2*size`` by
    ``2*size`` canvas, with each vertex's angle and radius randomly jittered,
    and the result is Gaussian-blurred.

    Args:
        size: Nominal blob radius in pixels; the canvas side is twice this value.

    Returns:
        A float array of shape ``(2*size, 2*size)`` with values scaled to 0..1.
    """
    vertex_count = 12
    angular_step = 2 * np.pi / vertex_count
    canvas_side = size * 2
    canvas = Image.new('L', (canvas_side, canvas_side), 0)

    # Walk around the centre in equal steps; jitter the angle first, then the radius
    outline = []
    for index in range(vertex_count):
        theta = index * angular_step + random.uniform(-0.2, 0.2)
        radius = size * random.uniform(0.8, 1.2)
        outline.append((size + radius * np.cos(theta), size + radius * np.sin(theta)))

    ImageDraw.Draw(canvas).polygon(outline, fill=255)

    # Blur radius scales with the blob so the edge softness is proportional
    softened = canvas.filter(ImageFilter.GaussianBlur(radius=size/10))
    return np.array(softened) / 255.0

def create_synthetic_image(size=(480, 640)):
    """Render one synthetic blister image together with its annotations.

    The image has a randomly coloured, lightly textured background with an X
    marking and a horizontal line, onto which clustered and individual
    blisters are blended.

    The number of individual blisters is the drawn target count minus the
    number of blisters actually placed in clusters (never negative), so the
    total per image is ``max(target, clustered)``.

    Args:
        size: ``(width, height)`` of the image, as passed to ``Image.new``.

    Returns:
        A tuple ``(img, blisters)`` where ``img`` is the rendered image and
        ``blisters`` is a list of annotation strings, one per blister, as
        produced by ``create_annotation``.
    """
    bg_color = (random.randint(150, 250), random.randint(100, 250), random.randint(100, 250))
    img = Image.new('RGB', size, color=bg_color)
    img = add_texture(img, intensity=10)
    draw = ImageDraw.Draw(img)

    # Add X marking, drawn 50 levels darker than the background on each channel
    x_color = (max(0, bg_color[0] - 50), max(0, bg_color[1] - 50), max(0, bg_color[2] - 50))
    x_size = int(size[0] * 0.4)
    center_x = size[0] // 2
    center_y = int(size[1] * 0.4)
    line_width = random.randint(2, 4)
    draw.line((center_x - x_size//2, center_y - x_size//2, center_x + x_size//2, center_y + x_size//2), fill=x_color, width=line_width)
    draw.line((center_x - x_size//2, center_y + x_size//2, center_x + x_size//2, center_y - x_size//2), fill=x_color, width=line_width)

    # Add horizontal line towards the bottom
    line_y = int(size[1] * 0.85)
    draw.line((center_x - x_size//2, line_y, center_x + x_size//2, line_y), fill=x_color, width=line_width)

    # Blisters are blended directly into the pixel array
    img_array = np.array(img)

    # Add blisters and create annotations
    num_blisters = random.randint(20, 60)  # target number of blisters for this image
    blisters = []
    clustered_total = 0  # blisters actually placed inside clusters

    # Create clustered blisters
    num_clusters = random.randint(1, 3)
    for _ in range(num_clusters):
        cluster_center_x = random.randint(0, size[0])
        cluster_center_y = random.randint(0, size[1])
        num_cluster_blisters = random.randint(3, 8)
        clustered_total += num_cluster_blisters

        for _ in range(num_cluster_blisters):
            # Scatter around the cluster centre, clamped to valid pixel coordinates
            blister_x = min(max(0, cluster_center_x + random.randint(-30, 30)), size[0]-1)
            blister_y = min(max(0, cluster_center_y + random.randint(-30, 30)), size[1]-1)
            blister_size = random.randint(2, 15)

            create_blister(img_array, blister_x, blister_y, blister_size, bg_color, size)
            blisters.append(create_annotation(blister_x, blister_y, blister_size, size))

    # Create individual blisters: whatever remains of the target after the
    # clusters. Each cluster draws its own size, so the remainder must use the
    # accumulated count rather than the last cluster's size times the number
    # of clusters.
    for _ in range(max(0, num_blisters - clustered_total)):
        blister_x = random.randint(0, size[0]-1)
        blister_y = random.randint(0, size[1]-1)
        blister_size = random.randint(2, 25)

        create_blister(img_array, blister_x, blister_y, blister_size, bg_color, size)
        blisters.append(create_annotation(blister_x, blister_y, blister_size, size))

    img = Image.fromarray(img_array)
    return img, blisters

def _blend_patch(img_array, mask, color, left, top, size):
    """Alpha-blend ``color`` into ``img_array`` through ``mask``, in place.

    The mask's top-left corner is placed at pixel ``(left, top)``; whatever
    falls beyond ``size`` (``(width, height)``) on the right or bottom is cut
    off. ``left`` and ``top`` are assumed to be non-negative.
    """
    patch_w = min(mask.shape[1], size[0] - left)
    patch_h = min(mask.shape[0], size[1] - top)
    if patch_w <= 0 or patch_h <= 0:
        return  # patch lies completely outside the image

    # Trailing axis lets the 2-D mask broadcast over the colour channels
    alpha = mask[:patch_h, :patch_w, np.newaxis]
    region = img_array[top:top + patch_h, left:left + patch_w]
    img_array[top:top + patch_h, left:left + patch_w] = (
        region * (1 - alpha) + color * alpha
    ).astype(np.uint8)


def create_blister(img_array, blister_x, blister_y, blister_size, bg_color, size):
    """Blend one blister, plus a small highlight, into ``img_array`` in place.

    The blend is done with NumPy slices instead of per-pixel Python loops; the
    resulting pixel values are the same.

    Args:
        img_array: Image pixels indexed as ``[y, x]``; modified in place.
        blister_x: X coordinate of the top-left corner of the blister patch.
        blister_y: Y coordinate of the top-left corner of the blister patch.
        blister_size: Nominal blister radius; the patch is ``2*blister_size`` square.
        bg_color: Background colour ``(r, g, b)`` the blister colour is derived from.
        size: ``(width, height)`` of the image; pixels outside it are skipped.

    Returns:
        None.
    """
    mask = create_irregular_mask(blister_size)

    # Determine blister color (mostly background color, some whitish)
    if random.random() < 0.2:  # 20% chance of whitish blister
        blister_color = np.array([random.randint(220, 255), random.randint(220, 255), random.randint(220, 255)])
    else:
        blister_color = np.array([
            max(0, min(255, bg_color[0] + random.randint(-20, 20))),
            max(0, min(255, bg_color[1] + random.randint(-20, 20))),
            max(0, min(255, bg_color[2] + random.randint(-20, 20)))
        ])

    # Apply blister to image
    _blend_patch(img_array, mask, blister_color, blister_x, blister_y, size)

    # Add subtle highlight: a smaller, brighter blob centred on the blister
    highlight_color = np.minimum(blister_color + 30, 255)
    highlight_size = max(1, blister_size // 4)
    highlight_mask = create_irregular_mask(highlight_size)

    highlight_x = blister_x + blister_size - highlight_size
    highlight_y = blister_y + blister_size - highlight_size
    _blend_patch(img_array, highlight_mask, highlight_color, highlight_x, highlight_y, size)

def create_annotation(blister_x, blister_y, blister_size, size):
    """Build the annotation line for one blister.

    The blister occupies the square whose top-left corner is
    ``(blister_x, blister_y)`` and whose side is ``2 * blister_size``. That
    square is clipped to the image before normalising, so every value stays
    within 0..1 even for blisters that run past the right or bottom edge.
    Boxes that lie fully inside the image are unaffected.

    Args:
        blister_x: X coordinate of the top-left corner of the blister patch.
        blister_y: Y coordinate of the top-left corner of the blister patch.
        blister_size: Nominal blister radius in pixels.
        size: ``(width, height)`` of the image.

    Returns:
        The string ``"0 x_center y_center width height"`` with class id 0 and
        all four values normalised by the image size.
    """
    img_w, img_h = size[0], size[1]

    # Visible extent of the box, clamped to the image on every side
    left = min(max(0, blister_x), img_w)
    right = min(max(0, blister_x + 2 * blister_size), img_w)
    top = min(max(0, blister_y), img_h)
    bottom = min(max(0, blister_y + 2 * blister_size), img_h)

    x_center = (left + right) / 2 / img_w
    y_center = (top + bottom) / 2 / img_h
    width = (right - left) / img_w
    height = (bottom - top) / img_h
    return f"0 {x_center} {y_center} {width} {height}"

def clear_directory(directory):
    """Leave ``directory`` as an existing, empty directory.

    A missing path is simply created (parents included); an existing one is
    removed with all of its contents and then recreated.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
        return

    # Wipe the whole tree, then start again from an empty folder
    shutil.rmtree(directory)
    os.makedirs(directory)

def generate_dataset(num_images, output_dir, seed=None):
    """Generate a synthetic dataset of images and label files.

    ``output_dir`` is wiped first, then populated with
    ``images/<subset>`` and ``labels/<subset>`` folders for the subsets
    ``train``, ``test`` and ``val``. Images are assigned by index: the first
    70% to ``train``, the next 20% to ``test`` and the rest to ``val``.

    A sample that fails to generate or save is reported and skipped; any file
    already written for it is removed so no image is left without its label
    (or vice versa). The final summary reports what was actually written.

    Args:
        num_images: Number of samples to attempt.
        output_dir: Root directory of the dataset. Existing contents are deleted.
        seed: Optional seed applied to both ``random`` and ``np.random`` (the
            generators used by the image-synthesis helpers) so a run can be
            reproduced. ``None`` leaves the generators untouched.

    Returns:
        None.
    """
    print(f"Starting to generate {num_images} synthetic images...")
    start_time = time.time()

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Clear existing data
    clear_directory(output_dir)

    # Create directory structure
    subsets = ('train', 'test', 'val')
    for data_type in ('images', 'labels'):
        for subset in subsets:
            os.makedirs(os.path.join(output_dir, data_type, subset), exist_ok=True)

    generated = dict.fromkeys(subsets, 0)  # samples written per subset
    failed = 0

    for i in range(num_images):
        if i % 10 == 0:
            print(f"Generating image {i+1}/{num_images}...")

        # Determine which subset this image belongs to
        if i < num_images * 0.7:
            subset = 'train'
        elif i < num_images * 0.9:
            subset = 'test'
        else:
            subset = 'val'

        img_path = os.path.join(output_dir, 'images', subset, f'synthetic_{i:04d}.png')
        label_path = os.path.join(output_dir, 'labels', subset, f'synthetic_{i:04d}.txt')

        try:
            img, blisters = create_synthetic_image()

            # Save image
            img.save(img_path)

            # Save annotations
            with open(label_path, 'w') as f:
                f.write('\n'.join(blisters))

            generated[subset] += 1
        except Exception as e:
            failed += 1
            print(f"Error generating image {i+1}: {str(e)}")
            # Drop whatever was written for this sample so image and label stay paired
            for leftover in (img_path, label_path):
                if os.path.exists(leftover):
                    os.remove(leftover)

    end_time = time.time()
    total_generated = sum(generated.values())
    print(f"Generated {total_generated} synthetic images and annotations in {output_dir}")
    print(f"Distribution: {generated['train']} train, {generated['test']} test, {generated['val']} val")
    if failed:
        print(f"Failed to generate {failed} images")
    print(f"Total time taken: {end_time - start_time:.2f} seconds")



In [10]:
if __name__ == "__main__":
    # Driver: regenerate the full synthetic dataset at the configured location.
    print("Synthetic Data Generator Script Starting...")

    # Run configuration: how many samples to create and where to put them
    num_images = 5000
    output_dir = r"C:\Users\mohan\Desktop\blister\ml_app\datasets\synthetic_dataset"

    generate_dataset(num_images=num_images, output_dir=output_dir)
    print("Script execution completed.")

Synthetic Data Generator Script Starting...
Starting to generate 5000 synthetic images...
Generating image 1/5000...
Generating image 11/5000...
Generating image 21/5000...
Generating image 31/5000...
Generating image 41/5000...
Generating image 51/5000...
Generating image 61/5000...
Generating image 71/5000...
Generating image 81/5000...
Generating image 91/5000...
Generating image 101/5000...
Generating image 111/5000...
Generating image 121/5000...
Generating image 131/5000...
Generating image 141/5000...
Generating image 151/5000...
Generating image 161/5000...
Generating image 171/5000...
Generating image 181/5000...
Generating image 191/5000...
Generating image 201/5000...
Generating image 211/5000...
Generating image 221/5000...
Generating image 231/5000...
Generating image 241/5000...
Generating image 251/5000...
Generating image 261/5000...
Generating image 271/5000...
Generating image 281/5000...
Generating image 291/5000...
Generating image 301/5000...
Generating image 311/5