In [1]:
from transformers import CLIPProcessor, CLIPModel
from PIL import Image




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the pretrained CLIP model and its matching processor.
# The checkpoint id is named once so both loads always refer to the same checkpoint.
MODEL_NAME = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(MODEL_NAME)
processor = CLIPProcessor.from_pretrained(MODEL_NAME)

In [3]:
import os

import numpy as np
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Load the model and processor (checkpoint id named once so both loads stay in sync)
MODEL_NAME = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(MODEL_NAME)
processor = CLIPProcessor.from_pretrained(MODEL_NAME)

# Define the prompts.
# Order matters: the combined score below is computed as
# probability(prompt 0) - probability(prompt 1).
text_prompts = [
    "The product has a leaf anywhere on it",
    "The product has no leaves anywhere on it"
]

# Paths to folders containing images.
# Built with os.path.join so the notebook is not tied to Windows: a raw
# "LeafExamples\1" string is a single file name on POSIX, not a sub-folder.
positive_folder = os.path.join("LeafExamples", "1")  # With leaves
negative_folder = os.path.join("LeafExamples", "0")  # Without leaves

# Function to process images and calculate probabilities
def process_folder(folder_path, label):
    """Score every image in ``folder_path`` against ``text_prompts`` with CLIP.

    For each image the softmax probability of every prompt is printed,
    followed by the combined score (probability of prompt 0 minus
    probability of prompt 1).

    Args:
        folder_path: Directory whose direct entries are scanned for image
            files (sub-directories are not descended into).
        label: Human-readable name for the folder; used only in the log line.

    Returns:
        ``np.ndarray`` holding one row of per-prompt probabilities per image,
        in sorted file-name order. When the folder has no image files the
        array is empty (``size == 0``).
    """
    image_scores = []
    print(f"Processing {label} examples...")
    # os.listdir returns entries in arbitrary order; sort for a reproducible log.
    for image_file in sorted(os.listdir(folder_path)):
        # Compare the lower-cased name so ".JPG" / ".PNG" files are not skipped.
        if not image_file.lower().endswith((".jpg", ".png", ".jpeg")):
            continue

        image_path = os.path.join(folder_path, image_file)
        # Image.open is lazy and keeps the file open; convert() reads the pixel
        # data so the handle can be released as soon as the with-block exits.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        # Preprocess the image and text prompts
        inputs = processor(text=text_prompts, images=image, return_tensors="pt", padding=True)

        # Inference only: do not build an autograd graph for the forward pass.
        with torch.no_grad():
            outputs = model(**inputs)
            # Softmax across the prompt axis turns logits into probabilities.
            probs = outputs.logits_per_image.softmax(dim=1)

        # Calculate the combined score
        combined_score = probs[0][0].item() - probs[0][1].item()

        # Print probabilities and combined score for this image
        print(f"Image: {image_file}")
        for i, prompt in enumerate(text_prompts):
            print(f"  '{prompt}': {probs[0][i].item():.4f}")
        print(f"  Combined score (has leaf - no leaves): {combined_score:.4f}")

        # Keep the per-prompt probabilities for averaging by the caller
        image_scores.append(probs[0].cpu().numpy())
    return np.array(image_scores)

# Score both example folders
positive_scores = process_folder(positive_folder, "Positive (with leaves)")
negative_scores = process_folder(negative_folder, "Negative (without leaves)")

# Mean probability per prompt; a folder without images falls back to all zeros
if positive_scores.size > 0:
    positive_averages = positive_scores.mean(axis=0)
else:
    positive_averages = np.zeros(len(text_prompts))

if negative_scores.size > 0:
    negative_averages = negative_scores.mean(axis=0)
else:
    negative_averages = np.zeros(len(text_prompts))

# Combined score on the averages: prompt 0 ("has leaf") minus prompt 1 ("no leaves")
positive_combined_avg = positive_averages[0] - positive_averages[1]
negative_combined_avg = negative_averages[0] - negative_averages[1]

# Report the per-prompt averages, then the combined averages
print("\nAverage scores for each prompt:")
for prompt, pos_avg, neg_avg in zip(text_prompts, positive_averages, negative_averages):
    print(f"'{prompt}': Positive Avg: {pos_avg:.4f}, Negative Avg: {neg_avg:.4f}")
print(f"\nAverage combined score (has leaf - no leaves):")
print(f"  Positive Avg: {positive_combined_avg:.4f}")
print(f"  Negative Avg: {negative_combined_avg:.4f}")


Processing Positive (with leaves) examples...
Image: 10721619.jpg
  'The product has a leaf anywhere on it': 0.6914
  'The product has no leaves anywhere on it': 0.3086
  Combined score (has leaf - no leaves): 0.3829
Image: 11012811.jpg
  'The product has a leaf anywhere on it': 0.8069
  'The product has no leaves anywhere on it': 0.1931
  Combined score (has leaf - no leaves): 0.6137
Image: 11013163.jpg
  'The product has a leaf anywhere on it': 0.7871
  'The product has no leaves anywhere on it': 0.2129
  Combined score (has leaf - no leaves): 0.5742
Image: 11239208.jpg
  'The product has a leaf anywhere on it': 0.6314
  'The product has no leaves anywhere on it': 0.3686
  Combined score (has leaf - no leaves): 0.2628
Image: 11296679.jpg
  'The product has a leaf anywhere on it': 0.7308
  'The product has no leaves anywhere on it': 0.2692
  Combined score (has leaf - no leaves): 0.4616
Image: 11677916.jpg
  'The product has a leaf anywhere on it': 0.1596
  'The product has no leaves 

In [None]:
import os
import pandas as pd
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
from multiprocessing import Pool
from tqdm import tqdm  # For the progress bar

# Use the GPU when torch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the CLIP checkpoint, move the model to the chosen device, load its processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
model = model.to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Prompts every image is scored against.
# process_batch reads index 0 as the leaf score and index 1 as the no-leaf score.
text_prompts = [
    "The product has a leaf anywhere on it",
    "The product has no leaves anywhere on it"
]

# Number of images per model batch; also the threshold at which results are flushed to CSV
BATCH_SIZE = 100

# Function to collect all image paths from the base folder
def collect_image_paths(base_folder, extensions=(".jpg", ".jpeg", ".png")):
    """Recursively collect image files below ``base_folder``.

    Args:
        base_folder: Root directory to walk.
        extensions: File-name suffixes (including the dot) that count as
            images. Matching is case-insensitive, so ``.JPG`` is accepted.

    Returns:
        List of ``(gtin, image_path)`` tuples. ``gtin`` is the name of the
        directory that directly contains the file (presumably the product
        GTIN -- confirm against the folder layout). Entries follow a sorted,
        reproducible traversal order.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    image_paths = []
    for root, dirs, files in os.walk(base_folder):
        # Sorting dirs in place makes os.walk visit sub-folders in a stable order.
        dirs.sort()
        gtin = os.path.basename(root)
        for file in sorted(files):
            if file.lower().endswith(suffixes):
                image_paths.append((gtin, os.path.join(root, file)))
    return image_paths

# Function to process a batch of images
def process_batch(batch):
    """Run CLIP on one batch of images and return per-image prompt probabilities.

    Args:
        batch: Iterable of ``(gtin, image_path)`` tuples.

    Returns:
        List of dicts with the keys ``gtin``, ``image_name``, ``leaf_score``
        and ``no_leaf_score`` -- one dict per image that could be loaded.
        Images that fail to load are reported on stdout and skipped; if none
        can be loaded the list is empty.
    """
    images = []
    gtins = []
    image_names = []

    for gtin, image_path in batch:
        try:
            # Image.open is lazy: decode inside the try so a truncated or
            # corrupt file is skipped here instead of raising later inside the
            # processor and discarding the whole batch. The with-block also
            # releases the file handle immediately.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            continue
        images.append(image)
        gtins.append(gtin)
        image_names.append(os.path.basename(image_path))

    if not images:
        return []

    # Preprocess all images in the batch
    inputs = processor(text=text_prompts, images=images, return_tensors="pt", padding=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}  # Move inputs to GPU if available

    # Get predictions for the batch
    with torch.no_grad():
        outputs = model(**inputs)
        probs = outputs.logits_per_image.softmax(dim=1)

    # One device-to-host copy for the whole batch instead of two .item() calls per image.
    prob_rows = probs.cpu().tolist()

    return [
        {
            "gtin": gtin,
            "image_name": image_name,
            "leaf_score": row[0],
            "no_leaf_score": row[1],
        }
        for gtin, image_name, row in zip(gtins, image_names, prob_rows)
    ]

# Function to process all images in the base folder
def process_all_images(base_folder, output_csv_prefix):
    """Score every image below ``base_folder`` and write the results as CSV chunks.

    Results are flushed to ``{output_csv_prefix}_batch_{idx}.csv`` whenever at
    least ``BATCH_SIZE`` rows have accumulated; rows left over at the end go to
    ``{output_csv_prefix}_final.csv``.

    Batches are run in this process rather than through
    ``multiprocessing.Pool``: pool workers cannot reliably import functions
    defined in a notebook / interactive session, and each worker would need
    its own copy of the model (and cannot share an initialised CUDA context).
    ``process_batch`` already feeds the model a whole batch at once.

    Args:
        base_folder: Root directory that is searched for images.
        output_csv_prefix: Prefix for the names of the CSV files written.

    Returns:
        None. Output is written to CSV files and progress to stdout.
    """
    # Collect all image paths
    print("Collecting image paths...")
    image_paths = collect_image_paths(base_folder)
    print(f"Collected {len(image_paths)} images.")

    # Split into batches
    batches = [image_paths[i:i + BATCH_SIZE] for i in range(0, len(image_paths), BATCH_SIZE)]
    print(f"Processing {len(batches)} batches of size {BATCH_SIZE}.")

    pending_results = []

    with tqdm(total=len(batches), desc="Processing batches", unit="batch") as pbar:
        for idx, batch in enumerate(batches, start=1):
            pending_results.extend(process_batch(batch))

            # Save to CSV in chunks so a crash loses at most one chunk of work
            if len(pending_results) >= BATCH_SIZE:
                batch_file = f"{output_csv_prefix}_batch_{idx}.csv"
                pd.DataFrame(pending_results).to_csv(batch_file, index=False)
                # tqdm.write prints without corrupting the progress bar line
                tqdm.write(f"Saved batch {idx} to {batch_file}")
                pending_results = []  # Clear results after saving

            pbar.update(1)

    # Save remaining results
    if pending_results:
        batch_file = f"{output_csv_prefix}_final.csv"
        pd.DataFrame(pending_results).to_csv(batch_file, index=False)
        print(f"Saved final results to {batch_file}")

# Script entry point
if __name__ == "__main__":
    # Root folder that is walked for images; replace with the path to your folder
    base_folder = r"E:\Brandbank\brandbank"
    # Every output CSV file name starts with this prefix
    output_csv_prefix = "leaf_detection_results"

    process_all_images(base_folder=base_folder, output_csv_prefix=output_csv_prefix)


Collecting image paths...
Collected 815707 images.
Processing 8158 batches of size 100.


Processing batches:   0%|          | 0/8158 [00:00<?, ?batch/s]