In [None]:
# Clone the diffusers repository and install the dependencies of its DreamBooth example.
!pip install xformers
!pip install bitsandbytes
# NOTE(review): torch/torchvision are removed here after xformers was installed;
# presumably they are re-installed by the requirements.txt step below — confirm
# that the resulting torch build is still compatible with the installed xformers.
!pip uninstall torch torchvision -y

# NOTE(review): `git clone` fails if the `diffusers` directory already exists
# (e.g. when this cell is re-run in the same session).
!git clone https://github.com/huggingface/diffusers 
# Install diffusers from the cloned source tree, then the DreamBooth example requirements.
!cd diffusers && pip install . && cd examples/dreambooth && pip install -r requirements.txt

!pip install --upgrade peft
!pip install wandb # for outputting images mid training

In [None]:
# Create the default Accelerate configuration; `accelerate launch` is used for training later on.
!accelerate config default

import os
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import CLIPProcessor, CLIPModel
import torch
import torchvision.transforms as T
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import timm
import numpy as np

# CUDA allocator option for this process and any child process that inherits the environment.
# NOTE(review): torch is already imported at this point; presumably the option is
# only read when CUDA is first used — confirm it still takes effect here.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
# Authenticate against the Hugging Face Hub and configure Weights & Biases.
from huggingface_hub import login

# Credentials are read from the environment so that no secret is ever stored in
# the notebook source (notebooks get shared, versioned and published).
# Provide HF_TOKEN and WANDB_API_KEY before running this cell, e.g. from the
# platform's secret store.
hf_token = os.environ.get("HF_TOKEN", "")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set. Export your Hugging Face token as an environment "
        "variable instead of pasting it into the notebook."
    )
login(token=hf_token)

# Set the W&B configuration via environment variables
os.environ["WANDB_MODE"] = "online"
# WANDB_API_KEY is deliberately not assigned here: overwriting it with a
# placeholder would clobber a valid key that is already configured.
if not os.environ.get("WANDB_API_KEY"):
    raise RuntimeError(
        "WANDB_API_KEY is not set. Export your Weights & Biases API key as an "
        "environment variable instead of pasting it into the notebook."
    )

In [None]:
import torch
import torch.nn.functional as F
from PIL import Image
from transformers import CLIPModel, CLIPProcessor


In [None]:
import torch
import torch.nn.functional as F

In [None]:
import gc
import torch

# 1) Run Python garbage collection so that unreachable tensors are freed first.
gc.collect()

# The CUDA calls are guarded so this cell also runs on a CPU-only kernel
# (the notebook itself falls back to "cpu" when CUDA is unavailable).
if torch.cuda.is_available():
    # 2) Release the memory held by PyTorch's CUDA caching allocator.
    torch.cuda.empty_cache()

    # 3) Reset the peak-memory statistics so later readings start from here.
    torch.cuda.reset_peak_memory_stats()


In [None]:
# Root under which all working folders of this notebook are created.
base_path = "/kaggle/working/"

# Scratch directory plus the sub-folders used for the modified checkpoint
# and for the generated sample images.
folders = ["tmp", "tmp/modified_pretrained_model", "tmp/generated_images"]

for folder in folders:
    folder_path = os.path.join(base_path, folder)
    # exist_ok=True keeps the cell safe to re-run.
    os.makedirs(folder_path, exist_ok=True)
    print(f"Cartella creata: {folder_path}")



In [None]:
import os
import json
import pandas as pd
from datetime import datetime
import itertools
import subprocess
import torch
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from transformers import CLIPProcessor, CLIPModel
from huggingface_hub import login
import wandb

import torch, gc
# Collect garbage first: memory owned by tensors that are freed by the collector
# can only be released from the CUDA cache afterwards (same order as the
# earlier cleanup cell).
gc.collect()
torch.cuda.empty_cache()


# Python script for Kaggle notebook to extract X images from a source directory

import os
import shutil
import random


import os
import shutil
import random
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# CLIP processor/tokenizer and model shared by the similarity helpers below.
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_tokenizer = clip_processor.tokenizer
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device)
# Token-embedding layer of the CLIP text model, exposed as a module-level handle.
clip_embeddings = clip_model.text_model.embeddings.token_embedding


def get_embedding(text):
    """Return the CLIP text features for ``text``.

    The text is tokenized with the module-level ``clip_tokenizer`` (padded and
    truncated), moved to ``device`` and passed through
    ``clip_model.get_text_features`` with gradients disabled. The features are
    returned as produced by the model, i.e. not normalized.
    """
    encoded = clip_tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Move every tokenizer output onto the device the model lives on.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)

    with torch.no_grad():
        return clip_model.get_text_features(**on_device)


def extract_images(src_dir: str, num_images: int, dataset_name: str, seed=None) -> str:
    """
    Copy a random sample of images from src_dir into a fresh folder.

    The sample is written to '{num_images}_{dataset_name}' relative to the
    current working directory. If that folder already exists it is deleted and
    recreated, but only after the request has been validated, so a failing call
    never destroys a previous sample.

    Args:
        src_dir: Directory that is searched (non-recursively) for image files.
        num_images: Number of images to copy.
        dataset_name: Name used as the suffix of the output folder.
        seed: Optional seed for a reproducible selection. When None (default)
            the module-level ``random`` state is used.

    Returns:
        Absolute path of the created folder.

    Raises:
        ValueError: If num_images is negative or src_dir contains fewer than
            num_images image files.
    """
    if num_images < 0:
        raise ValueError(f"num_images must be non-negative, got {num_images}.")

    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff'}

    # Keep regular files only (a directory named like an image cannot be copied)
    # and sort them so that a given seed always selects the same files,
    # independent of the order in which the filesystem lists them.
    all_images = sorted(
        filename for filename in os.listdir(src_dir)
        if os.path.splitext(filename)[1].lower() in image_extensions
        and os.path.isfile(os.path.join(src_dir, filename))
    )

    # Validate before touching the output folder.
    if len(all_images) < num_images:
        raise ValueError(
            f"Richieste {num_images} immagini ma ne ho trovate solo {len(all_images)} in {src_dir}."
        )

    output_folder = f"{num_images}_{dataset_name}"

    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)

    os.makedirs(output_folder, exist_ok=True)

    # Randomly select num_images images
    rng = random if seed is None else random.Random(seed)
    selected_images = rng.sample(all_images, num_images)
    print(f"Selezionate {len(selected_images)} immagini in '{output_folder}'")

    for image in selected_images:
        shutil.copy(
            os.path.join(src_dir, image),
            os.path.join(output_folder, image)
        )

    return os.path.abspath(output_folder)



# Reset the peak-memory statistics (only meaningful when CUDA is available).
if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()

# Configuration
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["WANDB_MODE"] = "online"


def _require_secret(env_var):
    """Return the secret stored in ``env_var``.

    Raises:
        RuntimeError: If the variable is unset, empty, or still holds a
            "[YOUR_...]" template placeholder.
    """
    value = os.environ.get(env_var, "").strip()
    if not value or value.startswith("["):
        raise RuntimeError(
            f"{env_var} is not configured. Provide it through the environment "
            "(e.g. the platform's secret store); never hard-code it in the notebook."
        )
    return value


# SECURITY: credentials must never be committed in notebook source. Any key or
# token that was ever written literally in this cell has to be treated as
# leaked and revoked in the W&B / Hugging Face account settings.
os.environ["WANDB_API_KEY"] = _require_secret("WANDB_API_KEY")

# Login to Hugging Face
login(token=_require_secret("HF_TOKEN"))

class DreamBoothStudy:
    """Grid study of DreamBooth LoRA fine-tuning on Stable Diffusion v1.5.

    For every combination of dataset, number of instance images, number of
    class (prior-preservation) images and learning rate the study:

    1. samples the instance images with ``extract_images``,
    2. runs ``train_dreambooth_lora.py`` through ``accelerate launch``,
    3. loads the resulting LoRA weights and generates one validation image,
    4. records two CLIP cosine similarities (prompt vs. image, label vs. token).

    The collected rows are written to JSON/CSV and can be summarised and
    plotted with ``analyze_results``.
    """

    # Locations and limits shared by several methods.
    BASE_MODEL_ID = "runwayml/stable-diffusion-v1-5"
    WORK_DIR = "/kaggle/working/tmp"
    MODIFIED_MODEL_DIR = "/kaggle/working/tmp/modified_pretrained_model"
    TRAIN_SCRIPT = "/kaggle/working/diffusers/examples/dreambooth/train_dreambooth_lora.py"
    TRAIN_TIMEOUT_SECONDS = 3600  # 1 hour per training run

    def __init__(self):
        """Define the parameter grid, the dataset configurations and the result store."""
        # Study parameters
        self.nr_images_list = [5,30,70]
        self.nr_class_images_list = [5, 30, 70]
        self.lr = [2e-6,5e-6]
        self.max_train_steps = 200
        self.new_token = "[V]"

        # Dataset configurations
        self.datasets = {
            "bill_cosby": {
                "images_dir": "/kaggle/input/facescrub-full/actor_faces/Bill_Cosby",
                "instance_prompt": "a photo of [V] person",
                "validation_prompt": "a photo of [V] person in a garden",
                "label": "bill cosby",  # or specific name
                "class_prompt": "a photo of a person"
            },
            "golden_retriever_dog": {
                "images_dir": "/kaggle/input/dog-breed-image-dataset/dataset/Golden_Retriever",
                "instance_prompt": "a photo of [V] dog",
                "validation_prompt": "a photo of [V] dog in a garden",
                "label": "golden retriever",
                "class_prompt": "a photo of a dog"
            }
        }

        # Results storage: one dict per successfully evaluated experiment.
        self.results = []

    def setup_base_model(self):
        """Create the modified base checkpoint used by every experiment.

        Loads Stable Diffusion v1.5 in float16, adds ``self.new_token`` to the
        tokenizer, resizes the text-encoder embeddings accordingly and saves
        the pipeline to ``MODIFIED_MODEL_DIR``.

        Returns:
            The loaded ``StableDiffusionPipeline``.
        """
        print("Setting up base model...")

        # Load base model. It is not moved to CUDA explicitly:
        # enable_model_cpu_offload() below manages device placement itself, so
        # an up-front transfer would only be undone again.
        pretrained_model = StableDiffusionPipeline.from_pretrained(
            self.BASE_MODEL_ID,
            torch_dtype=torch.float16,
        )

        # Memory optimizations
        pretrained_model.enable_attention_slicing()
        pretrained_model.enable_xformers_memory_efficient_attention()
        pretrained_model.enable_model_cpu_offload()
        pretrained_model.unet.enable_gradient_checkpointing()

        # Add token
        pretrained_model.tokenizer.add_tokens(self.new_token)
        pretrained_model.text_encoder.resize_token_embeddings(len(pretrained_model.tokenizer))
        pretrained_model.save_pretrained(self.MODIFIED_MODEL_DIR)

        print("Base model setup complete!")
        return pretrained_model

    def _build_training_command(self, dataset_name, nr_class_images, lr, folder_path, output_dir):
        """Return the ``accelerate launch`` argument list for one training run.

        Prior preservation is switched on explicitly and given its own class
        image directory: without ``--with_prior_preservation`` the training
        script does not use ``--num_class_images`` / ``--class_prompt``, which
        would make the ``nr_class_images`` axis of the study a no-op.
        """
        dataset_config = self.datasets[dataset_name]
        # One directory per (dataset, count) so the class images are reused
        # across runs that share both and never mixed between counts.
        class_data_dir = f"{self.WORK_DIR}/class_images/{dataset_name}_{nr_class_images}"

        return [
            "accelerate", "launch",
            "--num_processes", "1",
            self.TRAIN_SCRIPT,
            f"--pretrained_model_name_or_path={self.MODIFIED_MODEL_DIR}",
            f"--instance_data_dir={folder_path}",
            f"--output_dir={output_dir}",
            f"--instance_prompt={dataset_config['instance_prompt']}",
            "--mixed_precision=bf16",
            "--train_batch_size=1",
            "--gradient_checkpointing",
            "--resolution=128",
            "--gradient_accumulation_steps=1",
            "--checkpointing_steps=100",
            f"--learning_rate={lr}",
            "--report_to=wandb",
            "--lr_scheduler=constant",
            "--lr_warmup_steps=0",
            f"--max_train_steps={self.max_train_steps}",
            f"--validation_prompt={dataset_config['validation_prompt']}",
            "--validation_epochs=50",
            "--seed=0",
            "--with_prior_preservation",
            f"--class_data_dir={class_data_dir}",
            f"--num_class_images={nr_class_images}",
            f"--class_prompt={dataset_config['class_prompt']}",
        ]

    def train_model(self, dataset_name, nr_images, nr_class_images, lr, experiment_id):
        """Train a DreamBooth LoRA model for one parameter combination.

        Args:
            dataset_name: Key into ``self.datasets``.
            nr_images: Number of instance images sampled for training.
            nr_class_images: Number of class images for prior preservation.
            lr: Learning rate passed to the training script.
            experiment_id: Suffix of the output directory name.

        Returns:
            The output directory on success, or None if the images could not
            be prepared or the training failed, timed out or could not be
            launched.
        """
        dataset_config = self.datasets[dataset_name]
        output_dir = f"{self.WORK_DIR}/dreambooth-model-{experiment_id}"

        # A dataset with too few images must only skip this experiment,
        # not abort the whole study.
        try:
            folder_path = extract_images(dataset_config['images_dir'], nr_images, dataset_name)
        except (ValueError, OSError) as e:
            print(f"Could not prepare instance images: {e}")
            return None
        print("folder_path:", folder_path)

        # Prepare training command
        cmd = self._build_training_command(dataset_name, nr_class_images, lr, folder_path, output_dir)

        print("Training model with parameters:")
        print(f"  Dataset: {dataset_name}")
        print(f"  Images: {nr_images}")
        print(f"  Class Images: {nr_class_images}")
        print(f"  Learning rate: {lr}")

        # Run training
        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=self.TRAIN_TIMEOUT_SECONDS
            )
            if result.returncode != 0:
                print(f"Training failed: {result.stderr}")
                return None
            print("Training completed successfully!")
            return output_dir
        except subprocess.TimeoutExpired:
            print("Training timed out!")
            return None
        except Exception as e:
            print(f"Training error: {e}")
            return None

    def load_trained_model(self, output_dir):
        """Load the modified base pipeline with the LoRA weights from ``output_dir``.

        Returns:
            The pipeline ready for inference, or None if loading failed.
        """
        try:
            # Load base pipeline. Device placement is left to
            # enable_model_cpu_offload() below.
            pipe = StableDiffusionPipeline.from_pretrained(
                self.MODIFIED_MODEL_DIR,
                torch_dtype=torch.float16
            )

            # Load LoRA weights through the pipeline-level loader
            # (unet.load_attn_procs is deprecated).
            pipe.load_lora_weights(output_dir)

            pipe.enable_attention_slicing()                   # slices attention for lower peak usage
            pipe.enable_model_cpu_offload()                   # offloads parts to CPU when idle

            return pipe
        except Exception as e:
            print(f"Error loading trained model: {e}")
            return None

    def compute_cosine_similarity_prompt_image(self, pipe, prompt, image):
        """Return the CLIP cosine similarity between ``prompt`` and ``image``.

        Args:
            pipe: Unused; kept for interface compatibility.
            prompt: Text that is embedded with ``get_embedding``.
            image: PIL image, or an array accepted by ``Image.fromarray``.

        Returns:
            The similarity as a Python float.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Ensure image is in RGB format
        if not isinstance(image, Image.Image):
            image = Image.fromarray(image).convert("RGB")
        elif image.mode != "RGB":
            image = image.convert("RGB")

        txt_emb = get_embedding(prompt)

        # Process inputs
        inputs_img = clip_processor(images=image, return_tensors="pt").to(device)

        with torch.no_grad():
            # Get embeddings
            image_emb = clip_model.get_image_features(**inputs_img)

        # Normalize and compute similarity
        image_emb = F.normalize(image_emb, p=2, dim=-1)
        txt_emb = F.normalize(txt_emb, p=2, dim=-1)

        cos_sim = torch.matmul(image_emb, txt_emb.T).item()
        return cos_sim

    def compute_cosine_similarity_label_token(self, pipe, label, token):
        """Return the CLIP cosine similarity between the texts ``label`` and ``token``.

        Both texts are embedded with the module-level ``get_embedding`` helper.

        NOTE(review): ``pipe`` is not used, so the value does not depend on the
        fine-tuned model and is identical for every experiment — confirm
        whether the trained text encoder was meant to be used here.
        """
        label_emb = get_embedding(label)
        token_emb = get_embedding(token)

        # Normalize and compute similarity
        label_emb = F.normalize(label_emb, p=2, dim=-1)
        token_emb = F.normalize(token_emb, p=2, dim=-1)

        cos_sim = torch.matmul(label_emb, token_emb.T).item()
        return cos_sim

    def run_inference_and_evaluate(self, pipe, dataset_name, nr_images, nr_class_images, lr):
        """Generate one validation image and record its similarity metrics.

        The image is saved under ``WORK_DIR/generated_images`` and the result
        row is appended to ``self.results``.

        Returns:
            The result dict, or None if generation or evaluation failed.
        """
        dataset_config = self.datasets[dataset_name]

        # Generate image
        prompt = dataset_config['validation_prompt']
        try:
            image = pipe(prompt, num_inference_steps=50, guidance_scale=7.0).images[0]

            # Compute similarities
            prompt_image_sim = self.compute_cosine_similarity_prompt_image(pipe, prompt, image)
            label_token_sim = self.compute_cosine_similarity_label_token(pipe, dataset_config['label'], self.new_token)

            # Save result
            result = {
                'dataset': dataset_name,
                'nr_images': nr_images,
                'nr_class_images': nr_class_images,
                'lr': lr,
                'prompt_image_similarity': prompt_image_sim,
                'label_token_similarity': label_token_sim,
                'timestamp': datetime.now().isoformat()
            }

            self.results.append(result)

            # Save image for visual inspection
            image_path = f"{self.WORK_DIR}/generated_images/{dataset_name}_{nr_images}_{nr_class_images}_{lr}.png"
            os.makedirs(os.path.dirname(image_path), exist_ok=True)
            image.save(image_path)
            print(f"Prompt: {prompt},  Label: {dataset_config['label']},  Token: {self.new_token}")

            print(f"Results - Prompt-Image Sim: {prompt_image_sim:.4f}, Label-Token Sim: {label_token_sim:.4f}")
            return result

        except Exception as e:
            print(f"Error in inference/evaluation: {e}")
            return None

    def save_results(self):
        """Write ``self.results`` to timestamped JSON and CSV files in ``WORK_DIR``.

        Returns:
            A DataFrame built from ``self.results``.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        os.makedirs(self.WORK_DIR, exist_ok=True)

        # Save to JSON
        json_path = f"{self.WORK_DIR}/dreambooth_study_results_{timestamp}.json"
        with open(json_path, 'w') as f:
            json.dump(self.results, f, indent=2)

        # Save to CSV
        df = pd.DataFrame(self.results)
        csv_path = f"{self.WORK_DIR}/dreambooth_study_results_{timestamp}.csv"
        df.to_csv(csv_path, index=False)

        print(f"Results saved to: {json_path} and {csv_path}")
        return df

    def run_complete_study(self):
        """Run every parameter combination and return the results as a DataFrame.

        Experiments whose training or model loading fails are skipped.
        Intermediate results are saved after every fifth experiment index that
        reaches the evaluation step, and once more at the end.
        """
        print("Starting DreamBooth LoRA Study...")

        # Setup base model. Only the checkpoint saved on disk is needed from
        # here on, so the in-memory pipeline is released before the training
        # subprocesses compete for the same GPU.
        base_model = self.setup_base_model()
        del base_model
        gc.collect()
        torch.cuda.empty_cache()

        # Get all parameter combinations
        combinations = list(itertools.product(
            self.datasets.keys(),
            self.nr_images_list,
            self.nr_class_images_list,
            self.lr
        ))

        total_experiments = len(combinations)
        print(f"Total experiments to run: {total_experiments}")

        for i, (dataset_name, nr_images, nr_class_images, lr) in enumerate(combinations):
            print(f"\n--- Experiment {i+1}/{total_experiments} ---")

            experiment_id = f"{dataset_name}_{nr_images}_{nr_class_images}_{lr}_{i}"

            # Train model
            output_dir = self.train_model(dataset_name, nr_images, nr_class_images, lr, experiment_id)

            if output_dir is None:
                print("Skipping evaluation due to training failure")
                continue

            # Load trained model
            pipe = self.load_trained_model(output_dir)
            if pipe is None:
                print("Skipping evaluation due to model loading failure")
                continue

            # Run evaluation (the row is stored in self.results by the callee)
            self.run_inference_and_evaluate(pipe, dataset_name, nr_images, nr_class_images, lr)

            # Clean up GPU memory; collect first so the freed tensors can
            # actually be released from the CUDA cache.
            del pipe
            gc.collect()
            torch.cuda.empty_cache()

            # Save intermediate results
            if (i + 1) % 5 == 0:  # Save every 5 experiments
                self.save_results()

        # Save final results
        df = self.save_results()
        print("\nStudy completed!")
        return df

    def analyze_results(self, df=None):
        """Print summary statistics and best configurations, then plot them.

        Args:
            df: Results DataFrame; built from ``self.results`` when None.

        Returns:
            The analysed DataFrame. When it is empty (e.g. every experiment
            failed) a message is printed and it is returned unchanged.
        """
        if df is None:
            df = pd.DataFrame(self.results)

        print("\n--- Results Analysis ---")

        # An empty frame has no 'dataset' column, so grouping would raise.
        if df.empty:
            print("No results to analyze.")
            return df

        # Summary statistics
        print("\nSummary Statistics:")
        print(df.groupby('dataset')[['prompt_image_similarity', 'label_token_similarity']].describe())

        # Best configurations
        print("\nBest configurations by prompt-image similarity:")
        best_prompt_image = df.loc[df.groupby('dataset')['prompt_image_similarity'].idxmax()]
        print(best_prompt_image[['dataset', 'nr_images', 'nr_class_images', 'lr', 'prompt_image_similarity']])

        print("\nBest configurations by label-token similarity:")
        best_label_token = df.loc[df.groupby('dataset')['label_token_similarity'].idxmax()]
        print(best_label_token[['dataset', 'nr_images', 'nr_class_images', 'lr', 'label_token_similarity']])

        # Create visualizations
        self.create_visualizations(df)

        return df

    def create_visualizations(self, df):
        """Draw four box plots of the similarity metrics and save them as a PNG.

        The figure is written to ``WORK_DIR`` alongside the other study outputs.
        """
        import seaborn as sns

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))

        # Plot 1: Prompt-Image Similarity by number of images
        sns.boxplot(data=df, x='nr_images', y='prompt_image_similarity', hue='dataset', ax=axes[0,0])
        axes[0,0].set_title('Prompt-Image Similarity by Number of Images')

        # Plot 2: Label-Token Similarity by number of images
        sns.boxplot(data=df, x='nr_images', y='label_token_similarity', hue='dataset', ax=axes[0,1])
        axes[0,1].set_title('Label-Token Similarity by Number of Images')

        # Plot 3: Similarities by lr
        sns.boxplot(data=df, x='lr', y='prompt_image_similarity', hue='dataset', ax=axes[1,0])
        axes[1,0].set_title('Prompt-Image Similarity by Learning Rate')

        # Plot 4: Similarities by class images
        sns.boxplot(data=df, x='nr_class_images', y='label_token_similarity', hue='dataset', ax=axes[1,1])
        axes[1,1].set_title('Label-Token Similarity by Number of Class Images')

        plt.tight_layout()
        os.makedirs(self.WORK_DIR, exist_ok=True)
        plt.savefig(f"{self.WORK_DIR}/dreambooth_study_analysis.png", dpi=300, bbox_inches='tight')
        plt.show()

# Usage example: run the full grid and analyse the outcome.
if __name__ == "__main__":
    study = DreamBoothStudy()

    # Source folders of the instance images (adjust these to your own inputs).
    dataset_dirs = {
        "bill_cosby": "/kaggle/input/facescrub-full/actor_faces/Bill_Cosby",
        "golden_retriever_dog": "/kaggle/input/dog-breed-image-dataset/dataset/Golden_Retriever",
    }
    for dataset_key, images_dir in dataset_dirs.items():
        study.datasets[dataset_key]["images_dir"] = images_dir

    # Train and evaluate every combination, then summarise and plot the results.
    results_df = study.run_complete_study()
    study.analyze_results(results_df)