# Image Generation for Binary Classification 

In [1]:
import os
from pathlib import Path

import torch
from PIL import Image

from compose_glide import ComposeGlide

In [None]:
# Prompts passed one at a time to `compose_glide.generate` in the generation
# loop. Each string combines face attributes with AND and negates them with NOT.
# NOTE(review): negation is spelled three different ways here ("No X", "NOT X",
# "NOT (No X)"). The captured log for the first prompt shows it resolved to
# ['smiling', 'glasses', 'female'] with all-negative weights; confirm the other
# prompts are parsed the way they are meant to be.
compositional_prompts = [
    "No Smiling AND NOT Glasses AND NOT Female",
    "Smiling AND NOT (No Glasses) AND NOT Female",
    "NOT (No Smiling) AND No Glasses AND NOT Male",
    "NOT (No Smiling) AND NOT (No Glasses) AND Male",
    "Smiling AND NOT (No Glasses) AND NOT Male"
]

# Upper bound of the inner (per-prompt) loop in the generation cell.
NUM_VARIANTS = 20

def tensor_to_image(tensor):
    """Convert an image tensor with values in [-1, 1] to a PIL Image.

    Args:
        tensor: A ``(C, H, W)`` tensor or a ``(N, C, H, W)`` batch. For a
            batch, only the first image is converted and the rest are
            ignored. The tensor may live on any device and may require grad.

    Returns:
        An 8-bit PIL image. A single-channel input produces a 2-D array and
        therefore a grayscale image.

    Raises:
        ValueError: If ``tensor`` is neither 3- nor 4-dimensional.
    """
    if tensor.dim() == 4:
        # Batch: keep only the first image, and slice *before* scaling so the
        # discarded images are never processed.
        tensor = tensor[0]
    elif tensor.dim() != 3:
        raise ValueError(
            f"Expected a CxHxW or NxCxHxW tensor, got {tensor.dim()} dimension(s)"
        )

    # detach() is required because .numpy() refuses tensors that require grad.
    # Scale from [-1, 1] to [0, 255], then move to CPU for the NumPy conversion.
    scaled = ((tensor.detach() + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).cpu()

    # Rearrange dimensions from CxHxW to HxWxC.
    img = scaled.permute(1, 2, 0).numpy()

    # Image.fromarray cannot handle an HxWx1 array; drop the channel axis so a
    # single-channel tensor becomes a plain HxW grayscale array.
    if img.shape[2] == 1:
        img = img[:, :, 0]

    return Image.fromarray(img)

# Build the generator once for the whole notebook; printing it shows the
# instance's configuration summary.
compose_glide = ComposeGlide(
    verbose=True,
    model_name="glide_faces",
)
print(compose_glide)

            ComposeGLIDE Instance Configuration             
Device:                        mps                         
Verbose:                       True                        
------------------------------------------------------------
Base Model                                                  
  Parameters:                  385,030,726                 
  FP16 Enabled:                False                       
  Timestep Respacing:          100                         
  Image Size:                  64                          
------------------------------------------------------------
Upsampler Model                                             
  Parameters:                  398,361,286                 
  FP16 Enabled:                False                       
  Timestep Respacing:          fast27                      
  Image Size:                  256                         


In [3]:
# Directory the generated PNGs are written to. Set the
# COMPOSE_GLIDE_OUTPUT_DIR environment variable to redirect it; the fallback is
# the path this notebook originally wrote to.
OUTPUT_DIR = Path(
    os.environ.get(
        "COMPOSE_GLIDE_OUTPUT_DIR",
        "/Users/deniskrylov/Developer/University/compose-glide/outputs",
    )
)
# Create the directory before sampling so a missing or unwritable folder
# surfaces immediately instead of at image.save() after a full generation.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# True  -> generate only the first variant of the first prompt (quick check).
# False -> full sweep: NUM_VARIANTS images for every prompt.
SMOKE_TEST = True

prompts_to_run = compositional_prompts[:1] if SMOKE_TEST else compositional_prompts
variants_per_prompt = min(NUM_VARIANTS, 1) if SMOKE_TEST else NUM_VARIANTS

for i, prompt in enumerate(prompts_to_run):
    for j in range(variants_per_prompt):
        # NOTE(review): the second return value is discarded even though
        # attention maps and intermediate steps are requested; confirm
        # whether those options are still needed here.
        result, _ = compose_glide.generate(
            prompt,
            num_images=1,
            upsample=True,
            upsample_temp=0.995,
            save_intermediate_steps=10,
            return_attention_maps=True
        )

        image = tensor_to_image(result)
        image_path = OUTPUT_DIR / f"prompt_{i}_variant_{j}.png"
        image.save(image_path)
        print(f"Saved: {image_path}!")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


CLIP model loaded from cache: clip_model_cache
Using prompts: ['smiling', 'glasses', 'female'] with weights: [-5.765088810096396, -6.084469064436272, -6.1504421254673325]
Generating base image 1/1...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/100 [00:00<?, ?it/s]

Upsampling base image 1/1...


  0%|          | 0/27 [00:00<?, ?it/s]

Saved: /Users/deniskrylov/Developer/University/compose-glide/outputs/prompt_0_variant_0.png!
