<a href="https://colab.research.google.com/github/Yang-star-source/Latent_Diffusion_From_Scratch/blob/main/Text_Embedding_Latent_Diffusion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from huggingface_hub import hf_hub_download
import os

import zipfile

# Hugging Face dataset repo that hosts the zipped cat images, and the local
# folder the archive is unpacked into.
repo_id = "ziyang06315/cats_images_dataset"
DATASET_PATH = "/content/cats"
os.makedirs(DATASET_PATH, exist_ok=True)

# The returned value is used below as the local path of the downloaded zip.
cats_images_dataset = hf_hub_download(
    repo_id=repo_id,
    filename="cat.zip",
    repo_type="dataset",
)

# Unpack with zipfile rather than `!unzip -q`: files that already exist are
# overwritten without an interactive prompt, so the cell can be re-run, and
# neither path is passed through a shell.
with zipfile.ZipFile(cats_images_dataset) as archive:
    archive.extractall(DATASET_PATH)


In [None]:
# Install a version of transformers that works with Florence-2
!pip install transformers==4.41.2 timm einops pillow

# CRITICAL: You must restart the session after running this!
# In Colab menu: Runtime > Restart Session

In [None]:
import os
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# Install flash_attn which is a dependency for the model
!pip install flash_attn

# --- CONFIGURATION ---
# Folder holding the images to caption, and the Hub id of the captioning model.
IMAGE_FOLDER = "/content/cats"
MODEL_ID = "microsoft/Florence-2-large"

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Device found: {DEVICE}")

# Captioning still runs on the CPU, so only warn instead of stopping.
if DEVICE == "cpu":
    print("WARNING: You are running on CPU. This will be very slow. Make sure 'Change Runtime Type' is set to T4 GPU.")

# --- LOAD MODEL (Main Branch) ---
print(f"Loading {MODEL_ID}...")

# No 'revision' is passed; compatibility relies on the pinned transformers
# install instead.
# NOTE(review): trust_remote_code=True presumably lets the model repository's
# own Python code run locally -- confirm the repository is trusted.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
model = model.to(DEVICE)  # move the weights to the selected device
model = model.eval()      # switch to evaluation mode for captioning

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

def generate_caption(image_path, prompt="<MORE_DETAILED_CAPTION>",
                     max_new_tokens=1024, num_beams=3):
    """Caption one image with the model and processor loaded in this notebook.

    Args:
        image_path: Path of the image file to describe.
        prompt: Task prompt handed to the processor and used as the key of the
            post-processed result. Defaults to "<MORE_DETAILED_CAPTION>".
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``.
        num_beams: Beam count forwarded to ``model.generate``.

    Returns:
        The entry stored under ``prompt`` in the processor's post-processed
        output, or ``None`` when the image cannot be opened (the error is
        printed rather than raised).
    """
    try:
        # The context manager releases the underlying file as soon as the RGB
        # image has been produced, instead of leaving it to garbage collection.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
    except Exception as e:
        print(f"Error opening {image_path}: {e}")
        return None

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(DEVICE)

    # Gradients are not needed for generation.
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=max_new_tokens,
            num_beams=num_beams
        )

    # Special tokens are kept because the post-processing step receives the raw
    # decoded text together with the task prompt.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]

    parsed_answer = processor.post_process_generation(
        generated_text,
        task=prompt,
        image_size=(image.width, image.height)
    )

    return parsed_answer[prompt]

# --- MAIN LOOP ---
if not os.path.exists(IMAGE_FOLDER):
    os.makedirs(IMAGE_FOLDER)
    print(f"Created folder {IMAGE_FOLDER}. Please upload your images there!")

# os.listdir returns entries in arbitrary order; sorting keeps the processing
# order and the progress log the same on every run.
files = sorted(
    f for f in os.listdir(IMAGE_FOLDER)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

if len(files) > 0:
    print(f"Found {len(files)} images. Starting processing...")
    for idx, filename in enumerate(files):
        img_path = os.path.join(IMAGE_FOLDER, filename)
        # The caption is stored next to the image: same stem, ".txt" extension.
        txt_path = os.path.join(IMAGE_FOLDER, os.path.splitext(filename)[0] + ".txt")

        # Skip if already exists, so an interrupted run can be resumed.
        if os.path.exists(txt_path):
            continue

        caption = generate_caption(img_path)
        # generate_caption returns None when the image cannot be opened;
        # nothing is written for a missing or empty caption.
        if caption:
            # Explicit UTF-8 so the file does not depend on the locale default.
            with open(txt_path, "w", encoding="utf-8") as caption_file:
                caption_file.write(caption)
            print(f"[{idx+1}/{len(files)}] Captioned {filename}")
else:
    print(f"Folder '{IMAGE_FOLDER}' is empty. Upload images to the left sidebar first.")

In [None]:
import shutil
from google.colab import files

# Zip the folder. The absolute base name places the archive at
# /content/my_captioned_cats.zip regardless of the current working directory,
# which is the path the Drive backup cell reads from.
archive_path = shutil.make_archive('/content/my_captioned_cats', 'zip', '/content/cats')

# Trigger download of the archive that was just written.
files.download(archive_path)

In [None]:
from google.colab import drive
import shutil
import os

# 1. Mount Google Drive under /content/drive
print("Mounting Google Drive...")
drive.mount('/content/drive')

# 2. Where the archive is now, and where its backup copy goes
source_path = '/content/my_captioned_cats.zip'
destination_path = '/content/drive/MyDrive/my_captioned_cats_backup.zip'

# 3. Copy the archive to Drive, or report that it is missing
if not os.path.exists(source_path):
    print("❌ ERROR: Could not find the zip file! Did you name it correctly?")
else:
    print(f"Copying {source_path} to Google Drive...")
    shutil.copy(source_path, destination_path)
    print(f"✅ SUCCESS! File saved to: {destination_path}")
    print("You can now safely close this tab or restart runtime.")

In [None]:
import zipfile
import os

def classify_archive_members(names):
    """Split archive member names into caption files and image files.

    Extensions are compared case-insensitively, matching how the captioning
    cell selects images with ``f.lower().endswith(...)``, so members such as
    "photo.JPG" are counted as images.

    Args:
        names: Iterable of member names, e.g. ``ZipFile.namelist()``.

    Returns:
        A ``(txt_files, img_files)`` tuple of lists, each in input order.
    """
    names = list(names)
    txt_files = [n for n in names if n.lower().endswith('.txt')]
    img_files = [n for n in names if n.lower().endswith(('.png', '.jpg', '.jpeg'))]
    return txt_files, img_files


# Path to your backup on Drive
zip_path = '/content/drive/MyDrive/my_captioned_cats_backup.zip'

if os.path.exists(zip_path):
    print(f"Checking {zip_path}...")

    # Open the zip file in read mode; only the member listing is inspected.
    with zipfile.ZipFile(zip_path, 'r') as z:
        # Get a list of all member names inside
        all_files = z.namelist()

        # Count captions and images
        txt_files, img_files = classify_archive_members(all_files)

        print(f"Total Files: {len(all_files)}")
        print(f"Images Found: {len(img_files)}")
        print(f"Text Files Found: {len(txt_files)}")

        if len(txt_files) > 0:
            print("\nSUCCESS! The text files are safely inside.")
            print("Example file:", txt_files[0])
        else:
            print("\nWARNING: No text files found in the zip!")
else:
    print("Could not find the zip file on Drive. Check the path.")