In [1]:
# Mount Google Drive at /content/drive so later cells can read and write under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive\n

In [2]:
# --- REVISED INSTALLATION (Attempt 14: Trying Latest Compatible Versions) ---\n\n# 1. Uninstall *all* potentially conflicting packages.\nprint("Uninstalling all potentially conflicting packages...")\n!pip uninstall -y -q torch torchvision torchaudio xformers bitsandbytes diffusers transformers accelerate sentence-transformers peft huggingface_hub\n\n# 2. Install PyTorch with the latest available CUDA 12.1 compatible version\n#    Let pip resolve to the highest available that matches Colab's CUDA\nprint("Installing PyTorch, torchvision, and torchaudio (latest cu121)...")\n!pip install -qqq torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121\n\n# 3. Install xformers (latest compatible with PyTorch installed above)\nprint("Installing xformers (latest compatible)...")\n!pip install -qqq xformers --index-url https://download.pytorch.org/whl/cu121\n\n# 4. Install bitsandbytes\nprint("Installing bitsandbytes...")\n!pip install -qqq bitsandbytes\n\n# 5. Install the very latest diffusers, transformers, accelerate, peft, huggingface_hub\n#    This assumes they are now compatible with each other and with the latest PyTorch cu121.\nprint("Installing latest diffusers, transformers, accelerate, peft, huggingface_hub...")\n!pip install -qqq diffusers==0.27.1 transformers accelerate peft huggingface_hub==0.20.3\n\n# 6. Install other dependencies\nprint("Installing other dependencies...")\n!pip install -qqq Pillow numpy matplotlib scikit-learn opencv-python gradio controlnet_aux python-dotenv\n\nprint("\n--- ALL INSTALLATIONS ATTEMPTED ---")\nprint("Please RESTART RUNTIME (Runtime > Restart runtime) and then run subsequent cells.")\n\n# ... 
existing code ...\n\n# --- Hugging Face Login Cell ---\nfrom dotenv import load_dotenv\nimport os\n\n# Load environment variables from .env file\nload_dotenv()\n\n# Get token from environment variable\nHUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')\nif not HUGGINGFACE_TOKEN:\n    raise ValueError("HUGGINGFACE_TOKEN not found in .env file")\n\nfrom huggingface_hub import login\nlogin(token=HUGGINGFACE_TOKEN)



In [2]:
# --- Hugging Face Login ---
import os
from dotenv import load_dotenv

# Pull variables from a .env file into the process environment.
load_dotenv()

# Look the access token up in the environment and stop early when it is absent or empty.
HUGGINGFACE_TOKEN = os.environ.get('HUGGINGFACE_TOKEN')
if HUGGINGFACE_TOKEN is None or HUGGINGFACE_TOKEN == "":
    raise ValueError("HUGGINGFACE_TOKEN not found in .env file")

# Authenticate this session against the Hugging Face Hub.
from huggingface_hub import login
login(token=HUGGINGFACE_TOKEN)

In [3]:
import os  # required for the directory creation below

# --- Dataset locations and prompts ---
instance_data_dir = "/content/drive/MyDrive/mysketches/examples of doodles"  # folder with your sketches
instance_prompt = "a sksart sketch"  # unique style token + class prompt
class_data_dir = "/content/drive/MyDrive/SketchPoemAI/class_images"  # regularization images
class_prompt = "a sketch"  # general class prompt

# --- Where the fine-tuned model is written ---
output_dir = "/content/drive/MyDrive/SketchPoemAI/finetuned_model"

# Create the output and class-image folders; folders that already exist are left as they are.
for _directory in (output_dir, class_data_dir):
    os.makedirs(_directory, exist_ok=True)

print(f"Paths defined:\n  Instance Data: {instance_data_dir}\n  Output: {output_dir}")

Paths defined:\n  Instance Data: /content/drive/MyDrive/mysketches/examples of doodles\n  Output: /content/drive/MyDrive/SketchPoemAI/finetuned_model\n

In [None]:
# --- Optional: BLIP Automated Captioning Cell ---
# Writes one "<image name>.txt" caption next to every image in `instance_data_dir`.
# Captions were already generated on a previous run, so this cell normally does not
# need to be re-run; images that already have a caption file are skipped.
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import os
import torch

# `instance_data_dir` and `instance_prompt` come from the "Define paths" cell;
# that cell must have been run before this one.

# Use the GPU when one is available and fall back to the CPU otherwise, instead of
# failing on a hard-coded .to("cuda").
blip_device = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading BLIP model for captioning...")
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(blip_device)
print("BLIP model loaded.")

print(f"\nStarting captioning process for images in: {instance_data_dir}")
processed_count = 0
skipped_count = 0
error_count = 0

# Walk the sketch directory in sorted order so the log is reproducible between runs.
for filename in sorted(os.listdir(instance_data_dir)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
        continue

    image_path = os.path.join(instance_data_dir, filename)
    text_path = os.path.join(instance_data_dir, os.path.splitext(filename)[0] + ".txt")

    # Never overwrite an existing (possibly hand-edited) caption.
    if os.path.exists(text_path):
        skipped_count += 1
        continue

    try:
        # The context manager closes the image file as soon as the RGB copy exists.
        with Image.open(image_path) as image_file:
            raw_image = image_file.convert("RGB")

        inputs = processor(raw_image, return_tensors="pt").to(blip_device)
        out = model.generate(**inputs, max_length=50, num_beams=5)
        caption = processor.decode(out[0], skip_special_tokens=True)

        # Prefix the style prompt so every caption carries the unique style token.
        final_caption = f"{instance_prompt}, {caption}"

        # Explicit UTF-8 keeps the caption files independent of the platform default encoding.
        with open(text_path, "w", encoding="utf-8") as f:
            f.write(final_caption)

        print(f"Caption for {filename}: {final_caption}")
        processed_count += 1

    except Exception as e:
        # Best effort: one unreadable image must not abort the whole run.
        print(f"Error processing {filename}: {e}")
        error_count += 1

# Drop the captioning model so its memory is available to the training cell.
del model
del processor
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("Cleared GPU cache.")

print(f"\n--- Captioning Summary ---")
print(f"Processed: {processed_count} images")
print(f"Skipped (caption already existed): {skipped_count} images")
print(f"Errors: {error_count} images")
print("\nRemember to manually review and refine the generated captions for accuracy and style adherence!")

In [4]:
# --- Training Pipeline Cell (Final attempt to get it running - FULL CODE) ---\n\nimport os # Ensure os is imported here for file system operations\n\n# --- Training Parameters ---\nmodel_name = "runwayml/stable-diffusion-v1-5" # Base model\nresolution = 512\ntrain_batch_size = 1\ngradient_accumulation_steps = 1\nlearning_rate = 1e-4\nlr_scheduler = "cosine"\nlr_warmup_steps = 100\nmax_train_steps = 5000\nsave_steps = 1000\nseed = 42\n\n# These variables should be defined from your "Define Paths & Create Directories" cell (Cell [5]).\n# They are included here as comments just for reference.\n\nprint(f"\nStarting training with {model_name} and LoRA...")\nprint(f"Dataset path: {instance_data_dir}") # This variable should now be defined\nprint(f"Instance prompt: '{instance_prompt}'") # This variable should now be defined\nprint(f"Output directory: {output_dir}") # This variable should now be defined\nprint(f"Max training steps: {max_train_steps}")\n\n\n# --- CRITICAL: Ensure the script downloads and is found ---\nscript_url = "https://raw.githubusercontent.com/huggingface/diffusers/v0.27.1/examples/dreambooth/train_dreambooth_lora.py"\nscript_local_path = "/content/train_dreambooth_lora.py" # The desired path for the downloaded script\n\nprint(f"Attempting to download train_dreambooth_lora.py script to: {script_local_path}")\nprint(f"From URL: {script_url}")\n\n# Ensure we are in /content directory before downloading, so the script goes to the correct place\n%cd /content\nprint(f"Current working directory before download: {os.getcwd()}")\n\n# Use !wget to download the script. -q for quiet, -O to specify output file.\n!wget -q -O {script_local_path} {script_url}\n\n# Verify the script was downloaded\nif os.path.exists(script_local_path):\n    print(f"Successfully downloaded script: {script_local_path}")\nelse:\n    print(f"ERROR: Script not found after download attempt at {script_local_path}. 
Please check the URL or your network connection.")\n    raise FileNotFoundError(f"Training script not found: {script_local_path}")\n\nprint(f"Current working directory for accelerate launch: {os.getcwd()}")\n\n\n# --- Command to run the DreamBooth LoRA training script ---\n# We directly execute the downloaded script using its full path ({script_local_path})\n!accelerate launch {script_local_path} \
    --pretrained_model_name_or_path="$model_name" \
    --instance_data_dir="$instance_data_dir" \
    --instance_prompt="$instance_prompt" \
    --output_dir="$output_dir" \
    --mixed_precision="fp16" \
    --resolution=$resolution \
    --train_batch_size=$train_batch_size \
    --gradient_accumulation_steps=$gradient_accumulation_steps \
    --learning_rate=$learning_rate \
    --lr_scheduler=$lr_scheduler \
    --lr_warmup_steps=$lr_warmup_steps \
    --max_train_steps=$max_train_steps \
    --checkpointing_steps=$save_steps \
    --seed=$seed \
    --with_prior_preservation --class_data_dir="$class_data_dir" --class_prompt="$class_prompt" \
    --num_class_images=200 \
    --validation_prompt="a sksart sketch of a tree" \
\nprint(f"\nModel training complete. LoRA weights saved to: {output_dir}")\nprint("You can now proceed to the Inference Pipeline cell to generate sketches from poems.")



In [6]:
# --- Inference Pipeline: Cell 1 - Load the Fine-tuned Model ---
# Locates the trained LoRA weights, loads the base Stable Diffusion pipeline and
# attaches the weights. On failure `pipe` is set to None so later cells can detect it.

from diffusers import StableDiffusionPipeline
import torch
import os
import re

# Define the base model (should match the one you used for training)
base_model_name = "runwayml/stable-diffusion-v1-5"

# Repeated here (same value as in the 'Define Paths' cell) so this cell also works
# right after a runtime restart.
output_dir = "/content/drive/MyDrive/SketchPoemAI/finetuned_model"

# File name under which the training script stores LoRA weights.
LORA_WEIGHTS_FILENAME = "pytorch_lora_weights.safetensors"


def find_lora_weights(search_dir):
    """Locate the LoRA weights file produced by training.

    Lookup order:
      1. ``<search_dir>/pytorch_lora_weights.safetensors`` - the final weights of a
         completed run.
      2. The highest-numbered ``checkpoint-<step>`` folder that contains a weights
         file - an intermediate state, used when no final weights exist.

    Entries whose suffix is not a plain step number (e.g. ``checkpoint-tmp``) are
    ignored instead of raising ``ValueError``.

    Args:
        search_dir: Directory the training run wrote to.

    Returns:
        ``(weights_path, checkpoint_dir)``. ``checkpoint_dir`` is None when the final
        weights were used; both are None when nothing was found.
    """
    if not os.path.isdir(search_dir):
        print(f"Output directory {search_dir} does not exist. Please check your path.")
        return None, None

    final_weights = os.path.join(search_dir, LORA_WEIGHTS_FILENAME)
    if os.path.isfile(final_weights):
        return final_weights, None

    # Collect (step, folder name) for every well-formed checkpoint folder.
    numbered_checkpoints = []
    for entry in os.listdir(search_dir):
        step_match = re.fullmatch(r"checkpoint-(\d+)", entry)
        if step_match and os.path.isdir(os.path.join(search_dir, entry)):
            numbered_checkpoints.append((int(step_match.group(1)), entry))

    if not numbered_checkpoints:
        print(f"No final LoRA weights or checkpoint folders found in {search_dir}.")
        return None, None

    # Newest first; skip checkpoints that lack the weights file.
    for _, folder_name in sorted(numbered_checkpoints, reverse=True):
        checkpoint_dir = os.path.join(search_dir, folder_name)
        candidate = os.path.join(checkpoint_dir, LORA_WEIGHTS_FILENAME)
        if os.path.isfile(candidate):
            return candidate, checkpoint_dir

    print(f"Checkpoint folders exist in {search_dir}, but none contains {LORA_WEIGHTS_FILENAME}.")
    return None, None


lora_model_path, latest_checkpoint_path = find_lora_weights(output_dir)

# Proceed only if a weights file was found
if lora_model_path:
    print(f"Found LoRA weights at: {lora_model_path}")
    print("Loading base Stable Diffusion pipeline...")
    # Load the base model; it will download if not cached
    pipe = StableDiffusionPipeline.from_pretrained(base_model_name, torch_dtype=torch.float16)

    print("Loading LoRA weights into the pipeline...")
    # Load your custom LoRA weights
    pipe.load_lora_weights(os.path.dirname(lora_model_path), weight_name=os.path.basename(lora_model_path))

    # Move the entire model to the GPU for inference
    pipe.to("cuda")
    print("Fine-tuned model loaded successfully for inference!")
else:
    print("ERROR: LoRA model weights not found. Cannot proceed with inference.")
    print("Please ensure your training process completed and saved the weights to the correct 'output_dir'.")
    pipe = None # Set pipe to None if loading failed, to prevent further errors

No checkpoint folders found in /content/drive/MyDrive/SketchPoemAI/finetuned_model.\nERROR: LoRA model weights not found. Cannot proceed with inference.\nPlease ensure your training process completed and saved the weights to the correct 'output_dir'.\n

In [7]:
# --- Debugging: Check Contents of Output Directory ---
# Lists what the training run left in `output_dir` and reports whether usable LoRA
# weights exist, either as final weights in the directory itself or inside the
# newest checkpoint folder.
import os
import re

output_dir = "/content/drive/MyDrive/SketchPoemAI/finetuned_model"


def find_latest_checkpoint(folder_names):
    """Return the ``checkpoint-<step>`` name with the highest step, or None.

    Only names of the exact form ``checkpoint-<digits>`` are considered, so stray
    entries such as ``checkpoint-tmp`` are skipped instead of raising ``ValueError``.

    Args:
        folder_names: Iterable of directory entry names.

    Returns:
        The name with the largest step number, or None if no name qualifies.
    """
    best_step, best_name = -1, None
    for name in folder_names:
        step_match = re.fullmatch(r"checkpoint-(\d+)", name)
        if step_match and int(step_match.group(1)) > best_step:
            best_step, best_name = int(step_match.group(1)), name
    return best_name


print(f"Checking contents of: {output_dir}")
if os.path.exists(output_dir):
    contents = os.listdir(output_dir)
    if contents:
        print("Contents found:")
        for item in contents:
            print(f"- {item}")

        # Final weights of a completed run are expected directly in output_dir.
        final_lora_path = os.path.join(output_dir, "pytorch_lora_weights.safetensors")
        if os.path.exists(final_lora_path):
            print(f"\nSUCCESS: Final LoRA weights file EXISTS at: {final_lora_path}")

        checkpoint_folders = [f for f in contents if f.startswith('checkpoint-')]
        if checkpoint_folders:
            print(f"\nDetected checkpoint folders: {checkpoint_folders}")
            latest_checkpoint_folder = find_latest_checkpoint(checkpoint_folders)
            if latest_checkpoint_folder is None:
                print("\nERROR: None of the 'checkpoint-' folders ends in a step number.")
            else:
                potential_lora_path = os.path.join(output_dir, latest_checkpoint_folder, "pytorch_lora_weights.safetensors")
                print(f"Looking for LoRA weights at: {potential_lora_path}")
                if os.path.exists(potential_lora_path):
                    print("\nSUCCESS: LoRA weights file EXISTS at this path!")
                    print("You can proceed to run the 'Inference Pipeline: Cell 1 - Load the Fine-tuned Model' again.")
                else:
                    print("\nERROR: LoRA weights file DOES NOT EXIST at the expected path inside the checkpoint folder.")
                    print("Double-check the file name inside your checkpoint folder.")
        else:
            print("\nERROR: No folders starting with 'checkpoint-' found.")
    else:
        print("\nDirectory is empty.")
else:
    print(f"\nERROR: Directory {output_dir} does NOT exist.")

print("\nPlease verify the actual contents of your Google Drive.")

Checking contents of: /content/drive/MyDrive/SketchPoemAI/finetuned_model\n\nDirectory is empty.\n\nPlease verify the actual contents of your Google Drive.\n

In [None]:
# --- Inference Pipeline: Cell 2 - Poem Understanding (NLP) and Prompt Construction ---

from transformers import pipeline
import random
import re
from collections import Counter # Import Counter for keyword extraction

# Load a basic sentiment analyzer used to pick the overall mood of the sketch.
sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest")

# 'instance_prompt' must be defined by the earlier 'Define Paths' cell (e.g., "a sksart sketch").
# It is the default style token, so generated images adhere to the trained style.

def analyze_poem_for_prompt(poem_text, style_token=instance_prompt):
    """Turn a poem into a (prompt, negative_prompt) pair for Stable Diffusion.

    The poem's sentiment selects a set of abstract mood descriptors, and its most
    frequent meaningful words are appended as literal subject keywords.

    Args:
        poem_text: The poem. None, empty or whitespace-only input yields ("", "").
        style_token: Text the prompt starts with; defaults to the value of
            `instance_prompt` at the time this cell was run.

    Returns:
        Tuple ``(prompt, negative_prompt)`` of strings.
    """
    if not poem_text or not poem_text.strip(): # Handle empty poem input
        return "", "" # Return empty prompts if no poem text

    # truncation=True keeps poems longer than the model's input limit from raising.
    sentiment_result = sentiment_analyzer(poem_text, truncation=True)[0]
    # Normalise the label's case: the comparison below uses upper-case keys, while the
    # model may report e.g. 'positive' or 'Positive'.
    sentiment_label = str(sentiment_result['label']).upper()

    # Map sentiment to abstract visual concepts. These influence the overall mood.
    mood_by_sentiment = {
        "POSITIVE": "light, flowing shapes, harmony, ascent, joyful, bright, vibrant",
        "NEGATIVE": "jagged lines, heavy forms, darkness, descent, fragmented, melancholic, chaotic, somber",
        "NEUTRAL": "balanced forms, subtle shifts, contemplative, stillness, ethereal, dreamlike, serene",
    }
    # Unknown labels fall back to the neutral mood so the prompt never reads "... of , ...".
    abstract_concepts = mood_by_sentiment.get(sentiment_label, mood_by_sentiment["NEUTRAL"])

    # Filter common English words that might not add visual meaning
    common_filler_words = {
        "the", "and", "that", "with", "from", "for", "are", "was", "has", "have", "you", "they",
        "this", "but", "not", "its", "her", "his", "their", "our", "there", "when", "where", "what",
        "which", "who", "whom", "whose", "why", "how", "then", "than", "more", "most", "each", "some",
        "such", "into", "onto", "upon", "about", "above", "across", "after", "again", "against",
        "among", "around", "at", "before", "behind", "below", "beneath", "beside", "between",
        "beyond", "down", "during", "except", "inside", "like", "near", "off", "on", "out",
        "outside", "over", "past", "through", "under", "until", "up", "while", "within", "without",
        "will", "would", "could", "should", "might", "must", "can", "may",
    }

    # Keyword extraction: take runs of letters, so punctuation attached to a word
    # ("dream," / "stream.") no longer causes the whole word to be discarded.
    letter_runs = re.findall(r"[^\W\d_]+", poem_text.lower())
    # Drop short and filler words BEFORE counting, so they cannot use up top-5 slots.
    words = [word for word in letter_runs if len(word) > 3 and word not in common_filler_words]
    top_keywords = [word for word, _count in Counter(words).most_common(5)]

    # Construct the primary positive prompt
    # Start with your style token, then abstract concepts, then keywords for specificity
    prompt = f"{style_token} of {abstract_concepts}"
    if top_keywords:
        prompt += f", depicting {', '.join(top_keywords)}"

    # Add general style descriptors that define your "sketch" aesthetic
    prompt += ", expressive lines, abstract visual storytelling, hand-drawn, ink drawing, charcoal, graphite pencil, high contrast, deep shadows, minimalist, evocative"

    # Define negative prompt to avoid undesirable elements
    negative_prompt = "text, words, realistic, deformed, blurry, ugly, distorted, low quality, human figures, cartoon, photography, watermark, signature, cluttered, repetitive patterns, cartoon, anime, 3d, digital art"

    return prompt, negative_prompt

# You can test the prompt generation here before using the Gradio UI
# test_poem = """
# The forgotten echoes of a dream,
# A fractured silence, a weeping stream.
# Through broken glass, new light now gleams,
# Hope's fragile tendrils, in silver beams.
# """
# generated_prompt, generated_neg_prompt = analyze_poem_for_prompt(test_poem)
# print(f"\n--- Test Prompt ---")
# print(f"Positive: {generated_prompt}")
# print(f"Negative: {generated_neg_prompt}")

In [None]:
# --- Inference Pipeline: Cell 3 - Interactive Tool (Gradio UI) ---

import gradio as gr
import torch
import random
from PIL import Image

# The UI has exactly three image slots, so at most three variations can be shown.
MAX_VARIATIONS = 3

# Ensure 'pipe' (your loaded model) is available from Cell 1.
# If Cell 1 failed or wasn't run, 'pipe' won't exist or will be None.
if 'pipe' not in locals() or pipe is None:
    print("Model 'pipe' not loaded. Please run 'Cell 1: Load the Fine-tuned Model' first.")
    print("If you just restarted the runtime, remember to run all preceding setup cells (Mount Drive, HF Login, Define Paths) first.")
    # Stop here: launching the UI without a model would only fail on the first click.
    raise RuntimeError("Fine-tuned model is not loaded. Cannot launch Gradio UI.")


def generate_sketch_ui(poem_text, num_variations=1, guidance_scale=7.5, seed_input=-1):
    """Generate up to three sketches for a poem with the loaded pipeline.

    Args:
        poem_text: The poem. None, empty or whitespace-only input yields three empty slots.
        num_variations: Number of images to generate; converted to int and limited
            to the range 1..3 (the number of output slots).
        guidance_scale: Classifier-free guidance scale passed to the pipeline.
        seed_input: Base seed. -1 or None (a cleared number field) picks a random
            base seed; variation i uses ``base seed + i``.

    Returns:
        Three values, one per image slot; a slot is None when it was not requested
        or when generating that variation failed.
    """
    if not poem_text or not poem_text.strip():
        # Return empty images if no poem is provided
        return [None, None, None]

    # Generate the base and negative prompts from the poem using the function defined in Cell 2
    prompt, negative_prompt = analyze_poem_for_prompt(poem_text)

    # UI values can arrive as floats; range() needs an int, and generating more
    # images than there are slots would only waste GPU time.
    variation_count = max(1, min(MAX_VARIATIONS, int(num_variations)))

    # Determine the seed(s) for generation.
    # -1 (or an emptied seed field, which arrives as None) means "pick a random base seed".
    # Otherwise the given seed is used and incremented per variation for reproducible variations.
    if seed_input is None or int(seed_input) == -1:
        initial_seed = random.randint(0, 1000000)
    else:
        initial_seed = int(seed_input)

    generated_images = []
    for i in range(variation_count):
        current_seed = initial_seed + i
        # Create a torch generator for reproducibility
        generator = torch.Generator("cuda").manual_seed(current_seed)

        print(f"\n--- Generating Variation {i+1} ---")
        print(f"Seed: {current_seed}")
        print(f"Positive Prompt: {prompt}")
        print(f"Negative Prompt: {negative_prompt}")

        try:
            # Generate the image using your fine-tuned pipeline
            image = pipe(
                prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=50, # 50 steps is a good balance for quality/speed
                guidance_scale=guidance_scale,
                generator=generator
            ).images[0]
            generated_images.append(image)
        except Exception as e:
            # Best effort: a failed variation leaves its slot empty instead of aborting the rest.
            print(f"Error generating image for variation {i+1}: {e}")
            generated_images.append(None)

    # Gradio requires a fixed number of outputs. Pad with None if fewer than 3 variations were requested.
    while len(generated_images) < MAX_VARIATIONS:
        generated_images.append(None)

    return generated_images[0], generated_images[1], generated_images[2]


# --- Gradio Interface Setup ---
print("\nLaunching Gradio UI...")
with gr.Blocks() as demo:
    gr.Markdown("# 🎨 Poem to Sketch AI Agent ✍️")
    gr.Markdown("Input a poem, and the AI will generate unique, abstract sketches in your custom artistic style!")

    with gr.Row():
        with gr.Column():
            poem_input = gr.Textbox(
                label="✍️ Enter your poem here:",
                lines=8,
                placeholder="Example:\n'The ancient oak, a silent sentinel, \nRooted deep in earth\'s forgotten lore. \nIts branches stretch, a twisted, gnarled farewell, \nTo sunlit dreams that visit nevermore.'"
            )
            num_variations_slider = gr.Slider(
                minimum=1,
                maximum=3,
                step=1,
                value=1,
                label="🖼️ Number of Sketch Variations"
            )
            guidance_scale_slider = gr.Slider(
                minimum=5.0,
                maximum=15.0,
                step=0.5,
                value=7.5,
                label="💡 Guidance Scale (How strictly to follow the poem\'s mood & style)"
            )
            seed_input = gr.Number(
                label="🎲 Seed (-1 for random, helps with reproducibility)",
                value=-1,
                step=1,
                precision=0
            )
            generate_button = gr.Button("✨ Generate Sketches!")

        with gr.Column():
            output_image_1 = gr.Image(label="Generated Sketch 1", width=512, height=512)
            output_image_2 = gr.Image(label="Generated Sketch 2", width=512, height=512)
            output_image_3 = gr.Image(label="Generated Sketch 3", width=512, height=512)

    # Bind the button click to the generation function
    generate_button.click(
        fn=generate_sketch_ui,
        inputs=[poem_input, num_variations_slider, guidance_scale_slider, seed_input],
        outputs=[output_image_1, output_image_2, output_image_3]
    )

# The guidance is printed BEFORE launching: with debug=True the launch call keeps the
# cell busy, so anything printed after it would only appear once the app has stopped.
print("Look for the public URL (usually ending in '.gradio.live') in the output below.")
print("Click on it to open your interactive Poem-to-Sketch AI agent in a new tab.")

# Launch the Gradio app. share=True generates a public, temporary URL.
demo.launch(share=True, debug=True)

print("\n--- Gradio UI stopped ---")