In [None]:
#!pip install transformers
#!pip install datasets

In [None]:
import os
import shutil
import json
import torch
from datasets import load_dataset

In [None]:
# Pick the torch device string: GPU when CUDA is available, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

### Preprocessing

In [None]:
# Root folder holding one sub-directory of images per yoga pose.
main_dir = "/content/drive/MyDrive/yoga_bot/dataset/"

# os.listdir() returns entries in arbitrary order, so slicing off "the first
# element" cannot reliably remove a stray non-pose entry (and may drop a real
# pose instead). Keep only actual sub-directories, ignore hidden ones such as
# ".ipynb_checkpoints", and sort so every run sees the poses in the same order.
yoga_names = sorted(
    entry
    for entry in os.listdir(main_dir)
    if not entry.startswith(".") and os.path.isdir(os.path.join(main_dir, entry))
)

# Build the visualization-focused text prompts for a single pose
def create_prompts(yoga_pose_name):
    """Return the ten prompt strings for ``yoga_pose_name``.

    Each prompt is a fixed sentence template with the pose name substituted
    in; the list order is always the same.
    """
    templates = (
        "Describe the {yoga_pose_name} pose.",
        "How does the {yoga_pose_name} pose look?",
        "Imagine the {yoga_pose_name} pose. What does it look like?",
        "Visualize the {yoga_pose_name} pose and describe its details.",
        "Picture someone in the {yoga_pose_name} pose. How do they look?",
        "What is the {yoga_pose_name} pose?",
        "Describe the key features of the {yoga_pose_name} pose.",
        "How would you visualize the {yoga_pose_name} pose?",
        "Think about the {yoga_pose_name} pose. What do you see?",
        "Provide a visual description of the {yoga_pose_name} pose.",
    )
    return [template.format(yoga_pose_name=yoga_pose_name) for template in templates]

# Map each pose name to the list of prompts generated for it
yoga_prompts = {pose: create_prompts(pose) for pose in yoga_names}

In [1]:
# Function to pair every image with one of its pose's text prompts
def text_img_pairs(main_dir, yoga_prompts):
    """Build ``(prompt, image_path)`` pairs for every pose folder.

    Args:
        main_dir: Directory containing one sub-directory per pose.
        yoga_prompts: Mapping of pose name (also the sub-directory name under
            ``main_dir``) to the list of prompts for that pose.

    Returns:
        A list of ``(prompt, image_path)`` tuples. Within each pose the
        prompts are assigned round-robin over the images, so they are used
        as evenly as possible.

    Raises:
        OSError: If a pose directory cannot be listed (e.g. it is missing).
        ZeroDivisionError: If a pose has images but an empty prompt list.
    """
    data_pairs = []

    for yoga_pose_name, prompts in yoga_prompts.items():
        img_dir = os.path.join(main_dir, yoga_pose_name)

        # os.listdir() order is arbitrary: sort so the prompt <-> image
        # assignment is identical on every run. Skip sub-directories and
        # hidden entries (e.g. ".ipynb_checkpoints", ".DS_Store"); they are
        # not images and would break the later copy / dataset loading.
        img_list = sorted(
            name
            for name in os.listdir(img_dir)
            if not name.startswith(".") and os.path.isfile(os.path.join(img_dir, name))
        )

        # Create (prompt, image_path) pairs in a balanced way
        for i, img in enumerate(img_list):
            prompt = prompts[i % len(prompts)]
            image_path = os.path.join(img_dir, img)
            data_pairs.append((prompt, image_path))

    return data_pairs

# Flat list of (prompt, image_path) tuples covering every pose folder;
# the copy/metadata cell below iterates over it.
data_pairs = text_img_pairs(main_dir, yoga_prompts)

### Creating a new data folder and metadata for fine-tuning

### Documentation on the dataset format expected for fine-tuning: https://huggingface.co/docs/datasets/v2.4.0/en/image_load#imagefolder-with-metadata

In [None]:
# Flatten the per-pose folders into a single directory that also holds a
# metadata.jsonl file (one {"file_name", "text"} record per copied image).
destination_dir = '/content/drive/MyDrive/yoga_bot/yoga_img_dataset'
metadata_path = os.path.join(destination_dir, 'metadata.jsonl')

# Create the target folder up front; harmless if it is already there
os.makedirs(destination_dir, exist_ok=True)

metadata = []
for prompt, img_path in data_pairs:
    # Split ".../<pose>/<image>" into its pose folder and image file name
    pose_dir, img_name = os.path.split(img_path)
    pose_name = os.path.basename(pose_dir)

    # Prefix the file name with the pose name for the flattened folder
    new_img_name = f"{pose_name}_{img_name}"
    new_img_path = os.path.join(destination_dir, new_img_name)

    # Copy the image and record its caption
    shutil.copy(img_path, new_img_path)
    metadata.append({"file_name": new_img_name, "text": prompt})

# Emit one JSON object per line (JSON Lines)
with open(metadata_path, 'w') as f:
    f.writelines(json.dumps(entry) + '\n' for entry in metadata)

### Clone the Diffusers from HuggingFace Repo

### Documentation for text-to-image fine-tuning: https://github.com/huggingface/diffusers/tree/main/examples/text_to_image

In [None]:
# Clone into the exact path the later cells hard-code (/content/diffusers)
# instead of relying on the current working directory, and skip the clone when
# the checkout already exists so this cell can be re-run without failing.
!test -d /content/diffusers || git clone https://github.com/huggingface/diffusers /content/diffusers

In [None]:
# The example scripts on the main branch expect diffusers itself to be
# installed from source (see the examples README), so install the cloned repo
# first and then the example-specific requirements.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install /content/diffusers
%pip install -U -r /content/diffusers/examples/text_to_image/requirements.txt

In [None]:
# Write a default (non-interactive) Accelerate configuration with fp16 mixed
# precision, so the `accelerate launch` cell below has a config to use.
!accelerate config default --mixed_precision fp16

In [None]:
# Load the flattened folder (images + metadata.jsonl) with the "imagefolder"
# builder; drop_labels=True is passed so no label column is derived from the
# folder layout. `dataset` is not referenced again in the cells shown here —
# presumably a sanity check that the folder parses; TODO confirm.
dataset = load_dataset("imagefolder", data_dir="/content/drive/MyDrive/yoga_bot/yoga_img_dataset", drop_labels=True)

In [None]:
# Export the model id and the dataset / output locations as environment
# variables so that `!` shell cells can reference them as $NAME.
os.environ.update({
    'MODEL_NAME': "CompVis/stable-diffusion-v1-4",
    'DATASET_NAME': "/content/drive/MyDrive/yoga_bot/yoga_img_dataset",
    'OUTPUT_DIR': "/content/drive/MyDrive/yoga_bot/yoga-stable-diffusion-v1-4-model",
    'TRAIN_DIR': "/content/drive/MyDrive/yoga_bot/yoga_img_dataset",
    'OUTPUT_DIR1': "/content/drive/MyDrive/yoga_bot/yoga-stable-diffusion-v1-4-model-3000st",
})

In [None]:
# Fine-tune Stable Diffusion on the yoga image folder for 3000 optimizer steps
# (batch size 1 with 4 gradient-accumulation steps, constant LR 1e-5, EMA on).
# --seed pins the script's random state so the run is reproducible; the shell
# variables are quoted so paths containing spaces would still be passed intact.
!accelerate launch --mixed_precision="fp16" /content/diffusers/examples/text_to_image/train_text_to_image.py \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --train_data_dir="$TRAIN_DIR" \
  --use_ema \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --gradient_checkpointing \
  --max_train_steps=3000 \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --lr_scheduler="constant" --lr_warmup_steps=0 \
  --seed=42 \
  --output_dir="$OUTPUT_DIR1"