In [1]:
%%capture
# Environment setup cell. %%capture suppresses the pip output produced below.
import os, re
# Colab is detected by looking for "COLAB_" anywhere in the concatenated
# environment variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: a plain install lets pip resolve unsloth's dependencies.
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # v is the leading run of digits/dots of the torch version string
    # (e.g. "2.8.0"); it selects which xformers release to pin.
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    # --no-deps installs only the named packages, not their dependencies
    # (presumably to leave Colab's preinstalled torch stack untouched — confirm).
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# These two pins run in both branches, after the installs above, so they
# determine the final transformers and trl versions.
!pip install transformers==4.57.0
!pip install --no-deps trl==0.22.2

In [None]:
%%capture
!wget https://huggingface.co/datasets/barryallen16/fitcheck-annotate-dataset/resolve/main/fitcheck-dataset.zip
!unzip fitcheck-dataset.zip

In [None]:
# qwen-vl-utils provides the qwen_vl_utils module that process_vision_info is
# imported from below.
!pip install qwen-vl-utils

import torch
from unsloth import FastVisionModel
from qwen_vl_utils import process_vision_info
import json
from PIL import Image
import os
import pandas as pd
import time
from tqdm import tqdm

# Checkpoint loaded by this cell. Alternative checkpoint ids kept for reference:
#   "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit"
#   "unsloth/Qwen3-VL-4B-Instruct-unsloth-bnb-4bit"
MODEL_NAME = "unsloth/Qwen3-VL-8B-Instruct-unsloth-bnb-4bit"

# Print the checkpoint id itself so the log always matches what is loaded.
print(f"Loading {MODEL_NAME} with Unsloth optimization...")

# Load the 4-bit quantized model together with its processor.
model, processor = FastVisionModel.from_pretrained(
    MODEL_NAME,
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",  # Unsloth's optimized checkpointing
)

# Enable inference mode (important for speed)
FastVisionModel.for_inference(model)

print("✅ Model loaded with Unsloth optimization!")

In [None]:
def _parse_json_object(text):
    """Parse the span from the first '{' to the last '}' of ``text`` as JSON.

    Raises:
        json.JSONDecodeError: if ``text`` contains no such span or the span is
            not valid JSON.
    """
    start = text.find('{')
    end = text.rfind('}') + 1
    if start == -1 or end <= start:
        raise json.JSONDecodeError("No JSON object found", text, 0)
    return json.loads(text[start:end])


def classify_clothing_unsloth(image_path):
    """
    Classify one clothing image with the globally loaded vision-language model.

    The image is sent to ``model`` together with a prompt that asks for a JSON
    object describing the garment; the decoded reply is parsed and returned.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The parsed JSON object (a dict) on success, or None when the reply
        cannot be parsed as JSON or merely echoes the prompt's placeholder
        text. A warning is printed in either failure case.

    Raises:
        Whatever ``Image.open``, the processor or ``model.generate`` raise;
        only errors from parsing the reply are handled here.
    """

    # Load image; the context manager closes the file once the RGB copy exists.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    # Detailed prompt for Indian ethnic wear
    prompt = """Analyze this clothing item image. Focus only on the garment, ignore background and people.

Provide detailed analysis in ONLY valid JSON format:

{
  "specific_type": "detailed garment name (e.g., silk saree, embroidered kurti)",
  "category": "exact category: kurta/kurti/palazzo/churidar/salwar/saree/lehenga/anarkali_suit/gown/dupatta/blouse/choli/dhoti_pants/skirt/shirt/t_shirt/jeans/trousers/crop_top/peplum_top/anarkali_top/cape/jacket/shawl/lehenga_set",
  "color_primary": "dominant color with shade (e.g., deep maroon)",
  "color_secondary": ["secondary color1", "secondary color2"],
  "pattern": "design pattern (floral/paisley/solid/geometric/embroidery/prints)",
  "material": "fabric type (silk/cotton/chiffon/georgette/denim/linen/crepe)",
  "style": "traditional/contemporary/fusion/casual/formal/festive",
  "occasions": ["wedding", "festival", "casual", "office", "party", "daily_wear"],
  "weather": ["summer", "winter", "monsoon", "all_season"],
  "formality": "casual/semi_formal/formal/festive",
  "embellishments": ["embroidery", "sequins", "prints", "zari_work", "mirror_work", "plain"],
  "gender": "male/female/unisex",
  "fit": "loose/fitted/flowy/structured"
}

Return ONLY the JSON object, no other text."""

    # Single-turn chat message carrying the image and the instruction text.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    # Render the chat template to text; tokenization happens in processor().
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    )

    # Follow the model's own device instead of assuming "cuda".
    inputs = inputs.to(model.device)

    # NOTE(review): temperature only has an effect when sampling is enabled;
    # this relies on the checkpoint's generation config — confirm.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.3,
            use_cache=True  # Unsloth optimizes KV cache
        )

    # generate() returns prompt + completion; drop the prompt tokens.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    # Parse JSON
    try:
        classification = _parse_json_object(output_text)

        # The prompt embeds a valid JSON template. Reading the placeholder
        # values back from it keeps this echo check in sync with the prompt.
        template = _parse_json_object(prompt)
        if any(
            classification.get(key) == template[key]
            for key in ("specific_type", "category")
        ):
            print(f"Warning: Template response for {image_path}")
            return None

        return classification
    except json.JSONDecodeError:
        print(f"Warning: Could not parse JSON for {image_path}")
        return None
    except Exception as e:
        print(f"Error parsing for {image_path}: {e}")
        return None

In [None]:
def _list_images_one_level(base_path):
    """Return image paths found in the immediate sub-folders of ``base_path``, sorted."""
    extensions = ('.png', '.jpg', '.jpeg', '.webp')
    image_files = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # index-based item ids reproducible between runs.
    for folder_name in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder_name)
        if not os.path.isdir(folder_path):
            continue
        image_files.extend(
            os.path.join(folder_path, image_name)
            for image_name in sorted(os.listdir(folder_path))
            if image_name.lower().endswith(extensions)
        )
    return image_files


def _write_jsonl(records, path):
    """Write ``records`` to ``path`` as one JSON document per line."""
    with open(path, 'w', encoding='utf-8') as f:
        for record in records:
            f.write(json.dumps(record) + '\n')


def process_indofashion_unsloth(base_path, output_csv="wardrobe_database_unsloth.csv", output_jsonl="wardrobe_database_unsloth.jsonl", batch_size=1):
    """
    Classify every image in the sub-folders of ``base_path`` and save the results.

    Images are looked up exactly one directory level below ``base_path``
    (files with a .png/.jpg/.jpeg/.webp extension, case-insensitive) and passed
    one at a time to ``classify_clothing_unsloth``. Images for which it returns
    a falsy value or raises are skipped. Every 50 images the results so far are
    written to ``checkpoint_unsloth_<count>.csv`` / ``.jsonl`` in the current
    working directory.

    Args:
        base_path: Directory whose immediate sub-folders contain the images.
        output_csv: Path of the final CSV file.
        output_jsonl: Path of the final JSONL file.
        batch_size: Accepted for backward compatibility; currently unused,
            images are always processed one at a time.

    Returns:
        pandas.DataFrame with one row per classified image and the columns
        ``item_id``, ``filename``, ``image_path`` and ``classification``
        (``classification`` holds the dict returned by the classifier; use the
        JSONL file when the nested structure must be read back).
    """
    checkpoint_every = 50

    results = []
    image_files = _list_images_one_level(base_path)

    print(f"\n{'='*80}")
    print(f"Processing {len(image_files)} images with Unsloth-optimized Qwen3-VL-8B")
    print(f"Expected speedup: 2-5x faster than standard implementation")
    print(f"{'='*80}\n")

    start_time = time.time()

    # Process with progress bar
    for idx, image_path in enumerate(tqdm(image_files, desc="Classifying")):
        try:
            classification = classify_clothing_unsloth(image_path)
        except Exception as e:
            # Best effort: one unreadable image must not abort the whole run.
            print(f"\n❌ Error processing {image_path}: {e}")
            classification = None

        if classification:
            results.append({
                'item_id': f"item_{idx:04d}",
                'filename': os.path.basename(image_path),
                'image_path': image_path,
                'classification': classification
            })

        # Checkpoint on every 50th image, whether or not that particular
        # image was classified successfully.
        if (idx + 1) % checkpoint_every == 0:
            try:
                pd.DataFrame(results).to_csv(f"checkpoint_unsloth_{idx+1}.csv", index=False)
                _write_jsonl(results, f"checkpoint_unsloth_{idx+1}.jsonl")
                print(f"\n✅ Checkpoint saved: {idx+1} images processed")
            except Exception as e:
                # A failed checkpoint is reported but does not stop processing.
                print(f"\n❌ Error saving checkpoint at {idx+1} images: {e}")

    # Calculate metrics
    elapsed_time = time.time() - start_time

    # Save final results
    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    _write_jsonl(results, output_jsonl)

    print(f"\n{'='*80}")
    print(f"✅ Processing Complete!")
    print(f"✅ Processed: {len(results)} images")
    print(f"✅ Skipped: {len(image_files) - len(results)} images")
    print(f"✅ Time taken: {elapsed_time/60:.1f} minutes")
    if image_files:
        # The elapsed time covers every attempted image, not only the successes.
        print(f"✅ Average: {elapsed_time/len(image_files):.2f} seconds per image")
    print(f"✅ Saved to CSV: {output_csv}")
    print(f"✅ Saved to JSONL: {output_jsonl}")
    print(f"{'='*80}\n")

    return df

In [None]:
# Run the full pipeline on fitcheck-dataset/ (presumably the folder produced by
# the unzip step — confirm). With the default arguments the results are written
# to wardrobe_database_unsloth.csv and wardrobe_database_unsloth.jsonl.
df = process_indofashion_unsloth("fitcheck-dataset/")