In [1]:
%%capture
# Install Unsloth and its dependencies. The install path depends on whether the
# notebook is running in Colab; output is suppressed by %%capture above.
import os, re
# Colab is detected by searching for "COLAB_" in the concatenated names of all
# environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # v is the leading run of digits/dots in the installed torch version string;
    # it selects which xformers release gets pinned on the next line.
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    # These packages (and unsloth itself, below) are installed with --no-deps, so pip
    # does not pull in or change their dependencies.
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
# Run in both environments (outside the if/else): pin exact transformers and trl versions.
!pip install transformers==4.57.0
!pip install --no-deps trl==0.22.2

In [2]:
%%capture
!wget https://huggingface.co/datasets/barryallen16/fitcheck-annotate-dataset/resolve/main/fitcheck-dataset.zip
!unzip fitcheck-dataset.zip

In [3]:
!pip install qwen-vl-utils

import torch
from unsloth import FastVisionModel
from qwen_vl_utils import process_vision_info
import json
from PIL import Image
import os
import pandas as pd
import time
from tqdm import tqdm

# Checkpoint to load. Kept in one variable so the log message below always names the
# model that is actually loaded.
MODEL_NAME = "unsloth/Qwen3-VL-4B-Instruct-unsloth-bnb-4bit"
# Alternative checkpoint (larger 7B model):
# MODEL_NAME = "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit"

print(f"Loading {MODEL_NAME} with Unsloth optimization...")

# Load the 4-bit quantized model and its processor through Unsloth.
model, processor = FastVisionModel.from_pretrained(
    MODEL_NAME,
    load_in_4bit=True,
    use_gradient_checkpointing="unsloth",  # Unsloth's optimized checkpointing
)

# Enable inference mode (important for speed)
FastVisionModel.for_inference(model)

print("✅ Model loaded with Unsloth optimization!")

Collecting qwen-vl-utils
  Downloading qwen_vl_utils-0.0.14-py3-none-any.whl.metadata (9.0 kB)
Collecting av (from qwen-vl-utils)
  Downloading av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Downloading qwen_vl_utils-0.0.14-py3-none-any.whl (8.1 kB)
Downloading av-16.0.1-cp312-cp312-manylinux_2_28_x86_64.whl (40.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: av, qwen-vl-utils
Successfully installed av-16.0.1 qwen-vl-utils-0.0.14
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Loading Qwen2.5-VL with Unsloth optimization...
==((====))==  Unsloth 2025.10.5: Fast Qwen3_Vl patching. Transformers: 4.57.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        / 

model.safetensors:   0%|          | 0.00/4.30G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/213 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/782 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/707 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

video_preprocessor_config.json:   0%|          | 0.00/817 [00:00<?, ?B/s]

✅ Model loaded with Unsloth optimization!


In [4]:
# Building blocks of the classification prompt. The JSON template is its own constant
# so that the "model echoed the template" check compares against the exact placeholder
# text that was sent, rather than a hand-copied string that can drift out of sync.
_PROMPT_INTRO = """Analyze this clothing item image. Focus only on the garment, ignore background and people."""

_PROMPT_FORMAT_HEADER = "\n\nProvide detailed analysis in ONLY valid JSON format:\n\n"

_PROMPT_JSON_TEMPLATE = """{
  "specific_type": "detailed garment name (e.g., silk saree, embroidered women kurta)",
  "category": "exact category from this list: saree/women_kurta/leggings_salwar/palazzo/lehenga/dupatta/blouse/gown/dhoti_pants/petticoats/women_mojari/women_anarkali_kurta/women_a_line_kurta/men_kurta/nehru_jacket/sherwani/men_mojari/men_pagdi",
  "color_primary": "dominant color with shade (e.g., deep maroon)",
  "color_secondary": ["secondary color1", "secondary color2"],
  "pattern": "design pattern (floral/paisley/solid/geometric/embroidery/prints/striped/checkered)",
  "material": "fabric type (silk/cotton/chiffon/georgette/denim/linen/crepe/velvet/brocade/chanderi/banarasi)",
  "style": "traditional/contemporary/fusion/casual/formal/festive",
  "occasions": ["wedding", "festival", "casual", "office", "party", "daily_wear"],
  "weather": ["summer", "winter", "monsoon", "all_season"],
  "formality": "casual/semi_formal/formal/festive",
  "embellishments": ["embroidery", "sequins", "prints", "zari_work", "mirror_work", "gota_patti", "stone_work", "thread_work", "plain"],
  "gender": "male/female/unisex",
  "fit": "loose/fitted/flowy/structured/regular"
}"""

_PROMPT_FOOTER = "\n\nReturn ONLY the JSON object, no other text."

# Placeholder values of the template, parsed once from the text that is actually sent.
_TEMPLATE_PLACEHOLDERS = json.loads(_PROMPT_JSON_TEMPLATE)
# Fields whose value being identical to the placeholder marks an unfilled response.
_TEMPLATE_CHECK_FIELDS = ("specific_type", "category")


def _build_classification_prompt(context=None):
    """Assemble the full text prompt, inserting the optional context after the intro."""
    prompt = _PROMPT_INTRO
    if context:
        prompt += f"\n\nContext: {context}"
    return prompt + _PROMPT_FORMAT_HEADER + _PROMPT_JSON_TEMPLATE + _PROMPT_FOOTER


def _extract_json_object(text):
    """Return the span from the first '{' to the last '}' in text, or '' if there is none."""
    start = text.find('{')
    end = text.rfind('}')
    if start == -1 or end < start:
        return ""
    return text[start:end + 1]


def _is_template_response(classification):
    """Return True when a checked field still holds the prompt's placeholder text."""
    return any(
        classification.get(field) == _TEMPLATE_PLACEHOLDERS[field]
        for field in _TEMPLATE_CHECK_FIELDS
    )


def classify_clothing_unsloth(image_path, context=None):
    """
    Classify a clothing image with the Unsloth-loaded vision-language model.

    Relies on the notebook-level ``model`` and ``processor`` objects.

    Args:
        image_path: Path to the image file
        context: Optional context string to guide classification (e.g., "women's festive wear", "men's traditional")

    Returns:
        The dict parsed from the model's JSON answer, or None when the answer
        cannot be parsed as JSON or merely repeats the prompt's placeholder values
        (a warning is printed in both cases).
    """

    # Load image; the context manager closes the underlying file once the RGB copy
    # has been made.
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')

    prompt = _build_classification_prompt(context)

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    # Prepare inputs
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    )

    inputs = inputs.to("cuda")

    # Generate classification.
    # NOTE(review): no do_sample argument is passed, so whether temperature has any
    # effect depends on the model's generation config - confirm.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.3,
            use_cache=True
        )

    # Drop the prompt tokens so only the newly generated tokens are decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    # Parse JSON
    try:
        classification = json.loads(_extract_json_object(output_text))

        # Reject answers where the model returned the template instead of filling it in.
        if _is_template_response(classification):
            print(f"Warning: Template response for {image_path}")
            return None

        return classification
    except json.JSONDecodeError:
        print(f"Warning: Could not parse JSON for {image_path}")
        return None
    except Exception as e:
        # Best-effort: any other parsing problem is reported and treated as "no result".
        print(f"Error parsing for {image_path}: {e}")
        return None

In [5]:
def _write_jsonl(records, path):
    """Write each record as one JSON document per line to path, overwriting the file."""
    with open(path, 'w') as f:
        for record in records:
            f.write(json.dumps(record) + '\n')


def process_indofashion_unsloth(base_path, output_csv="wardrobe_database_unsloth.csv", output_jsonl="wardrobe_database_unsloth.jsonl", batch_size=1, context_mapper=None, checkpoint_every=50):
    """
    Classify every image found one folder level below base_path.
    Saves results in both CSV and JSONL formats

    Args:
        base_path: Path to the dataset folder; images are read from its immediate subfolders
        output_csv: Output CSV filename
        output_jsonl: Output JSONL filename
        batch_size: Currently unused; images are classified one at a time
        context_mapper: Optional function that takes image_path and returns context string
                       Example: lambda path: "women's festive wear" if "women" in path else "men's wear"
        checkpoint_every: Write checkpoint CSV/JSONL files after every this many images
                       (counting failed ones). Pass 0 or None to disable checkpoints.

    Returns:
        DataFrame with one row per successfully classified image
        (columns: item_id, filename, image_path, classification).
    """
    results = []
    image_files = []

    # Collect all image paths. Listings are sorted because os.listdir returns entries
    # in arbitrary order, and item_id is derived from the position in this list.
    for folder_name in sorted(os.listdir(base_path)):
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):
            for image_name in sorted(os.listdir(folder_path)):
                if image_name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
                    image_files.append(os.path.join(folder_path, image_name))

    print(f"\n{'='*80}")
    print(f"Processing {len(image_files)} images with the Unsloth-optimized vision-language model")
    print(f"Expected speedup: 2-5x faster than standard implementation")
    print(f"{'='*80}\n")

    start_time = time.time()

    # Process with progress bar
    for idx, image_path in enumerate(tqdm(image_files, desc="Classifying")):
        try:
            # Get context if mapper is provided
            context = context_mapper(image_path) if context_mapper else None

            classification = classify_clothing_unsloth(image_path, context=context)

            # Images without a usable classification are skipped, so item_id values
            # (based on idx) can have gaps.
            if classification:
                result_entry = {
                    'item_id': f"item_{idx:04d}",
                    'filename': os.path.basename(image_path),
                    'image_path': image_path,
                    'classification': classification
                }
                results.append(result_entry)

            # Periodic checkpoint: each one contains all results collected so far.
            if checkpoint_every and (idx + 1) % checkpoint_every == 0:
                temp_df = pd.DataFrame(results)
                temp_df.to_csv(f"checkpoint_unsloth_{idx+1}.csv", index=False)
                _write_jsonl(results, f"checkpoint_unsloth_{idx+1}.jsonl")

                print(f"\n✅ Checkpoint saved: {idx+1} images processed")

        except Exception as e:
            # Best-effort batch run: report the failure and move on to the next image.
            print(f"\n❌ Error processing {image_path}: {e}")
            continue

    # Calculate metrics
    elapsed_time = time.time() - start_time

    # Save final results
    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    _write_jsonl(results, output_jsonl)

    print(f"\n{'='*80}")
    print(f"✅ Processing Complete!")
    print(f"✅ Processed: {len(results)} images")
    print(f"✅ Time taken: {elapsed_time/60:.1f} minutes")
    if len(results) > 0:
        print(f"✅ Average: {elapsed_time/len(results):.2f} seconds per image")
    print(f"✅ Saved to CSV: {output_csv}")
    print(f"✅ Saved to JSONL: {output_jsonl}")
    print(f"{'='*80}\n")

    return df

In [6]:
def get_context_from_path(image_path):
    """Build the context string for an image: the literal path prefixed with a label."""
    return "image path: {}".format(image_path)

In [7]:
# @title
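# NOTE: disabled alternative to get_context_from_path, kept for reference. It maps the
# image's parent folder name to a coarse context string instead of passing the raw path.
# def get_context_from_path(image_path):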
    # """Extract context from folder structure or filename"""
    # folder_name = os.path.basename(os.path.dirname(image_path)).lower()
    # # Example mappings based on your dataset structure
    # if 'women' in folder_name or 'female' in folder_name:
    #     return "women's ethnic wear"
    # elif 'men' in folder_name or 'male' in folder_name:
    #     return "men's ethnic wear"
    # elif 'saree' in folder_name:
    #     return "traditional saree"
    # elif 'kurta' in folder_name:
    #     return "kurta/kurti"
    # elif 'lehenga' in folder_name:
    #     return "festive lehenga"
    # elif 'sherwani' in folder_name or 'nehru' in folder_name:
    #     return "men's formal traditional wear"
    # else:
    #     return None  # No context if unclear

In [8]:
# Classify every image under the extracted dataset folder, giving the model each
# image's path as context. Drop the context_mapper argument to run without context.
df = process_indofashion_unsloth("fitcheck-dataset/", context_mapper=get_context_from_path)



Processing 42 images with Unsloth-optimized Qwen2-VL-7B
Expected speedup: 2-5x faster than standard implementation



Classifying: 100%|██████████| 42/42 [12:32<00:00, 17.92s/it]


✅ Processing Complete!
✅ Processed: 42 images
✅ Time taken: 12.5 minutes
✅ Average: 17.92 seconds per image
✅ Saved to CSV: wardrobe_database_unsloth.csv
✅ Saved to JSONL: wardrobe_database_unsloth.jsonl




