<a href="https://colab.research.google.com/github/XenO2H1/raksha.ai/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import zipfile
import cv2
import mediapipe as mp
import csv

# --- SETUP CHECK ---
try:
    mp_hands = mp.solutions.hands
    print("SUCCESS: MediaPipe loaded correctly!")
except AttributeError:
    print("CRITICAL ERROR: The restart didn't work. Please manually click Runtime > Restart Session.")
    exit()

# --- FIND DATASET ---
print("--- SEARCHING FOR DATASET ---")
# The zip usually stays in /content/ after a restart
zip_path = "/content/dataset.zip"

if not os.path.exists(zip_path):
    # Try to find it if it moved
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("ERROR: dataset.zip is gone! Please drag and drop it into Files again.")
        exit()

# --- UNZIP ---
extract_path = "/content/temp_data/"
# We unzip again just to be safe (overwriting is fast)
print(f"Unzipping {zip_path}...")
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# --- LOCATE FOLDERS ---
print("--- LOCATING IMAGES ---")
TARGET_DATA_DIR = None
for root, dirs, files in os.walk(extract_path):
    if len(dirs) > 10:
        if 'A' in dirs or 'a' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            print(f"Found data in: {TARGET_DATA_DIR}")
            break

if not TARGET_DATA_DIR:
    print("ERROR: Could not find image folders.")
else:
    # --- EXTRACTION ---
    OUTPUT_FILE = "/content/data.csv"
    hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    header = ['label']
    for i in range(21):
        header.append(f'x{i}')
        header.append(f'y{i}')
    writer.writerow(header)

    print(f"--- PROCESSING (This takes ~2 mins) ---")

    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        hand_landmarks = results.multi_hand_landmarks[0]
                        data_row = [dir_name]
                        base_x = hand_landmarks.landmark[0].x
                        base_y = hand_landmarks.landmark[0].y
                        for landmark in hand_landmarks.landmark:
                            data_row.append(landmark.x - base_x)
                            data_row.append(landmark.y - base_y)
                        writer.writerow(data_row)
                        count += 1

    f.close()
    print(f"\nDONE! Extracted {count} samples.")
    print(f"Refresh the Files tab and download 'data.csv'")

ModuleNotFoundError: No module named 'mediapipe'

In [2]:
# --- STEP 1: INSTALL CORRECT VERSIONS ---
print("Installing compatible libraries...")
!pip install mediapipe==0.10.14 protobuf==3.20.3 opencv-python
print("Installation Complete.")

# --- STEP 2: RUN EXTRACTION ---
import os
import zipfile
import cv2
import mediapipe as mp
import csv

# Setup MediaPipe
try:
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
    print("SUCCESS: MediaPipe is ready!")
except AttributeError:
    print("ERROR: Something went wrong with the install. Please try running this cell one more time.")
    exit()

# Find Dataset
print("--- SEARCHING FOR DATASET ---")
zip_path = "/content/dataset.zip"
if not os.path.exists(zip_path):
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("ERROR: 'dataset.zip' not found. Please drag and drop it into the Files sidebar again.")
        exit()

# Unzip
extract_path = "/content/temp_data/"
if not os.path.exists(extract_path):
    print(f"Unzipping {zip_path}...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# Locate Folders
print("--- LOCATING IMAGES ---")
TARGET_DATA_DIR = None
for root, dirs, files in os.walk(extract_path):
    if len(dirs) > 10:
        if 'A' in dirs or 'a' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            break

if not TARGET_DATA_DIR:
    print("ERROR: Could not find image folders inside the zip.")
else:
    # Extract
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    header = ['label']
    for i in range(21):
        header.append(f'x{i}')
        header.append(f'y{i}')
    writer.writerow(header)

    print(f"--- PROCESSING (Takes ~2 mins) ---")
    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        hand_landmarks = results.multi_hand_landmarks[0]
                        data_row = [dir_name]
                        base_x = hand_landmarks.landmark[0].x
                        base_y = hand_landmarks.landmark[0].y
                        for landmark in hand_landmarks.landmark:
                            data_row.append(landmark.x - base_x)
                            data_row.append(landmark.y - base_y)
                        writer.writerow(data_row)
                        count += 1

    f.close()
    print(f"\nDONE! Extracted {count} samples.")
    print(f"Download 'data.csv' from the Files sidebar.")

Installing compatible libraries...
Collecting mediapipe==0.10.14
  Using cached mediapipe-0.10.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf==3.20.3
  Using cached protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
INFO: pip is looking at multiple versions of mediapipe to determine which version is compatible with other requirements. This could take a while.
[31mERROR: Cannot install mediapipe==0.10.14 and protobuf==3.20.3 because these package versions have conflicting dependencies.[0m[31m
[0m
The conflict is caused by:
    The user requested protobuf==3.20.3
    mediapipe 0.10.14 depends on protobuf<5 and >=4.25.3

To fix this you could try to:
1. loosen the range of package versions you've specified
2. remove package versions to allow pip to attempt to solve the dependency conflict

[31mERROR: ResolutionImpossible: for help visit https://pip.pypa.io/en/latest/topics/dependency-resolution/#dealing-with-dependency-confl

ModuleNotFoundError: No module named 'mediapipe'

In [3]:
# --- STEP 1: FORCE CORRECT VERSIONS ---
print("--- FIXING LIBRARIES ---")
# Uninstall everything to clear the conflicts
!pip uninstall -y mediapipe protobuf

# Install the "Goldilocks" pair: MediaPipe + Protobuf 4.x (Compatible with Python 3.12)
!pip install mediapipe "protobuf>=4.25.3,<5.0.0" opencv-python

print("\n--- LIBRARIES READY. STARTING EXTRACTION ---")

# --- STEP 2: RUN EXTRACTION IMMEDIATELY ---
import os
import zipfile
import cv2
import mediapipe as mp
import csv

# Setup MediaPipe
try:
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
    print("SUCCESS: MediaPipe loaded correctly!")
except AttributeError:
    # If this hits, it means the uninstall didn't fully clear from memory.
    print("CRITICAL: Please click 'Runtime' -> 'Restart Session' at the top, then run this cell again.")
    exit()

# Find Dataset
print("--- SEARCHING FOR DATASET ---")
zip_path = "/content/dataset.zip"
if not os.path.exists(zip_path):
    # Check alternate location
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("ERROR: 'dataset.zip' is missing. Please drag and drop it into the Files sidebar again.")
        exit()

# Unzip
extract_path = "/content/temp_data/"
if not os.path.exists(extract_path):
    print(f"Unzipping {zip_path}...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# Locate Folders
print("--- LOCATING IMAGES ---")
TARGET_DATA_DIR = None
for root, dirs, files in os.walk(extract_path):
    if len(dirs) > 10:
        if 'A' in dirs or 'a' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            break

if not TARGET_DATA_DIR:
    print("ERROR: Could not find image folders inside the zip.")
else:
    # Extract
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    header = ['label']
    for i in range(21):
        header.append(f'x{i}')
        header.append(f'y{i}')
    writer.writerow(header)

    print(f"--- PROCESSING (Takes ~2 mins) ---")
    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        hand_landmarks = results.multi_hand_landmarks[0]
                        data_row = [dir_name]
                        base_x = hand_landmarks.landmark[0].x
                        base_y = hand_landmarks.landmark[0].y
                        for landmark in hand_landmarks.landmark:
                            data_row.append(landmark.x - base_x)
                            data_row.append(landmark.y - base_y)
                        writer.writerow(data_row)
                        count += 1

    f.close()
    print(f"\nDONE! Extracted {count} samples.")
    print(f"Download 'data.csv' from the Files sidebar.")

--- FIXING LIBRARIES ---
[0mCollecting mediapipe
  Using cached mediapipe-0.10.31-py3-none-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5.0.0,>=4.25.3
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Using cached mediapipe-0.10.31-py3-none-manylinux_2_28_x86_64.whl (10.3 MB)
Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m294.9/294.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, mediapipe
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydf 0.13.0 requires protobuf<7.0.0,>=5.29.1, but you have protobuf 4.25.8 which is incompatible.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but y


--- LIBRARIES READY. STARTING EXTRACTION ---
CRITICAL: Please click 'Runtime' -> 'Restart Session' at the top, then run this cell again.
--- SEARCHING FOR DATASET ---
--- LOCATING IMAGES ---
--- PROCESSING (Takes ~2 mins) ---


NameError: name 'hands' is not defined

In [1]:
import os
import zipfile
import cv2
import mediapipe as mp
import csv

# --- VERIFY SETUP ---
try:
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
    print("SUCCESS: MediaPipe is working perfectly!")
except AttributeError:
    print("STOP: You did not restart the runtime. Go to Runtime > Restart Session.")
    exit()

# --- FIND DATASET ---
print("--- SEARCHING FOR DATASET ---")
zip_path = "/content/dataset.zip"
if not os.path.exists(zip_path):
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("ERROR: dataset.zip is missing. Please re-upload it.")
        exit()

# --- UNZIP (If needed) ---
extract_path = "/content/temp_data/"
if not os.path.exists(extract_path):
    print("Unzipping data...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# --- LOCATE IMAGES ---
TARGET_DATA_DIR = None
for root, dirs, files in os.walk(extract_path):
    if len(dirs) > 10:
        if 'A' in dirs or 'a' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            break

if not TARGET_DATA_DIR:
    print("ERROR: Could not find image folders.")
else:
    # --- PROCESS & SAVE ---
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    header = ['label'] + [f'x{i}' for i in range(21)] + [f'y{i}' for i in range(21)]
    writer.writerow(header)

    print("--- PROCESSING (This will take ~60 seconds) ---")
    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        lm = results.multi_hand_landmarks[0]
                        base_x, base_y = lm.landmark[0].x, lm.landmark[0].y
                        # Flatten list: Label, x0, x1... y0, y1...
                        row = [dir_name] + [p.x - base_x for p in lm.landmark] + [p.y - base_y for p in lm.landmark]
                        writer.writerow(row)
                        count += 1

    f.close()
    print(f"\nSUCCESS! Saved {count} samples to data.csv")
    print("Download it now from the Files sidebar!")

ModuleNotFoundError: No module named 'mediapipe'

In [None]:
import os
import sys

# --- PART 1: AUTO-FIX & CHECK ENVIRONMENT ---
try:
    import mediapipe as mp
    import cv2
    import csv
    import zipfile

    # Try to load the specific tool that was breaking
    mp_hands = mp.solutions.hands
    # If this fails, we go to the 'except' block below
    hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
    print("‚úÖ SUCCESS: Environment is ready!")

except (AttributeError, ImportError):
    print("‚ö†Ô∏è BROKEN ENVIRONMENT DETECTED. FIXING NOW...")

    # 1. Force uninstall bad versions
    os.system("pip uninstall -y mediapipe protobuf")

    # 2. Install the exact compatible versions
    print("Installing correct libraries...")
    os.system("pip install mediapipe==0.10.14 protobuf==3.20.3 opencv-python")

    # 3. Crash the runtime to force a restart (This is intentional!)
    print("üîÑ RESTARTING RUNTIME... Please wait 10 seconds and RUN THIS CELL AGAIN.")
    os.kill(os.getpid(), 9)

# --- PART 2: THE DATA EXTRACTION (Only runs if Part 1 passed) ---

# Define the extraction logic
def run_extraction():
    print("--- SEARCHING FOR DATASET ---")
    zip_path = "/content/dataset.zip"
    if not os.path.exists(zip_path):
        if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
        else:
            print("‚ùå ERROR: 'dataset.zip' not found. Please drag and drop it to Files again.")
            return

    # Unzip
    extract_path = "/content/temp_data/"
    if not os.path.exists(extract_path):
        print("Unzipping data...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)

    # Find Folders
    print("--- LOCATING IMAGES ---")
    TARGET_DATA_DIR = None
    for root, dirs, files in os.walk(extract_path):
        if len(dirs) > 10:
            if 'A' in dirs or 'a' in dirs or '0' in dirs:
                TARGET_DATA_DIR = root
                break

    if not TARGET_DATA_DIR:
        print("‚ùå ERROR: Could not find image folders (A-Z) inside the zip.")
        return

    # Extract
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    # Header: Label, x0..x20, y0..y20
    header = ['label'] + [f'x{i}' for i in range(21)] + [f'y{i}' for i in range(21)]
    writer.writerow(header)

    print("--- PROCESSING (This takes ~1 minute) ---")
    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        lm = results.multi_hand_landmarks[0]
                        base_x, base_y = lm.landmark[0].x, lm.landmark[0].y
                        row = [dir_name] + [p.x - base_x for p in lm.landmark] + [p.y - base_y for p in lm.landmark]
                        writer.writerow(row)
                        count += 1

    f.close()
    print(f"\nüéâ DONE! Saved {count} samples.")
    print("üëâ Right-click 'data.csv' in the Files tab and Download.")

# Run it
run_extraction()

‚ö†Ô∏è BROKEN ENVIRONMENT DETECTED. FIXING NOW...
Installing correct libraries...


In [None]:
import os

print("--- NUKING CONFLICTING LIBRARIES ---")
# Force uninstall everything to be safe
os.system("pip uninstall -y mediapipe protobuf")

print("--- INSTALLING COMPATIBLE VERSIONS ---")
# These specific versions are 100% compatible on Colab
os.system("pip install mediapipe==0.10.14 protobuf==3.20.3")

print("--- RESTARTING RUNTIME AUTOMATICALLY ---")
print("The session will restart now. You don't need to click anything.")
print("Wait 10 seconds, then run Step 2 below.")

# This kills the current process to force a reload
os.kill(os.getpid(), 9)

--- NUKING CONFLICTING LIBRARIES ---
--- INSTALLING COMPATIBLE VERSIONS ---


In [None]:
import os

print("--- 1. UNINSTALLING BROKEN LIBRARIES ---")
os.system("pip uninstall -y mediapipe protobuf")

print("--- 2. INSTALLING COMPATIBLE VERSIONS ---")
# We force a very specific combination that is stable on Colab
os.system("pip install mediapipe==0.10.14 protobuf==4.25.3 opencv-python")

print("--- 3. RESETTING RUNTIME ---")
print("‚úÖ The session will crash now. This is GOOD.")
print("Wait 5 seconds, then move to Step 2 below.")

# Force kill to clear memory
os.kill(os.getpid(), 9)

--- 1. UNINSTALLING BROKEN LIBRARIES ---
--- 2. INSTALLING COMPATIBLE VERSIONS ---


In [1]:
import os
import cv2
import csv
import zipfile
import mediapipe as mp

# --- ROBUST IMPORT FIX ---
# If standard import fails, we force-load the submodules
try:
    mp_hands = mp.solutions.hands
except AttributeError:
    import mediapipe.python.solutions.hands as mp_hands

# Initialize
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
print("‚úÖ SUCCESS: MediaPipe is working!")

# --- SEARCH FOR DATA ---
print("--- SEARCHING FOR DATASET ---")
zip_path = "/content/dataset.zip"
if not os.path.exists(zip_path):
    # Check alternate location
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("‚ùå ERROR: 'dataset.zip' not found. Please drag and drop it into Files again.")
        exit()

# --- UNZIP ---
extract_path = "/content/temp_data/"
if not os.path.exists(extract_path):
    print("Unzipping data...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# --- FIND IMAGES ---
TARGET_DATA_DIR = None
for root, dirs, files in os.walk(extract_path):
    if len(dirs) > 10:
        if 'A' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            break

if not TARGET_DATA_DIR:
    print("‚ùå ERROR: Could not find image folders (A-Z).")
else:
    # --- EXTRACT ---
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    # Header
    header = ['label'] + [f'x{i}' for i in range(21)] + [f'y{i}' for i in range(21)]
    writer.writerow(header)

    print("--- PROCESSING (Takes ~60 seconds) ---")
    count = 0
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                if img_path.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        lm = results.multi_hand_landmarks[0]
                        base_x, base_y = lm.landmark[0].x, lm.landmark[0].y
                        row = [dir_name] + [p.x - base_x for p in lm.landmark] + [p.y - base_y for p in lm.landmark]
                        writer.writerow(row)
                        count += 1

    f.close()
    print(f"\nüéâ DONE! Saved {count} samples.")
    print("üëâ Right-click 'data.csv' in the Files tab and Download.")

‚úÖ SUCCESS: MediaPipe is working!
--- SEARCHING FOR DATASET ---
‚ùå ERROR: Could not find image folders (A-Z).


In [2]:
import os
import cv2
import csv
import zipfile
import mediapipe as mp

# --- 1. SETUP LIBRARIES ---
try:
    mp_hands = mp.solutions.hands
except AttributeError:
    import mediapipe.python.solutions.hands as mp_hands

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)
print("‚úÖ SUCCESS: MediaPipe is ready.")

# --- 2. FIND DATA ---
zip_path = "/content/dataset.zip"
if not os.path.exists(zip_path):
    if os.path.exists("/dataset.zip"): zip_path = "/dataset.zip"
    else:
        print("‚ùå ERROR: 'dataset.zip' missing. Re-upload it if needed.")
        exit()

# --- 3. UNZIP ---
extract_path = "/content/temp_data/"
if not os.path.exists(extract_path):
    print("Unzipping data...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

# --- 4. FIND IMAGES (UPDATED FOR LOWERCASE) ---
TARGET_DATA_DIR = None
print("--- SEARCHING FOR FOLDERS ---")

for root, dirs, files in os.walk(extract_path):
    # Check if folder has many subfolders (like a,b,c or 0,1,2)
    if len(dirs) > 5:
        # CHECK FOR LOWERCASE 'a', CAPS 'A', OR NUMBER '0'
        if 'a' in dirs or 'A' in dirs or '0' in dirs:
            TARGET_DATA_DIR = root
            print(f"‚úÖ Found data in: {TARGET_DATA_DIR}")
            break

if not TARGET_DATA_DIR:
    print("‚ùå ERROR: Still couldn't find folders. Here is what I found:")
    for root, dirs, _ in os.walk(extract_path):
        print(f"Folder: {root} -> Subfolders: {dirs}")
else:
    # --- 5. EXTRACT & SAVE ---
    OUTPUT_FILE = "/content/data.csv"
    f = open(OUTPUT_FILE, 'w', newline='')
    writer = csv.writer(f)

    # Header
    header = ['label'] + [f'x{i}' for i in range(21)] + [f'y{i}' for i in range(21)]
    writer.writerow(header)

    print("--- PROCESSING (Takes ~60 seconds) ---")
    count = 0
    # Process folders sorted alphabetically
    for dir_name in sorted(os.listdir(TARGET_DATA_DIR)):
        dir_path = os.path.join(TARGET_DATA_DIR, dir_name)
        if os.path.isdir(dir_path):
            # print(f"Scanning {dir_name}...") # Uncomment for debug
            for img_name in os.listdir(dir_path):
                img_path = os.path.join(dir_path, img_name)
                # Check for valid images
                if img_path.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img = cv2.imread(img_path)
                    if img is None: continue

                    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    results = hands.process(img_rgb)

                    if results.multi_hand_landmarks:
                        lm = results.multi_hand_landmarks[0]
                        base_x, base_y = lm.landmark[0].x, lm.landmark[0].y

                        # Save Data: Label, x0,x1... y0,y1...
                        row = [dir_name] + [p.x - base_x for p in lm.landmark] + [p.y - base_y for p in lm.landmark]
                        writer.writerow(row)
                        count += 1

    f.close()
    print(f"\nüéâ DONE! Extracted {count} hand gestures.")
    print("üëâ Right-click 'data.csv' in the Files tab and Download!")

‚úÖ SUCCESS: MediaPipe is ready.
--- SEARCHING FOR FOLDERS ---
‚úÖ Found data in: /content/temp_data/dataset - Gesture Speech
--- PROCESSING (Takes ~60 seconds) ---





üéâ DONE! Extracted 26928 hand gestures.
üëâ Right-click 'data.csv' in the Files tab and Download!
