<a href="https://colab.research.google.com/github/chenoa23/CV-Projects/blob/main/Gesture_Controlled_Light_Switch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Ignacio Calvo\
Chenoa Nussberger\
Paula Andrea Gallego\
Sheena Johns


# **Gesture-Controlled Light Switch Using Computer Vision**


#Project Purpose / Point
This project implements a gesture-controlled light switch using computer vision and deep learning. It uses a labeled dataset of hand gesture images to train a convolutional neural network (CNN) based on MobileNetV2 architecture. The trained model can recognize multiple hand gestures in real-time video input. By detecting specific hand gestures, the system toggles the state of a virtual light switch (on/off). The project integrates MediaPipe for hand landmark detection and TensorFlow for gesture classification, demonstrating practical application of computer vision for intuitive, contactless control interfaces.



# Dataset Inspection and Structure Overview

In [1]:
import os

# Define dataset paths
train_dir = '/content/drive/MyDrive/CAI 2840C/archive 1/train/train'
test_dir = '/content/drive/MyDrive/CAI 2840C/archive 1/test/test'

def count_images_in_subfolders(base_dir):
    for class_folder in os.listdir(base_dir):
        class_folder_path = os.path.join(base_dir, class_folder)
        if os.path.isdir(class_folder_path):  # Check if it's a directory
            num_images = len([
                file for file in os.listdir(class_folder_path)
                if os.path.isfile(os.path.join(class_folder_path, file))
            ])
            print(f"Class '{class_folder}' has {num_images} images")
        else:
            print(f"Unexpected file in directory: {class_folder}")

# Check contents of the train directory
print("Train Directory Structure:")
count_images_in_subfolders(train_dir)

# Check contents of the test directory
print("\nTest Directory Structure:")
count_images_in_subfolders(test_dir)

Train Directory Structure:


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/CAI 2840C/archive 1/train/train'

# Model Preparation, Training, and Saving

In [None]:
!pip install mediapipe

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D
from google.colab.patches import cv2_imshow

# Set up directories for training and testing data
train_dir = '/content/drive/MyDrive/CAI 2840C/archive 1/train/train'
test_dir = '/content/drive/MyDrive/CAI 2840C/archive 1/test/test'

# Data Augmentation for the training set
train_datagen = ImageDataGenerator(
    rescale=1.0/255,            # Normalize pixel values to [0, 1]
    rotation_range=20,          # Randomly rotate images
    width_shift_range=0.2,      # Randomly shift images horizontally
    height_shift_range=0.2,     # Randomly shift images vertically
    shear_range=0.2,            # Shear transformations
    zoom_range=0.2,             # Random zoom
    horizontal_flip=True,       # Flip images horizontally
    fill_mode='nearest'         # Fill missing pixels after transformations
)

# No augmentation for validation/test
test_datagen = ImageDataGenerator(rescale=1.0/255)

# Create the ImageDataGenerators for training and testing
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128, 128),     # Resize images to 128x128
    batch_size=16,              # Use a smaller batch size
    class_mode='categorical',   # Multi-class classification
    shuffle=True                # Shuffle data for training
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128, 128),     # Resize images to 128x128
    batch_size=8,               # Smaller batch for testing
    class_mode='categorical'    # Multi-class classification
)

# Use Transfer Learning with MobileNetV2
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

# Add custom layers on top
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

# Combine the base model and custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Set up early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Set up checkpoint to save the best model
checkpoint = ModelCheckpoint('gesture_model.keras', monitor='val_loss', save_best_only=True)

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    epochs=10,  # Start with fewer epochs
    validation_data=test_generator,
    validation_steps=test_generator.samples // test_generator.batch_size,
    callbacks=[checkpoint, early_stopping]
)

# Save the final model
model.save('gesture_model.keras')

#  Gesture Recognition and Light Control with MediaPipe and TensorFlow

In [None]:
# Load the trained model
model = load_model('gesture_model.keras')

# Class labels
class_labels = ['Gesture 0', 'Gesture 1', 'Gesture 2', 'Gesture 3', 'Gesture 4',
                'Gesture 5', 'Gesture 6', 'Gesture 7', 'Gesture 8', 'Gesture 9',
                'Gesture 10', 'Gesture 11', 'Gesture 12', 'Gesture 13', 'Gesture 14',
                'Gesture 15', 'Gesture 16', 'Gesture 17', 'Gesture 18', 'Gesture 19']

# MediaPipe setup
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

# Load video
video_path = '/content/drive/MyDrive/Movie on 12-3-24 at 12.00 PM.mov'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Unable to open video file.")
else:
    print("Video loaded successfully.")

# Light state
light_on = False

# Main loop for video processing
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        print("End of video reached.")
        break

    # Flip and process the frame
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)

    roi = None
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get bounding box around the detected hand
            h, w, _ = frame.shape
            bbox_x_min = int(min([lm.x for lm in hand_landmarks.landmark]) * w)
            bbox_y_min = int(min([lm.y for lm in hand_landmarks.landmark]) * h)
            bbox_x_max = int(max([lm.x for lm in hand_landmarks.landmark]) * w)
            bbox_y_max = int(max([lm.y for lm in hand_landmarks.landmark]) * h)

            # Define ROI dynamically based on bounding box
            roi = frame[bbox_y_min:bbox_y_max, bbox_x_min:bbox_x_max]
            cv2.rectangle(frame, (bbox_x_min, bbox_y_min), (bbox_x_max, bbox_y_max), (255, 0, 0), 2)

    # Predict the gesture if ROI is valid
    if roi is not None and roi.size > 0:
        roi_resized = cv2.resize(roi, (128, 128)) / 255.0  # Normalize
        roi_resized = np.expand_dims(roi_resized, axis=0)  # Add batch dimension

        # Predict the gesture
        predictions = model.predict(roi_resized)
        predicted_class = np.argmax(predictions)
        confidence = np.max(predictions)

        # Toggle light state if the confidence is high
        if confidence > 0.7:  # Threshold to avoid false triggers
            print(f"Predicted Gesture: {class_labels[predicted_class]} (Confidence: {confidence:.2f})")
            light_on = not light_on

    # Display light state
    status = "Light ON" if light_on else "Light OFF"
    color = (0, 255, 0) if light_on else (0, 0, 255)
    cv2.putText(frame, status, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

    # Display the frame in Colab
    cv2_imshow(frame)

# Release resources
cap.release()
print("Video processing completed.")