**Augmentation UDFs**

In [1]:
import cv2
import numpy as np

def random_rotation(image):
    """Rotate ``image`` about its centre by a randomly chosen angle.

    The angle is drawn with ``np.random.uniform(-30, 30)`` (degrees). The
    result has the same width and height as the input and is resampled with
    linear interpolation; the scale factor is 1.

    Args:
        image: Image array whose first two dimensions are (height, width).

    Returns:
        The rotated image as returned by ``cv2.warpAffine``.
    """
    degrees = np.random.uniform(-30, 30)
    height, width = image.shape[:2]
    centre = (width / 2, height / 2)
    rotation = cv2.getRotationMatrix2D(centre, degrees, 1)
    return cv2.warpAffine(image, rotation, (width, height), flags=cv2.INTER_LINEAR)

def random_flip(image):
    """Mirror ``image`` horizontally using ``cv2.flip`` with flip code 1.

    Despite the name, no random draw happens here: the flip is always
    applied when this function is called.

    Args:
        image: Image array accepted by ``cv2.flip``.

    Returns:
        The flipped image.
    """
    horizontal = 1
    return cv2.flip(image, horizontal)

def random_shift(image, max_shift=25):
    """Translate ``image`` by a random integer offset along each axis.

    Each offset is drawn independently from the inclusive range
    ``[-max_shift, max_shift]``. The output has the same width and height as
    the input.

    Args:
        image: Image array whose first two dimensions are (height, width).
            Both single-channel and multi-channel arrays are accepted.
        max_shift: Largest absolute shift in pixels along either axis.

    Returns:
        The shifted image as returned by ``cv2.warpAffine``.
    """
    # Slice to the first two dims so colour (H, W, C) images do not break the
    # unpacking, matching how the other augmentation helpers read the shape.
    rows, cols = image.shape[:2]
    # np.random.randint excludes its upper bound; +1 makes +max_shift reachable.
    shift_x = np.random.randint(-max_shift, max_shift + 1)
    shift_y = np.random.randint(-max_shift, max_shift + 1)
    M = np.float32([[1, 0, shift_x],
                    [0, 1, shift_y]])
    return cv2.warpAffine(image, M, (cols, rows))

def random_zoom(image):
    """Zoom into ``image`` by a random factor and crop back to the input size.

    The factor is drawn with ``np.random.uniform(1.0, 1.5)``. The image is
    resized by that factor and a window of the original height and width is
    cut out, offset by half of the size increase along each axis.

    Args:
        image: Image array whose first two dimensions are (height, width).

    Returns:
        The cropped, zoomed image.
    """
    scale = np.random.uniform(1.0, 1.5)
    src_h, src_w = image.shape[:2]
    big_h = int(src_h * scale)
    big_w = int(src_w * scale)

    enlarged = cv2.resize(image, (big_w, big_h))
    # Offsets that centre the crop window inside the enlarged image.
    top = (big_h - src_h) // 2
    left = (big_w - src_w) // 2
    return enlarged[top:top + src_h, left:left + src_w]

def random_brightness(image):
    """Scale the intensities of ``image`` by a random gain.

    The gain is drawn with ``np.random.uniform(0.5, 1.5)`` and applied via
    ``cv2.convertScaleAbs`` with no additive offset (``beta=0``).

    Args:
        image: Image array accepted by ``cv2.convertScaleAbs``.

    Returns:
        The brightness-adjusted image.
    """
    gain = np.random.uniform(0.5, 1.5)
    return cv2.convertScaleAbs(image, alpha=gain, beta=0)

def random_blur(image):
    """Blur ``image`` with ``cv2.GaussianBlur`` using a fixed 3x3 kernel.

    The sigma argument is passed as 0. No random draw happens inside this
    function.

    Args:
        image: Image array accepted by ``cv2.GaussianBlur``.

    Returns:
        The blurred image.
    """
    kernel = (3, 3)
    return cv2.GaussianBlur(image, kernel, 0)

def augment_image(image):
    """Apply a random subset of the augmentation helpers to ``image``.

    The helpers are visited in a fixed order (rotation, flip, shift, zoom,
    brightness, blur). Before each one a fresh ``np.random.rand()`` value is
    drawn and the helper is applied only when that value is below 0.5.

    Args:
        image: Image array to augment.

    Returns:
        The image after every selected helper has been applied in sequence.
    """
    pipeline = (
        random_rotation,
        random_flip,
        random_shift,
        random_zoom,
        random_brightness,
        random_blur,
    )
    for transform in pipeline:
        # Draw the coin first, then (maybe) run the helper, step by step.
        if np.random.rand() < 0.5:
            image = transform(image)
    return image

**Loading images and labels**

In [2]:
import os

def load_data(data_dir, augment=False):
    """Load grayscale images from a directory of per-class sub-folders.

    Every sub-directory of ``data_dir`` is treated as one class whose name is
    the label. Each readable image is loaded as grayscale and resized to
    256x256. Entries of ``data_dir`` that are not directories, and files that
    ``cv2.imread`` cannot decode, are skipped. Directory listings are sorted
    so the output order is deterministic.

    Args:
        data_dir: Path of the dataset root.
        augment: When true, five augmented variants produced by
            ``augment_image`` are appended after each original image.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays built from the collected
        images and their label strings, in matching order.
    """
    images, labels = [], []
    for label in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, label)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders are not classes.
            continue
        for image_file in sorted(os.listdir(class_dir)):
            image = cv2.imread(os.path.join(class_dir, image_file), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # imread signals an unreadable file with None instead of raising;
                # passing that on to resize would crash.
                continue
            image = cv2.resize(image, (256, 256))  # fixed size for all images
            images.append(image)
            labels.append(label)
            if augment:
                for _ in range(5):  # five augmented variants per source image
                    images.append(augment_image(image))
                    labels.append(label)
    return np.array(images), np.array(labels)

# Load the dataset from the 'ds' folder with augmentation enabled, so each
# source image is followed by its augmented variants.
images, labels = load_data('ds', augment=True)

**Extracting HOG features**

In [3]:
from skimage.feature import hog

def extract_hog_features(images):
    """Compute a HOG descriptor for every image in ``images``.

    Each image is passed to ``skimage.feature.hog`` with 9 orientations,
    8x8 pixels per cell, 4x4 cells per block and 'L2-Hys' block
    normalisation.

    Args:
        images: Iterable of image arrays.

    Returns:
        A NumPy array holding one descriptor per input image, in input order.
    """
    return np.array([
        hog(
            image,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(4, 4),
            block_norm='L2-Hys',
        )
        for image in images
    ])

# One HOG descriptor per loaded image; row i corresponds to images[i] / labels[i].
hog_features = extract_hog_features(images)


**Train/test split**

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing.
# - random_state pins the shuffle so a Restart & Run All reproduces the split.
# - stratify keeps the class proportions equal in both parts.
# NOTE(review): the split is done per sample, so augmented variants of one
# source image can land on both sides; the test score may therefore be
# optimistic. Splitting by source image before augmenting would avoid that.
X_train, X_test, y_train, y_test = train_test_split(
    hog_features, labels, test_size=0.2, random_state=42, stratify=labels
)


**Model training**

In [5]:
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Encode the string labels as integers. The encoder is fitted on the training
# labels only and then reused for the test labels.
# NOTE(review): a label that appears only in y_test would be unknown to the
# encoder - confirm every class is present in y_train.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Train a linear-kernel SVM on the HOG descriptors of the training split.
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train_encoded)


[1 1 0 ... 1 0 1]
[1 0 1 2 2 0 1 1 1 1 2 1 0 1 1 1 0 1 1 1 0 2 1 0 0 1 1 1 0 0 2 0 0 1 0 0 1
 2 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 2 0 1 0 1 0 1 0 2
 1 1 2 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 1 0 0 0 1 1 0 0 0 1 1 2 1 1 1 0 1 2
 1 1 1 1 1 0 1 1 1 1 2 1 0 2 0 1 0 1 1 1 1 1 1 1 0 1 0 1 2 1 1 0 2 1 2 0 1
 1 0 1 1 1 0 0 0 0 1 1 0 1 0 0 0 2 2 1 2 1 1 1 1 1 1 0 0 2 1 1 1 0 1 0 1 1
 1 0 2 1 1 1 0 1 2 0 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 2 0 0 0 1 1 0 1 0 0 1 1
 1 2 0 0 2 1 1 1 1 1 1 1 1 2 2 0 1 1 0 2 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 0
 1 0 0 0 0 0 1 1 0 1 1 2 1 1 2 1 1 0 1 1 1 1 1 2 1 0 1 1 0 2 2 0 0 0 0 0 0
 1 2 1 2 2 0 1 0 2 0 0 0 2 1 2 1 1 2 2 2 1 0 0 0 1 0 0 0 1 2 1 1 1 1 2 0 1
 1 2 2 0 0 2 1 1 1 2 0 1 1 1 0 0 1 0 2 1 1 2 1 0 1 0 2 2 1 1 1 1 1 2 0 1 1
 1 1 0 0 1 1 1 0 0 0 1 1 2 1 2 0 1 1 1 0 1 0 1 1 1 1 1 1 1 1 2 0 1 1 1 1 0
 0 2 2 1 1 1 1 2 0 1 0 2 0 0 1 1 1 1 0 1 1 2 0 0 1 0 1 1 1 1 0 1 0 2 1 2 2
 2 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 0 1 2 1 1 0 1 1 1 0 1 0
 0 1 0 

**Predict and evaluate**

In [6]:
# Predict encoded class ids for the held-out descriptors.
y_pred = svm_model.predict(X_test)
# Overall accuracy against the encoded test labels.
accuracy = accuracy_score(y_test_encoded, y_pred)
print(f"Accuracy: {accuracy}")
# Per-class report; target_names uses the encoder's class names for the rows.
print(classification_report(y_test_encoded, y_pred, target_names=le.classes_))


Accuracy: 0.933852140077821
              precision    recall  f1-score   support

        Fist       0.91      0.92      0.92       245
        Palm       0.95      0.96      0.95       426
       Thumb       0.93      0.87      0.90       100

    accuracy                           0.93       771
   macro avg       0.93      0.92      0.92       771
weighted avg       0.93      0.93      0.93       771



**Gesture-controlled music player (live webcam demo)**

In [18]:
import pygame
import time 

# Initialise the pygame mixer before any music call is made.
pygame.mixer.init()

# Files played in order; paths are relative to the working directory.
playlist = ['song1.mp3', 'song2.mp3']
# Index into `playlist` of the track that play_music() loads.
current_song = 0

def play_music():
    """Load the playlist entry at index ``current_song`` and start playing it."""
    track = playlist[current_song]
    player = pygame.mixer.music
    player.load(track)
    player.play()

def stop_music():
    """Stop playback on the pygame mixer's music stream."""
    player = pygame.mixer.music
    player.stop()
    
def skip_music():
    """Advance ``current_song`` to the next playlist entry and play it.

    The index wraps around to 0 after the last entry.
    """
    global current_song
    following = current_song + 1
    current_song = following % len(playlist)
    play_music()

def recognize_gesture(frame, model, le):
    """Classify the gesture shown in a single BGR video frame.

    The frame is converted to grayscale, resized to 256x256 and described
    with HOG (9 orientations, 8x8 pixels per cell, 4x4 cells per block,
    'L2-Hys' normalisation). The descriptor is passed to ``model.predict``
    as a single-row matrix and the prediction is decoded with ``le``.

    Args:
        frame: BGR image, e.g. as read from ``cv2.VideoCapture``.
        model: Fitted classifier exposing ``predict``.
        le: Fitted label encoder exposing ``inverse_transform``.

    Returns:
        The decoded label of the predicted class.
    """
    grey = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (256, 256))
    descriptor = hog(grey, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(4, 4), block_norm='L2-Hys')
    sample = descriptor.reshape(1, -1)  # predict expects a 2-D (1, n_features) input
    predicted = model.predict(sample)
    decoded = le.inverse_transform(predicted)
    return decoded[0]

# Per-gesture frame counters used to debounce the recognised label.
cnt_fist = 0
cnt_palm = 0
cnt_thumb = 0

def clear_cnt():
    """Reset all three module-level gesture counters to zero."""
    # Without this declaration the assignments below would only create
    # function-local names and the module-level counters would never reset.
    global cnt_fist, cnt_palm, cnt_thumb
    cnt_fist = 0
    cnt_palm = 0
    cnt_thumb = 0
    
# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

try:
    play_music()

    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read; stop the loop.
            break

        label = recognize_gesture(frame, svm_model, le)
        # A gesture triggers its action only after it has been counted on
        # more than 10 frames since the last reset.
        if label == 'Palm':
            cnt_palm += 1
            if cnt_palm > 10 :
                stop_music()
                clear_cnt()

        elif label == 'Thumb':
            cnt_thumb += 1
            if cnt_thumb > 10 :
                skip_music()
                clear_cnt()

        elif label == 'Fist':
            cnt_fist += 1
            if cnt_fist > 10 :
                play_music()
                clear_cnt()

        # Overlay the predicted label and show the frame; 'q' quits.
        cv2.putText(frame, label, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Gesture Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, the preview window and the mixer, even when the
    # loop is aborted by an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()


KeyboardInterrupt: 