# Hand Gesture Classification
### by Adrian Abraham

## 1 - Package installation
- **tensorflow**: for model creation and predictions
- **pandas**: for csv_reading
- **numpy**: for data storage and manipulation
- **mediapipe**: contains pre-made hand detection module for data collection
- **open-cv**: for live feed
- **ast**: to convert string literal lists into actual lists

In [None]:
!pip install tensorflow

In [None]:
!pip install pandas

In [None]:
!pip install mediapipe

In [None]:
!pip install opencv-python

## 2 - Importing packages

In [14]:
import tensorflow as tf
import numpy as np
import pandas as pd
import ast
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.activations import linear, relu, sigmoid
from tensorflow.keras.losses import BinaryCrossentropy

### Quick check that tensorflow is working

In [15]:
print(tf.__version__)

2.16.2


## 3 - Importing data

In [42]:
# Positive and negative data are stored in csv format.
# We can use read_csv() to get the contents of each file
df1 = pd.read_csv('peace.csv')
df0 = pd.read_csv('not_peace.csv')

In [64]:
df1.info

<bound method DataFrame.info of                                                  coords
0     [(0.6682692307692307, 1.0), (0.384615384615384...
1     [(0.6634615384615384, 1.0), (0.379807692307692...
2     [(0.6650717703349283, 1.0), (0.382775119617224...
3     [(0.6523809523809524, 1.0), (0.376190476190476...
4     [(0.6555023923444976, 1.0), (0.377990430622009...
...                                                 ...
1533  [(0.7243243243243244, 1.0), (0.491891891891891...
1534  [(0.7297297297297297, 1.0), (0.497297297297297...
1535  [(0.7297297297297297, 1.0), (0.491891891891891...
1536  [(0.7336956521739131, 1.0), (0.5, 0.9354838709...
1537  [(0.7336956521739131, 1.0), (0.5, 0.9354838709...

[1538 rows x 1 columns]>

In [65]:
df0.info

<bound method DataFrame.info of                                                  coords
0     [(0.75, 1.0), (0.5340909090909091, 0.946547884...
1     [(0.7154696132596685, 1.0), (0.511049723756906...
2     [(0.6941489361702128, 1.0), (0.486702127659574...
3     [(0.6851851851851852, 1.0), (0.473544973544973...
4     [(0.6487179487179487, 1.0), (0.446153846153846...
...                                                 ...
2007  [(0.7950530035335689, 1.0), (0.505300353356890...
2008  [(0.7758620689655172, 1.0), (0.496551724137931...
2009  [(0.75, 1.0), (0.48333333333333334, 0.93627450...
2010  [(0.7435897435897436, 1.0), (0.493589743589743...
2011  [(0.7272727272727273, 1.0), (0.482758620689655...

[2012 rows x 1 columns]>

In [44]:
# The format of our csv has a list of data as a string, 
# to turn them into lists so we use the ast library
df1['coords'] = df1['coords'].apply(ast.literal_eval)
positive_data = np.array(df1['coords'].tolist())

df0['coords'] = df0['coords'].apply(ast.literal_eval)
negative_data = np.array(df0['coords'].tolist())

In [19]:
positive_data

array([[[0.79047619, 1.        ],
        [0.53333333, 0.93246753],
        [0.38095238, 0.8       ],
        ...,
        [0.99047619, 0.61298701],
        [0.92380952, 0.69350649],
        [0.88571429, 0.76103896]],

       [[0.8       , 1.        ],
        [0.53333333, 0.93023256],
        [0.38095238, 0.79586563],
        ...,
        [0.99047619, 0.60206718],
        [0.92380952, 0.67700258],
        [0.88571429, 0.74677003]],

       [[0.8056872 , 1.        ],
        [0.54028436, 0.93506494],
        [0.38388626, 0.8       ],
        ...,
        [0.99052133, 0.61038961],
        [0.92417062, 0.69090909],
        [0.87677725, 0.76103896]],

       ...,

       [[0.88481675, 1.        ],
        [0.64921466, 0.95481928],
        [0.5026178 , 0.8313253 ],
        ...,
        [0.97382199, 0.63554217],
        [0.91099476, 0.71686747],
        [0.87958115, 0.78012048]],

       [[0.875     , 1.        ],
        [0.64583333, 0.95495495],
        [0.49479167, 0.84084084],
        .

In [20]:
negative_data

array([[[0.7804878 , 1.        ],
        [0.47154472, 0.90310078],
        [0.27642276, 0.63565891],
        ...,
        [0.93495935, 0.37984496],
        [0.87804878, 0.54263566],
        [0.90243902, 0.59689922]],

       [[0.77642276, 1.        ],
        [0.46747967, 0.88416988],
        [0.2804878 , 0.62162162],
        ...,
        [0.93495935, 0.37065637],
        [0.87804878, 0.53281853],
        [0.90243902, 0.58301158]],

       [[0.78225806, 1.        ],
        [0.47580645, 0.88931298],
        [0.29032258, 0.6221374 ],
        ...,
        [0.93951613, 0.36641221],
        [0.87903226, 0.52290076],
        [0.90322581, 0.57251908]],

       ...,

       [[0.52419355, 1.        ],
        [0.32795699, 0.91048593],
        [0.21505376, 0.74680307],
        ...,
        [0.8844086 , 0.52941176],
        [0.94623656, 0.44757033],
        [1.        , 0.36061381]],

       [[0.52291105, 1.        ],
        [0.32614555, 0.90816327],
        [0.21293801, 0.74744898],
        .

In [67]:
# We can concatenate them together to have all our data in one np array
data = np.concatenate((positive_data,negative_data))
data.shape

(3550, 21, 2)

In [68]:
# Each training example can be either 1 or 0
targets = np.zeros(data.shape[0])

# We are setting each training example to have either 0 or 1 output
for i in range(positive_data.shape[0]):
    targets[i] = 1

In [47]:
model = Sequential([
    Input(shape=(21,2)),          # Define the input shape here
    Flatten(),                     # Flatten the (21, 2) input into a 1D vector
    Dense(25, activation='relu'), # Fully connected layer with 128 neurons
    Dense(10, activation='relu'),  # Fully connected layer with 64 neurons
    Dense(1, activation='sigmoid') # Output layer for binary classification
])
model.summary()

In [48]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=BinaryCrossentropy(),  # Use appropriate loss function for classification
              metrics=['accuracy'])

In [49]:
history = model.fit(
    data,targets,
    epochs=40
)

Epoch 1/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 511us/step - accuracy: 0.8216 - loss: 0.5647
Epoch 2/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - accuracy: 0.9755 - loss: 0.2331
Epoch 3/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456us/step - accuracy: 0.9966 - loss: 0.0776
Epoch 4/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 473us/step - accuracy: 0.9994 - loss: 0.0376
Epoch 5/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472us/step - accuracy: 0.9998 - loss: 0.0189
Epoch 6/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472us/step - accuracy: 0.9991 - loss: 0.0103
Epoch 7/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 476us/step - accuracy: 1.0000 - loss: 0.0064
Epoch 8/40
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 545us/step - accuracy: 1.0000 - loss: 0.0038
Epoch 9/40
[1m111/111[

In [50]:
testdf = pd.read_csv('testing.csv')
testdf['coords'] = testdf['coords'].apply(ast.literal_eval)
testing = np.array(testdf['coords'].tolist())

In [28]:
testing[0].shape

(21, 2)

In [29]:
model.predict(testing)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


array([[9.97410417e-01],
       [9.86738563e-01],
       [9.73150551e-01],
       [9.80294406e-01],
       [9.87193823e-01],
       [9.94244576e-01],
       [9.96303141e-01],
       [9.84041810e-01],
       [9.90395844e-01],
       [9.49390769e-01],
       [9.77789700e-01],
       [9.63346422e-01],
       [9.57746744e-01],
       [9.72320795e-01],
       [9.91492629e-01],
       [9.89777267e-01],
       [9.94082332e-01],
       [9.85974967e-01],
       [9.93860483e-01],
       [9.97371852e-01],
       [9.98103440e-01],
       [9.98991072e-01],
       [9.99432445e-01],
       [9.97965276e-01],
       [9.98733580e-01],
       [9.98876154e-01],
       [9.96501088e-01],
       [9.96839583e-01],
       [9.98586833e-01],
       [9.94956553e-01],
       [9.89763737e-01],
       [9.98126984e-01],
       [9.97813821e-01],
       [9.98788595e-01],
       [9.99682665e-01],
       [9.99602139e-01],
       [9.99581218e-01],
       [9.99775827e-01],
       [9.99550879e-01],
       [9.99821603e-01],


In [19]:
model.predict(testing[28:])

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 563us/step


array([[9.9801421e-01],
       [9.9299532e-01],
       [9.8520136e-01],
       [9.9679834e-01],
       [9.9651372e-01],
       [9.9793768e-01],
       [9.9941188e-01],
       [9.9932051e-01],
       [9.9930274e-01],
       [9.9959558e-01],
       [9.9915725e-01],
       [9.9957556e-01],
       [9.9966812e-01],
       [9.9945009e-01],
       [9.9949199e-01],
       [9.9944693e-01],
       [9.9949908e-01],
       [9.9943513e-01],
       [9.9947214e-01],
       [9.9889725e-01],
       [9.9897528e-01],
       [9.9858087e-01],
       [9.9886477e-01],
       [9.9695742e-01],
       [9.9915546e-01],
       [9.9886280e-01],
       [9.9872315e-01],
       [9.9754685e-01],
       [9.9756306e-01],
       [9.9836874e-01],
       [9.9696416e-01],
       [9.9782532e-01],
       [9.4193500e-01],
       [4.4757444e-01],
       [9.8434966e-03],
       [1.0461462e-02],
       [8.9770174e-03],
       [1.5751423e-02],
       [1.1404051e-02],
       [1.0364762e-02],
       [1.5403262e-02],
       [1.729276

In [30]:
import mediapipe as mp
import cv2

In [52]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False,
                       max_num_hands=1,
                       min_detection_confidence=0.5,
                       min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

I0000 00:00:1719937188.162490 13556742 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1
W0000 00:00:1719937188.188810 13609952 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1719937188.198590 13609954 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [69]:
cap = cv2.VideoCapture(0)
BOX_MARGIN = 50
while cap.isOpened():
    # capturing the current frame
    ret, frame = cap.read()
    # getting window dimensions, shape contains height, width, and channels
    height, width, _ = frame.shape

    # if no frame returned, break
    if not ret:
        break
        
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        # for each hand found we calculate the coords of the landmarks
        for hand_landmarks in results.multi_hand_landmarks:
            hand_coords = np.array([ (int(landmark.x * width), int(landmark.y * height)) for landmark in hand_landmarks.landmark ])
        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        
        hand_x_min, hand_x_max = min(x for x,y in hand_coords), max(x for x,y in hand_coords)
        hand_y_min, hand_y_max = min(y for x,y in hand_coords), max(y for x,y in hand_coords)
        
        rect_start, rect_end = (hand_x_min - BOX_MARGIN, hand_y_min - BOX_MARGIN), (hand_x_max + BOX_MARGIN, hand_y_max + BOX_MARGIN)
        rel_coords = np.array([ (x - rect_start[0], y - rect_start[1]) for x,y in hand_coords ])
        
        rel_x_max = max(x for x,y in rel_coords)
        rel_y_max = max(y for x,y in rel_coords)

        normalized_rel_coords = np.array([ (x / rel_x_max, y / rel_y_max) for x,y in rel_coords ])
        normalized_rel_coords = np.expand_dims(normalized_rel_coords, axis=0)

        prediction = model.predict(normalized_rel_coords)[0][0]
        if prediction > 0.9:
            text = f"peace be with you, Confidence: {round(prediction * 100,2)}%"
        else:
            text = f"give me some peace bro, Confidence: {round(prediction * 100,2)}%"

        cv2.putText(frame, text, (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        
    cv2.imshow('Hand Landmarks', frame)

    # when q pressed, we end video capture
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
        




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12

In [54]:
type(prediction[0])

numpy.float32