In [28]:
import cv2
import numpy as np
import os
import mediapipe as mp
import pandas as pd
from sklearn.model_selection import train_test_split

In [29]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_holistic = mp.solutions.holistic  # provides the Holistic model and the *_CONNECTIONS constants
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks and DrawingSpec

In [30]:
def mediapipe_detection(image, model):
    """Run `model.process` on a BGR frame and return the frame with the results.

    Args:
        image: frame in BGR channel order (it is converted with COLOR_BGR2RGB).
        model: object exposing `process(rgb_image)`.

    Returns:
        (bgr_image, results): a BGR copy of the frame produced by the round-trip
        colour conversion, and whatever `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The array is flagged read-only only for the duration of the model call
    # (presumably so the model can use the buffer without copying it).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [31]:
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto `image` with default styling.

    `image` is modified in place; nothing is returned.
    """
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [32]:
def draw_styled_landmarks(image,results):
    """Draw face, pose and hand landmarks onto `image` with per-part colours.

    Each landmark set gets one DrawingSpec for the landmark points and one for
    the connection lines. `image` is modified in place; nothing is returned.

    Args:
        image: frame to draw on.
        results: object exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.
    """
    spec = mp_drawing.DrawingSpec
    # (landmarks, connections, landmark style, connection style)
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         # Colour channels are 8-bit; the previous value 256 was out of range.
         spec(color=(80,255,121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )
    for landmark_list, connections, landmark_style, connection_style in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections,
                                  landmark_style, connection_style)

In [33]:
def extract_keypoints(results):
    """Flatten all landmark sets in `results` into one 1-D feature vector.

    Layout of the returned array (1662 values in total):
        pose        33 landmarks x (x, y, z, visibility)
        face       468 landmarks x (x, y, z)
        left hand   21 landmarks x (x, y, z)
        right hand  21 landmarks x (x, y, z)

    A landmark set that is missing (falsy) is replaced by zeros of the
    corresponding length so the layout stays fixed.
    """
    xyz = ('x', 'y', 'z')

    def _flatten(landmark_set, fields, expected_count):
        # Zero-fill when the detector produced nothing for this part.
        if not landmark_set:
            return np.zeros(expected_count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

In [34]:
# Dataset configuration for the recorded keypoint sequences.
DATA_PATH = 'MP_Data'  # root folder; files are read from <DATA_PATH>/<action>/<sequence>/<frame>.npy
actions = np.array(['hello', 'thanks', 'iloveyou'])  # class labels, in label-index order
no_sequences = 30      # sequences read per action
sequence_length = 30   # frames read per sequence

In [35]:
# Map each action name to its integer class index (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))

In [41]:
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [42]:
# Read every saved frame array into `sequences` (one list of frames per
# recorded sequence) and record the matching class index in `labels`.
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    for seq_idx in range(no_sequences):
        seq_dir = os.path.join(action_dir, str(seq_idx))
        frames = [
            np.load(os.path.join(seq_dir, "{}.npy".format(frame_idx)))
            for frame_idx in range(sequence_length)
        ]
        sequences.append(frames)
        labels.append(label_map[action])

In [43]:
# from tensorflow import 
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard


In [44]:
# Action classifier: three stacked LSTM layers followed by dense layers that
# end in a softmax over the entries of `actions`.
# Input is a window of 30 frames with 1662 features per frame.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),  # last LSTM emits only its final state
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [45]:
# Load previously saved weights from 'action.h5' into `model`; the file has to
# match the layer layout `model` was built with.
model.load_weights('action.h5')

In [46]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Return a copy of `input_frame` with one probability bar per class.

    Args:
        res: iterable of per-class probabilities; entry `i` belongs to `actions[i]`.
        actions: class labels, indexable by position.
        input_frame: image to annotate; it is copied, not modified.
        colors: non-empty sequence of colour tuples. When there are more
            classes than colours the colours are reused cyclically instead
            of raising IndexError.

    Returns:
        The annotated copy of `input_frame`.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        bar_top = 60 + num * 40  # rows are stacked 40 px apart below the banner
        bar_color = colors[num % len(colors)]
        # Bar length is the probability scaled to 0..100 px.
        cv2.rectangle(output_frame, (0, bar_top), (int(prob*100), bar_top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, bar_top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [47]:
# Mutable state shared with the live-capture loop.
sequence = []  # rolling window of per-frame keypoint vectors passed to the action model
sentence = []  # recently recognised action labels, rendered in the top banner
threshold = 0.8  # minimum predicted probability before a label is appended to `sentence`
import warnings

In [48]:
#final execution block
# Live demo: recognise the signed action from a rolling window of keypoints and
# overlay the body-language class predicted by `model1` next to the left ear.
# NOTE: `model1` is the body-language classifier loaded in the checkpoint cell;
# that cell has to be executed before this one.
cap = cv2.VideoCapture(0)

# Register the sklearn warning filter once instead of on every frame.
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
overlay_error_reported = False

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed; stop cleanly when the camera delivers no frame.
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            frame_height, frame_width = image.shape[:2]

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic: keep only the most recent `sequence_length` frames.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            if len(sequence) == sequence_length:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = np.argmax(res)
                predicted_action = actions[best]
                print(predicted_action)

                # 3. Viz logic: append confident predictions, skipping immediate repeats.
                if res[best] >= threshold:
                    if not sentence or predicted_action != sentence[-1]:
                        sentence.append(predicted_action)

                # Show at most the five latest labels.
                if len(sentence) > 5:
                    sentence = sentence[-5:]

                image = prob_viz(res, actions, image, colors)

            # Banner across the full frame width with the recognised labels.
            cv2.rectangle(image, (0,0), (frame_width, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Body-language overlay: needs both pose and face landmarks.
            if results.pose_landmarks and results.face_landmarks:
                try:
                    # Extract Pose landmarks
                    pose = results.pose_landmarks.landmark
                    pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())

                    # Extract Face landmarks
                    face = results.face_landmarks.landmark
                    face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())

                    # Concatenate rows into a single-sample feature frame.
                    # No manual scaling here; presumably `model1` is a pipeline that
                    # already contains its scaler - confirm against the training cell.
                    features = pd.DataFrame([pose_row + face_row])
                    body_language_class = str(model1.predict(features)[0])

                    # Left-ear position in pixels, derived from the actual frame size.
                    left_ear = pose[mp_holistic.PoseLandmark.LEFT_EAR]
                    ear_x = int(left_ear.x * frame_width)
                    ear_y = int(left_ear.y * frame_height)

                    cv2.rectangle(image, (ear_x, ear_y + 5), (ear_x + len(body_language_class)*20, ear_y - 30), (153, 183, 96), -1)
                    cv2.putText(image, body_language_class, (ear_x, ear_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                except Exception as exc:
                    # Keep the video loop alive, but report the cause once instead of
                    # silently hiding it (e.g. `model1` not defined yet).
                    if not overlay_error_reported:
                        print(f"Body-language overlay skipped: {exc!r}")
                        overlay_error_reported = True

            # Show to screen
            cv2.imshow('Final output', image)

            # Break gracefully
            if cv2.waitKey(2) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()


iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
hello
hello
hello
iloveyou
hello
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
hello
hello
iloveyou
iloveyou
iloveyou
iloveyou
thanks
iloveyou
iloveyou
iloveyou
iloveyou
hello
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
hello
hello
hello
hello
hello
hello
hello
hello
hello
iloveyou
iloveyou
iloveyou
hello
hello
iloveyou
iloveyou
iloveyou
iloveyou
iloveyou
hello
hello
iloveyou
iloveyou
iloveyou
iloveyou
hello
hello
hello
iloveyou
iloveyou
iloveyou
iloveyou
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks
thanks

In [17]:
# Release the capture device held by `cap` and close all OpenCV windows
# (presumably kept for manual cleanup after an interrupted capture loop).
cap.release()
cv2.destroyAllWindows()

In [18]:
# Load the training table from 'coords.csv'; it must contain a 'class' column,
# which is used as the target.
df = pd.read_csv('coords.csv')

In [19]:
# Split the table into the target column and the remaining feature columns.
y = df['class']  # target value
X = df.drop(columns='class')  # features

In [20]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)

In [21]:
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [22]:
# One pipeline per candidate classifier; each standardises the features first.
pipelines = {
    algo: make_pipeline(StandardScaler(), classifier_cls())
    for algo, classifier_cls in (
        ('lr', LogisticRegression),
        ('rc', RidgeClassifier),
        ('rf', RandomForestClassifier),
        ('gb', GradientBoostingClassifier),
    )
}

In [23]:
import joblib

# Fit each pipeline before persisting it. Dumping the untouched pipelines would
# write unfitted estimators that the load step then treats as trained models.
# The loop variable is deliberately not called `model`, so it does not rebind
# the Keras `model` used by the capture loop.
for algo, pipeline in pipelines.items():
    modelfile = f"{algo}_model.joblib"
    joblib.dump(pipeline.fit(X_train, y_train), modelfile)

In [24]:
import os
import joblib

from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted

# Build `fit_models`: reuse a cached pipeline only when it is actually fitted,
# otherwise fit it now and cache the result for the next run.
fit_models = {}

for algo, pipeline in pipelines.items():
    modelfile = f"{algo}_model.joblib"
    fitted_pipeline = None

    if os.path.exists(modelfile):
        # NOTE: joblib.load unpickles the file; only load files you created yourself.
        cached = joblib.load(modelfile)
        try:
            check_is_fitted(cached)
            fitted_pipeline = cached
        except NotFittedError:
            # Cache holds an unfitted pipeline; fall through and train it.
            fitted_pipeline = None

    if fitted_pipeline is None:
        # Model not found (or not usable): fit a new one and persist it.
        fitted_pipeline = pipeline.fit(X_train, y_train)
        joblib.dump(fitted_pipeline, modelfile)

    fit_models[algo] = fitted_pipeline


In [25]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle

In [26]:
model_filename = 'body_language.pkl'
checkpoint_filename = 'fit_models_checkpoint.flag'

# Bind `model1` (the body-language classifier used by the capture loops) either
# from the saved pickle or by training now. Both branches define `model1`.
# The pickle is only trusted when the flag file AND the model file exist.
if os.path.exists(checkpoint_filename) and os.path.exists(model_filename):
    # NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
    with open(model_filename, 'rb') as f:
        model1 = pickle.load(f)
    print("Models loaded from checkpoint.")
else:
    # Fitting models
    fit_models = {}
    for algo, pipeline in pipelines.items():
        fit_models[algo] = pipeline.fit(X_train, y_train)

    # Use and save the trained Random Forest model
    model1 = fit_models['rf']
    with open(model_filename, 'wb') as f:
        pickle.dump(model1, f)

    # Create a checkpoint flag
    with open(checkpoint_filename, 'w') as checkpoint:
        checkpoint.write("Models fitted and saved.")


Models loaded from checkpoint.


In [27]:
# Live body-language demo: classify each webcam frame with `model1` and draw
# the predicted class next to the left ear.
cap = cv2.VideoCapture(0)

# Register the sklearn warning filter once instead of on every frame.
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
overlay_error_reported = False

try:
    # Initiate holistic model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        while cap.isOpened():
            # Stop cleanly when the camera delivers no frame.
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # Run detection and draw the landmarks
            image, results = mediapipe_detection(frame, holistic)
            frame_height, frame_width = image.shape[:2]
            draw_styled_landmarks(image, results)

            # Classification needs both pose and face landmarks.
            if results.pose_landmarks and results.face_landmarks:
                try:
                    # Extract Pose landmarks
                    pose = results.pose_landmarks.landmark
                    pose_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in pose]).flatten())

                    # Extract Face landmarks
                    face = results.face_landmarks.landmark
                    face_row = list(np.array([[landmark.x, landmark.y, landmark.z, landmark.visibility] for landmark in face]).flatten())

                    # Make detections. A dedicated name is used so the training
                    # feature table `X` is not overwritten by a single-frame sample.
                    frame_features = pd.DataFrame([pose_row + face_row])
                    body_language_class = str(model1.predict(frame_features)[0])
                    body_language_prob = model1.predict_proba(frame_features)[0]
                    print(body_language_class, body_language_prob)

                    # Left-ear position in pixels, derived from the actual frame size.
                    left_ear = pose[mp_holistic.PoseLandmark.LEFT_EAR]
                    ear_x = int(left_ear.x * frame_width)
                    ear_y = int(left_ear.y * frame_height)

                    cv2.rectangle(image,
                                  (ear_x, ear_y + 5),
                                  (ear_x + len(body_language_class)*20, ear_y - 30),
                                  (245, 117, 16), -1)
                    cv2.putText(image, body_language_class, (ear_x, ear_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                except Exception as exc:
                    # Keep the video loop alive, but report the cause once instead of
                    # silently hiding it.
                    if not overlay_error_reported:
                        print(f"Body-language overlay skipped: {exc!r}")
                        overlay_error_reported = True

            cv2.imshow('Raw Webcam Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

Sad [0.08 0.43 0.18 0.1  0.06 0.15]
Sad [0.09 0.49 0.17 0.06 0.07 0.12]
Sad [0.09 0.45 0.19 0.07 0.08 0.12]
Sad [0.09 0.45 0.21 0.07 0.08 0.1 ]
Sad [0.09 0.43 0.22 0.07 0.09 0.1 ]
Sad [0.1  0.42 0.2  0.08 0.09 0.11]
Serious [0.1  0.29 0.32 0.07 0.09 0.13]
Serious [0.08 0.27 0.36 0.07 0.09 0.13]
Sad [0.1  0.4  0.28 0.06 0.07 0.09]
Sad [0.09 0.43 0.25 0.06 0.09 0.08]
Sad [0.08 0.47 0.21 0.06 0.1  0.08]
Sad [0.08 0.53 0.17 0.05 0.09 0.08]
Sad [0.09 0.52 0.18 0.05 0.08 0.08]
Sad [0.08 0.54 0.17 0.05 0.08 0.08]
