In [24]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp

def mediapipe_detection(image, model):
    """Run a MediaPipe-style model on a single BGR frame.

    Args:
        image: Frame in BGR channel order (what ``cv2.VideoCapture.read`` yields).
        model: Any object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    # The model is fed an RGB copy of the frame.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only while the model runs, then restore it.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

# Open the default webcam (device 0)
cap = cv2.VideoCapture(0)

# MediaPipe handles used throughout the notebook
mp_holistic = mp.solutions.holistic  # holistic model
mp_drawing = mp.solutions.drawing_utils  # drawing utils

# NOTE: draw_styled_landmarks is defined in a later cell of this notebook;
# that cell has to be executed before this one.
try:
    # Run the holistic model on the live feed until 'q' is pressed
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (camera busy/unplugged): stop instead of
                # handing an empty frame to cv2.cvtColor.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()

In [14]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Args:
        image: Frame to draw on; it is passed straight to
            ``mp_drawing.draw_landmarks``.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        None.
    """
    spec = mp_drawing.DrawingSpec

    # One row per body part:
    # (landmarks, connection set, landmark style, connection style)
    styled_parts = (
        # Face (second colour was (80,256,121): 256 is outside the 0-255 channel range)
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=4)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmarks, connections, landmark_style, connection_style in styled_parts:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_style, connection_style)


In [15]:
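# Disabled debug preview of the last captured frame. The call originally used
# `draw_landmarks`, which is not defined in this notebook (hence the stale
# NameError recorded below); the helper defined above is `draw_styled_landmarks`.
#draw_styled_landmarks(frame, results)
#plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))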

NameError: name 'draw_landmarks' is not defined

In [None]:
#Save the landmarks as Numpy
def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into one 1-D NumPy vector.

    Each landmark contributes ``x, y, z`` (pose landmarks also contribute
    ``visibility``). A landmark set that is missing/falsy is replaced by zeros:
    132 values for pose, 1404 for face and 21*3 for each hand.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``.landmark`` iterable.

    Returns:
        ``np.ndarray`` built as ``concatenate([pose, face, lh, rh])``.
    """
    def _flatten(landmark_set, fields, fallback_size):
        # Zero vector when nothing was detected, otherwise row-major flatten.
        if not landmark_set:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields]
                for point in landmark_set.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, xyz + ('visibility',), 132)
    lh = _flatten(results.left_hand_landmarks, xyz, 21*3)
    rh = _flatten(results.right_hand_landmarks, xyz, 21*3)
    face = _flatten(results.face_landmarks, xyz, 1404)

    # Output order is pose, face, left hand, right hand.
    return np.concatenate([pose, face, lh, rh])
# Sanity check on the feature-vector length. With the zero-filled fallbacks
# the parts add up to 132 + 1404 + 63 + 63 = 1662 values.
extract_keypoints(results).shape

In [None]:
# Number of recordings ("videos") captured per action
no_sequences = 30

# Number of frames in each recording
sequence_length = 30

# Actions (gestures) we try to detect
actions = np.array(['hello', 'thank', 'iloveu'])

# Root folder for the exported NumPy keypoint arrays.
# NOTE(review): 'MP_Date' looks like a typo for 'MP_Data'; kept unchanged so
# that anything already saved under this folder stays reachable. Confirm.
DATA_PATH = 'MP_Date'

In [None]:
# Create one folder per (action, sequence): DATA_PATH/<action>/<sequence>/
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps re-running this cell harmless, while real
        # failures (permissions, invalid path, ...) are no longer swallowed.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [None]:
# Collect action data: for every action record `no_sequences` videos of
# `sequence_length` frames and save the keypoints of each frame to
# DATA_PATH/<action>/<sequence>/<frame_num>.npy
cap = cv2.VideoCapture(0)
stop_requested = False  # set when 'q' is pressed or the camera stops delivering frames

try:
    # Access MediaPipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        for action in actions:
            # Loop through sequences aka videos
            for sequence in range(no_sequences):
                # Loop through video length aka sequence length
                for frame_num in range(sequence_length):

                    ret, frame = cap.read()
                    if not ret:
                        # No frame grabbed: abort instead of crashing in cv2.cvtColor
                        stop_requested = True
                        break

                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    draw_styled_landmarks(image, results)

                    # Overlay the status text; the first frame of each video
                    # additionally gets a "START COLLECTION" banner.
                    status = 'Collecting frames for {} Video Number {}'.format(action, sequence)
                    if frame_num == 0:
                        cv2.putText(image, 'START COLLECTION', (120,200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, status, (15,12),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 1, cv2.LINE_AA)
                    else:
                        cv2.putText(image, status, (15,12),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 1, cv2.LINE_AA)

                    # Show to screen BEFORE pausing, otherwise the banner is
                    # never visible during the 2 s break between videos.
                    cv2.imshow('OpenCV Feed', image)
                    if frame_num == 0:
                        cv2.waitKey(2000)

                    # Export keypoints for this frame
                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(npy_path, keypoints)

                    # 'q' stops the whole collection, not just the current video
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even after an error
    cap.release()
    cv2.destroyAllWindows()

In [22]:
# Round-trip check: extract the keypoints of the most recent `results`,
# save them to disk and load them back. The array is saved under the name '0'
# and read back from '0.npy'.
result_test = extract_keypoints(results)
np.save('0', result_test)
np.load('0.npy')

array([0., 0., 0., ..., 0., 0., 0.])

In [23]:
# Manual cleanup: release the capture device and close all OpenCV windows.
cap.release()
cv2.destroyAllWindows()

In [30]:
# Flatten the face landmarks of the latest `results` into [x, y, z, x, y, z, ...]
face = np.array([[lm.x, lm.y, lm.z] for lm in results.face_landmarks.landmark]).flatten()

In [32]:
# Display the flattened face keypoint vector
face

array([ 0.46034628,  1.0644002 , -0.00614396, ...,  0.60176933,
        0.88338017, -0.01222336])

In [33]:
# Length of the flattened face vector
face.shape

(1404,)

In [None]:
from transformers import BertTokenizer, BertForMaskedLM
import torch

# Load pre-trained model tokenizer (vocabulary)
# NOTE(review): 'bert-base-uncased' is an English checkpoint while the input
# below is Chinese; presumably a Chinese checkpoint is intended. Confirm.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Input sentence (with explicit [CLS]/[SEP] markers) and a reference sentence.
# `label_text` is not used anywhere else in this cell.
text = "[CLS] 今天/吃饭/我/妈妈/家/去 [SEP]"
label_text = "今天我去妈妈家吃饭"

# Mask a token that we will try to predict back with `BertForMaskedLM`
masked_index = 5  # position in `tokenized_text` that is replaced by [MASK]
tokenized_text = tokenizer.tokenize(text)
tokenized_text[masked_index] = '[MASK]'
indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)

# Convert inputs to PyTorch tensors (wrapped in a list, i.e. a batch of one)
tokens_tensor = torch.tensor([indexed_tokens])

# Load pre-trained model (weights) and switch it to evaluation mode
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

# Predict all tokens; no gradients are needed for inference
with torch.no_grad():
    outputs = model(tokens_tensor)
    # First element of the model output; indexed below as [batch, position].
    # Presumably the per-token vocabulary scores. TODO confirm.
    predictions = outputs[0]

# Take the highest-scoring vocabulary id at the masked position and map it
# back to its token string
predicted_index = torch.argmax(predictions[0, masked_index]).item()
predicted_token = tokenizer.convert_ids_to_tokens([predicted_index])[0]
print(f"Original Text: {text}")
print(f"Masked Text: {tokenized_text}")
print(f"Predicted Token: {predicted_token}")
