In [1]:
import imutils
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from tqdm import tqdm

In [2]:
mp_holistic = mp.solutions.holistic #Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                   # Image is no longer writeable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return image, results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [5]:
mp_holistic.POSE_CONNECTIONS

frozenset({(<PoseLandmark.NOSE: 0>, <PoseLandmark.LEFT_EYE_INNER: 1>),
           (<PoseLandmark.NOSE: 0>, <PoseLandmark.RIGHT_EYE_INNER: 4>),
           (<PoseLandmark.LEFT_EYE_INNER: 1>, <PoseLandmark.LEFT_EYE: 2>),
           (<PoseLandmark.LEFT_EYE: 2>, <PoseLandmark.LEFT_EYE_OUTER: 3>),
           (<PoseLandmark.LEFT_EYE_OUTER: 3>, <PoseLandmark.LEFT_EAR: 7>),
           (<PoseLandmark.RIGHT_EYE_INNER: 4>, <PoseLandmark.RIGHT_EYE: 5>),
           (<PoseLandmark.RIGHT_EYE: 5>, <PoseLandmark.RIGHT_EYE_OUTER: 6>),
           (<PoseLandmark.RIGHT_EYE_OUTER: 6>, <PoseLandmark.RIGHT_EAR: 8>),
           (<PoseLandmark.MOUTH_RIGHT: 10>, <PoseLandmark.MOUTH_LEFT: 9>),
           (<PoseLandmark.LEFT_SHOULDER: 11>, <PoseLandmark.LEFT_ELBOW: 13>),
           (<PoseLandmark.LEFT_SHOULDER: 11>, <PoseLandmark.LEFT_HIP: 23>),
           (<PoseLandmark.RIGHT_SHOULDER: 12>,
            <PoseLandmark.LEFT_SHOULDER: 11>),
           (<PoseLandmark.RIGHT_SHOULDER: 12>, <PoseLandmark.RIGHT_ELBOW: 14>)

In [6]:
def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1,circle_radius=1),
                             mp_drawing.DrawingSpec(color=(80,265,121), thickness=1,circle_radius=1)
                             ) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2,circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2,circle_radius=2)) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2,circle_radius=4),
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2,circle_radius=2)) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245,117,6), thickness=2,circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2,circle_radius=2))

In [10]:
import cv2
#cap = cv2.VideoCapture(0)
# for i in range(0,3):
#cap = cv2.VideoCapture("./img/NIA_SL_WORD2751_REAL01_F.mp4")
cap = cv2.VideoCapture("./img/C1.mp4")
# for i in range(0,2,1):
#     cap = cv2.VideoCapture("./img/KETI_SL_000000000"+"{}.avi".format(i + 2))
    
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        
    
        # Read feed        
        ret, frame = cap.read()
        #frame = imutils.resize(frame, width=1200)
        if not ret:
            print("비디오 읽기 실패 / 비디오 모두 읽음")
            cap.release()   # 비디오 읽기 종료
            cv2.destroyAllWindows()  # 새로 연 창을 모두 닫아줌
            break
        # Make detections(탐지들)
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)
        
        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [11]:
#    cap.release()
#     cv2.destroyAllWindows()

In [12]:
#results.left_hand_landmarks.landmark
len(results.left_hand_landmarks.landmark)

21

In [13]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [14]:
#test
#pose
#len(pose)
#pose = np.array([[res.x, res.y, res.z, res.visibility(저항)] for res in results.pose_landmarks.landmark])
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)                                                                                                   # 학습시킬려고 flatten으로 쫙 펴줌
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [15]:
def extract_keypoints(results):
   pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
   face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)                                                                                                   # 학습시킬려고 flatten으로 쫙 펴줌
   lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
   rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3) 
   return np.concatenate([pose, face, lh, rh])

In [16]:
len(results.face_landmarks.landmark) * 3

1404

In [17]:
lh

array([ 5.83143115e-01,  6.29238844e-01, -2.58182445e-05,  5.66562176e-01,
        6.05806291e-01,  4.33658669e-03,  5.49372435e-01,  5.95453858e-01,
       -1.39358349e-03,  5.34061372e-01,  5.92155993e-01, -5.93127776e-03,
        5.23173928e-01,  5.93649924e-01, -1.08652450e-02,  5.54981470e-01,
        6.01748884e-01, -3.92497554e-02,  5.30368626e-01,  6.00174248e-01,
       -4.83734049e-02,  5.20630896e-01,  6.04530513e-01, -4.39037867e-02,
        5.15673518e-01,  6.07067227e-01, -3.79693247e-02,  5.57052076e-01,
        6.22166693e-01, -4.29518931e-02,  5.30041695e-01,  6.22513354e-01,
       -5.66135459e-02,  5.17416060e-01,  6.25611186e-01, -5.28139248e-02,
        5.10521531e-01,  6.28100812e-01, -4.73895818e-02,  5.59739590e-01,
        6.41355753e-01, -4.38750461e-02,  5.35988092e-01,  6.43261194e-01,
       -5.50028048e-02,  5.23026884e-01,  6.45456553e-01, -5.31619266e-02,
        5.14411986e-01,  6.47119224e-01, -5.11966534e-02,  5.62918186e-01,
        6.57795429e-01, -

In [18]:
#pose.shape
face

array([ 0.49836352,  0.26704258, -0.01112396, ...,  0.52483344,
        0.20699202,  0.00513541])

In [19]:
rh

array([ 4.34263647e-01,  6.76461041e-01, -2.67358009e-05,  4.41291422e-01,
        6.49384797e-01, -2.18114369e-02,  4.52806175e-01,  6.37020826e-01,
       -3.68067175e-02,  4.65202600e-01,  6.39760554e-01, -4.32884283e-02,
        4.73579854e-01,  6.52741015e-01, -4.87716161e-02,  4.41095740e-01,
        6.64543629e-01, -6.92948028e-02,  4.59253103e-01,  6.67843640e-01,
       -8.88441280e-02,  4.70203757e-01,  6.66785419e-01, -9.33928788e-02,
        4.76266205e-01,  6.68022275e-01, -9.37015414e-02,  4.42954004e-01,
        6.88581824e-01, -5.92618920e-02,  4.66315091e-01,  6.92730546e-01,
       -7.63213485e-02,  4.77842510e-01,  6.85679853e-01, -7.27370009e-02,
        4.83243853e-01,  6.81266129e-01, -7.03046173e-02,  4.46596950e-01,
        7.04264879e-01, -4.76823412e-02,  4.68390018e-01,  7.03801692e-01,
       -6.02172464e-02,  4.74339575e-01,  6.89740241e-01, -5.05784862e-02,
        4.75127488e-01,  6.80582762e-01, -4.36740629e-02,  4.50697601e-01,
        7.12537229e-01, -

In [20]:
#results.left_hand_landmarks.landmark
#lh
lh.shape

(63,)

In [21]:
pose.shape

(132,)

In [22]:
np.zeros(21*3)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [23]:
#extract_keypoints(results).shape
result_test = extract_keypoints(results)

In [24]:
result_test

array([ 0.50214785,  0.23726696, -0.2448819 , ...,  0.47238192,
        0.6902014 , -0.03199718])

In [25]:
468*3+33*4+21*3+21*3

1662

In [26]:
# 딥러닝 파일에 0.npy 파일 생김   test용
np.save('0', result_test) 

In [27]:
# path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data2')

# Actions that we try to detect
#actions = np.array(['coco','love','HM'])
actions = np.array(['C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13',
                   'C14','C15','C16','C17','C18','C19','C20'])

# Thirty videos worth of data
no_sequences =  10#10      #29 , 16, 5

# videos are going to be 30 frames in length
sequence_length = 50#50    #30, 20, 100

In [25]:
# 폴더 세개 생성!! 딥러닝 -> actions 순서대로 생김
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except :
            pass

In [26]:
    
#     cap = cv2.VideoCapture("C:\Users\21SMT41\Desktop\deep_ex\aaaa\\NIA_SL_WORD1501_REAL01_"+num+".mp4")
    # Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # New Loop
    # Loop through actions
    for i in range(0,20):
        
        #cap = cv2.VideoCapture("./img/NIA_SL_WORD175{}_REAL01_F.mp4".format(i+1))
        # Loop through sequences aka videos
        #count = 0
        for sequence in range(no_sequences):            
            #cap = cv2.VideoCapture("./img/NIA_SL_WORD275{}_REAL01_F.mp4".format(i+1))
            cap = cv2.VideoCapture("./img/C"+str(i + 1)+".mp4")
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):                
 
                # read feed
                ret, frame = cap.read()
                #frame = imutils.resize(frame, width=800)
                # Make detections
                image, results = mediapipe_detection(frame, holistic)
#                 print(results)

                # Draw landmarks
                draw_styled_landmarks(image, results)

                # New Apply wait logic
                if frame_num == 0:
                    cv2.putText(image, 'STRATING COLLECTION', (120,200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number{} f_num{}'.format(actions[i], sequence,frame_num), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    # show to screen

                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(500) # 1000

                else : 
                    cv2.putText(image, 'Collecting frames for {} Video Number{} f_num{}'.format(actions[i], sequence, frame_num), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    # show to screen
                    cv2.imshow('OpenCV Feed', image)

                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, actions[i], str(sequence), str(frame_num))
                np.save(npy_path, keypoints)
                #count = count+1
#                 # 중간에 파일이 없어도 끊기지 않게
#                 for i in range(0,sequence_length):
#                     if os.path.isfile('C:/Users/21SMT37/3차 프젝/MP_Data/3차/8/{}.npy'.format(i)):
#                         pass
#                     else : 
#                         continue

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
#         cap.release()
#         cv2.destroyAllWindows()
cap.release()
cv2.destroyAllWindows()

In [27]:
cap.release()
cv2.destroyAllWindows()

In [28]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [29]:
label_map = {label:num for num, label in enumerate(actions)}

In [30]:
label_map

{'C1': 0,
 'C2': 1,
 'C3': 2,
 'C4': 3,
 'C5': 4,
 'C6': 5,
 'C7': 6,
 'C8': 7,
 'C9': 8,
 'C10': 9,
 'C11': 10,
 'C12': 11,
 'C13': 12,
 'C14': 13,
 'C15': 14,
 'C16': 15,
 'C17': 16,
 'C18': 17,
 'C19': 18,
 'C20': 19}

In [31]:
frame_num

NameError: name 'frame_num' is not defined

In [32]:
sequences, labels = [], []
for action in actions :
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence),"{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [33]:
np.array(sequences).shape

(200, 50, 1662)

In [34]:
np.array(labels).shape

(200,)

In [35]:
X = np.array(sequences)

In [36]:
X.shape

(200, 50, 1662)

In [37]:
y = to_categorical(labels).astype(int)

In [38]:
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                   test_size=0.25)

In [40]:
y_test.shape

(50, 20)

In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard

In [43]:
log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir=log_dir)

In [44]:
md = Sequential()
md.add(LSTM(128, return_sequences=True, activation='tanh', input_shape=(50,1662)))
md.add(LSTM(64, return_sequences=True, activation='tanh'))
md.add(LSTM(32, return_sequences=False, activation='tanh'))
md.add(Dense(64, activation='tanh'))
md.add(Dense(32, activation='tanh'))
md.add(Dense(actions.shape[0], activation='softmax'))

In [44]:
X.shape

(200, 50, 1662)

In [45]:
actions.shape[0]

20

In [46]:
res = [.7, 0.2, 0.1]

In [47]:
actions[np.argmax(res)]

'C1'

In [48]:
X_train.shape, y_train.shape

((150, 50, 1662), (150, 20))

In [49]:
#Adam(lr=0.0001)
md.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy',
           metrics=['categorical_accuracy']) # categorical_accuracy

In [50]:
h = md.fit(X_train, y_train, batch_size=128,
        epochs = 500, callbacks=[tb_callback]) # tensorboard_callback
# LOG파일안에 들어감

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 

Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 

Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


In [51]:
md.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 50, 128)           916992    
_________________________________________________________________
lstm_1 (LSTM)                (None, 50, 64)            49408     
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dense (Dense)                (None, 64)                2112      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 20)                660       
Total params: 983,668
Trainable params: 983,668
Non-trainable params: 0
__________________________________________________

In [59]:
# 시각화
plt.figure(figsize=(15, 5))

plt.plot(range(1, 501, 1),
         h.history['categorical_accuracy'],
         label='categorical_accuracy'
         )

plt.plot(range(1, 501, 1),
         h.history['loss'],
         label='loss'
         )
plt.grid()
plt.legend()
plt.show()

NameError: name 'h' is not defined

<Figure size 1080x360 with 0 Axes>

In [53]:
res = md.predict(X_test)

In [118]:
for i in range(0,10):
    actions[np.argmax(res[i])]
    actions[np.argmax(y_test[i])]
    print(actions[np.argmax(res[i])])
    print(actions[np.argmax(y_test[i])])
    print("==============================")

C1
C16
C1
C13
C1
C5
C1
C19
C1
C5
C1
C20
C1
C20
C1
C14
C1
C10
C1
C7


In [117]:
#np.sum(res[0])
actions[np.argmax(res[3])]
#actions[3]

'C1'

In [57]:
actions[np.argmax(y_test[6])]

'C3'

In [58]:
md.save('action.h5')

In [52]:
md.load_weights('action.h5')

In [53]:
from sklearn.metrics import multilabel_confusion_matrix,accuracy_score 

In [54]:
yhat = md.predict(X_train)

In [55]:
ytrue = np.argmax(y_train, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [56]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[141,   0],
        [  0,   9]],

       [[143,   0],
        [  0,   7]],

       [[144,   0],
        [  0,   6]],

       [[142,   0],
        [  0,   8]],

       [[143,   0],
        [  0,   7]],

       [[142,   0],
        [  0,   8]],

       [[141,   0],
        [  0,   9]],

       [[141,   0],
        [  0,   9]],

       [[143,   0],
        [  0,   7]],

       [[143,   0],
        [  0,   7]],

       [[144,   0],
        [  0,   6]],

       [[143,   0],
        [  0,   7]],

       [[142,   0],
        [  0,   8]],

       [[142,   0],
        [  0,   8]],

       [[143,   0],
        [  0,   7]],

       [[141,   0],
        [  0,   9]],

       [[140,   0],
        [  0,  10]],

       [[144,   0],
        [  0,   6]],

       [[145,   0],
        [  0,   5]],

       [[143,   0],
        [  0,   7]]], dtype=int64)

In [57]:
accuracy_score(ytrue, yhat)

1.0

In [58]:
md.evaluate(X_test, y_test)

RuntimeError: You must compile your model before training/testing. Use `model.compile(optimizer, loss)`.

In [65]:
colors = [(245,117,16),(117,245,16),(16,117,245),(245,117,16),(117,245,16),(16,117,245),(245,117,16)
         ,(117,245,16),(16,117,245),(245,117,16),(117,245,16),(16,117,245),(245,117,16),(117,245,16),(16,117,245),
         (245,117,16),(117,245,16),(16,117,245),(245,117,16)]
def prob_viz(res,actions,input_frame,colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40),(int(prob*100),90+num*40),colors[num],-1)
        cv2.putText(output_frame, actions[num],(0,85+num*40),cv2.FONT_HERSHEY_SIMPLEX,1,
                    (255,255,255),2,cv2.LINE_AA)
        
    return output_frame

In [66]:
res

array([4.8377425e-03, 1.0016668e-02, 7.0269720e-04, 5.3595090e-03,
       4.5815081e-04, 6.6672466e-03, 2.2623543e-02, 1.9071011e-02,
       4.3913402e-04, 5.3634332e-04, 8.2387322e-01, 1.0268092e-02,
       6.7437831e-03, 3.7950932e-03, 8.0697500e-04, 5.9810273e-02,
       2.6893665e-04, 4.9014268e-03, 1.1262407e-02, 7.5577516e-03],
      dtype=float32)

In [111]:
import cv2
# 1. New detection variables
sequence = []
sentence = []
threshold = 0.9
predictions = []
def most_frequent(data):
    return max(data, key=data.count)
#cap = cv2.VideoCapture("C:\\Users\\21SMT17\\Desktop\\수어 영상\\1.Training\\[원천]01_real_word_video\\01\\NIA_SL_WORD1771_REAL01_F.mp4")
cap = cv2.VideoCapture("./img/C2.mp4")
#cap = cv2.VideoCapture(0)
# Set mediapipe model

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:   
    while cap.isOpened():
    
        # read feed
        
        ret, frame = cap.read()
#        frame = imutils.resize(frame, width=800)
        if not ret:
            #print("비디오 읽기 실패 / 비디오 모두 읽음")
            print("result")
            cap.release()   # 비디오 읽기 종료
            cv2.destroyAllWindows()  # 새로 연 창을 모두 닫아줌
            break
            
        # Make detections
        image, results = mediapipe_detection(frame,holistic)
#         print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        #2. Prediction logic
        keypoints = extract_keypoints(results)
#        sequence.insert(0,keypoints)
#        sequence = sequence[:30]
        sequence.append(keypoints)
        sequence = sequence[-50:]
        
        if len(sequence) == 50 :  # 30 프레임            
            res = md.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
    
# #         # 3. Viz logic
#        if np.unique(predictions[-10:])==np.argmax(res): 
        if res[np.argmax(res)].any() > threshold: # 정확도
            if len(sentence) > 0:
                if actions[np.argmax(res)] != sentence[-1]:
                    sentence.append(actions[np.argmax(res)])
                    #print(actions[np.argmax(res)])
            else : 
                sentence.append(actions[np.argmax(res)])
                
        
#         if len(sentence) > 3 : #화면출력 글자 수
#             sentence = sentence[-3:]
                
#        # VIz probabilities
#        image = prob_viz(res, actions, image, colors)
            
#         cv2.rectangle(image, (0,0), (640,40), (245, 117, 16), -1)
#         cv2.putText(image, ' '.join(sentence), (3,30),
#                     cv2.FONT_HERSHEY_SIMPLEX,1, (255,255,255), 2, cv2.LINE_AA)
        
        # show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    print(actions[np.argmax(res)])
    cap.release()
    cv2.destroyAllWindows()
#print(most_frequent(sentence))

C2
C2
C2
C2
C2
C8
C8
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C2
C8
C8
C8
C8
C8
C8
C8
C8
C8
C8
C8
C8
C8
C8
C12
C12
C8
C8
C8
C8
C8
C8
C8
C8
result
C8


In [103]:
cap.release()
cv2.destroyAllWindows()

In [80]:
actions[np.argmax(res)]

'C2'

In [93]:
res[np.argmax(res)] > threshold

True

In [162]:
cap.release()
cv2.destroyAllWindows()

In [95]:
np.expand_dims(X_test[0], axis=0)

array([[[ 0.50501913,  0.2383828 , -0.33864751, ...,  0.40609717,
          0.95640504,  0.03483533],
        [ 0.50501168,  0.23563646, -0.3421663 , ...,  0.40827882,
          0.94811821,  0.03717453],
        [ 0.50495642,  0.23399298, -0.3708778 , ...,  0.41251111,
          0.94127142,  0.02812738],
        ...,
        [ 0.50673157,  0.24319147, -0.25979263, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.50583136,  0.24316476, -0.27586406, ...,  0.43466455,
          0.35418546, -0.06071239],
        [ 0.50506914,  0.24269697, -0.30533764, ...,  0.42042333,
          0.3390345 , -0.04357424]]])

In [96]:
md.predict(np.expand_dims(X_test[0], axis=0))

array([[2.3925745e-04, 2.5759581e-03, 1.9768402e-03, 3.1190534e-04,
        5.2450381e-05, 9.6837246e-01, 1.8717400e-03, 2.9972184e-03,
        8.5198809e-04, 2.5225597e-04, 2.6976778e-03, 8.0130274e-05,
        3.6315713e-03, 1.9323592e-03, 1.1952552e-03, 1.1286653e-04,
        1.3689073e-03, 5.6047798e-03, 1.8656834e-03, 2.0086193e-03]],
      dtype=float32)