<a href="https://colab.research.google.com/github/choeuneheol/python-practice/blob/master/youtube0731.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import time, os

In [None]:
# Gestures to record. A gesture's position in this list is stored as its
# class label in the last column of every saved frame.
actions = ['come','away','spin']
seq_length = 30  # number of consecutive frames per saved sequence
secs_for_action = 30  # recording time per gesture, in seconds

#mediapipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1,
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5)

cap = cv2.VideoCapture(0)

created_time = int(time.time())  # shared suffix of every file written by this run
os.makedirs('dataset', exist_ok=True)

try:
    if not cap.isOpened():
        print('Could not open the camera; nothing was recorded.')
    else:
        for idx, action in enumerate(actions):
            data = []

            # Show a banner for 3 seconds so the user can prepare the gesture.
            ret, img = cap.read()
            if not ret:
                # A failed read yields no image; stop here instead of
                # crashing inside cv2.flip.
                print('Failed to read a frame from the camera; stopping.')
                break

            img = cv2.flip(img, 1)

            cv2.putText(img, f'Waiting for collecting {action.upper()} action...', org=(10,30),
                       fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255, 255, 255,), thickness=2)
            cv2.imshow('img', img)
            cv2.waitKey(3000)

            start_time = time.time()

            # Record this gesture until the time budget is used up.
            while time.time() - start_time < secs_for_action:
                ret, img = cap.read()
                if not ret:
                    # Keep whatever was recorded so far for this gesture.
                    break

                img = cv2.flip(img, 1)
                # The frame is converted to RGB for hands.process and back
                # to BGR for drawing and display.
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                result = hands.process(img)
                img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

                if result.multi_hand_landmarks is not None:
                    for res in result.multi_hand_landmarks:
                        # One row per landmark: x, y, z, visibility.
                        joint = np.zeros((21,4))
                        for j, lm in enumerate(res.landmark):
                            joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                        #Compute angles between joints
                        v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19], :3]#parent joint
                        v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], :3]#child joint
                        v = v2 - v1 #[20, 3]
                        #Normalize v
                        v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                        #Get angle using arccos of dot product
                        cos_angle = np.einsum('nt,nt->n',
                                              v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:],
                                              v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]) # [15,]
                        # Rounding can push the dot product of two unit
                        # vectors slightly outside [-1, 1], where arccos
                        # returns NaN; clip it back into the valid domain.
                        angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
                        angle = np.degrees(angle) # Convert radian to degree

                        angle_label = np.array([angle], dtype=np.float32)
                        angle_label = np.append(angle_label, idx)#come 0, away 1, spin 2

                        # Row layout: 84 landmark values, 15 angles, 1 label.
                        d = np.concatenate([joint.flatten(), angle_label])

                        data.append(d)

                        mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

                cv2.imshow('img', img)
                # 'q' ends the recording of the current gesture early.
                if cv2.waitKey(1) == ord('q'):
                    break

            data = np.array(data)
            print(action, data.shape)
            np.save(os.path.join('dataset', f'raw_{action}_{created_time}'), data)

            #Create sequence data: sliding windows of seq_length frames
            full_seq_data = np.array([data[start:start + seq_length]
                                      for start in range(len(data) - seq_length)])
            print(action, full_seq_data.shape)
            if len(full_seq_data) == 0:
                # An empty array cannot be concatenated with the other
                # gestures' 3-D sequence arrays later on, so say so now.
                print(f'Warning: too few frames with a detected hand were recorded for '
                      f'{action}; the saved sequence file is empty.')
            np.save(os.path.join('dataset', f'seq_{action}_{created_time}'), full_seq_data)
finally:
    # Always hand the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
    
         

come (5, 100)
come (0,)
away (1, 100)
away (0,)
spin (0,)
spin (0,)


In [None]:
import numpy as np
import os

In [None]:
# GPU settings, passed to the runtime through environment variables.
os.environ.update({
    'CUDA_VISIBLE_DEVICES': '1',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

actions = ['come', 'away', 'spin']

# One saved sequence file per gesture, stacked along the first axis.
sequence_files = (
    'dataset/seq_come_1627646273.npy',
    'dataset/seq_away_1627646273.npy',
    'dataset/seq_spin_1627646273.npy',
)
data = np.concatenate([np.load(path) for path in sequence_files], axis=0)

data.shape

In [None]:
# Features are every column but the last; the label is read from the last
# column of the first frame of each sequence.
x_data, labels = data[:, :, :-1], data[:, 0, -1]

for arr in (x_data, labels):
    print(arr.shape)

In [None]:
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the labels, with one class per entry in `actions`.
n_classes = len(actions)
y_data = to_categorical(labels, num_classes=n_classes)
y_data.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Cast features and targets to float32 before splitting.
x_data, y_data = (arr.astype(np.float32) for arr in (x_data, y_data))

# Hold out 10% of the sequences for validation, with a fixed seed.
x_train, x_val, y_train, y_val = train_test_split(
    x_data, y_data, test_size=0.1, random_state=2022)

for features, targets in ((x_train, y_train), (x_val, y_val)):
    print(features.shape, targets.shape)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# LSTM over the frame sequence, followed by a small dense classifier head
# with one softmax output per gesture.
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=x_train.shape[1:3]))
model.add(Dense(32, activation='relu'))
model.add(Dense(len(actions), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Train for 200 epochs, validating on the held-out split after each epoch.
training_callbacks = [
    # Keep only the weights with the best validation accuracy seen so far.
    ModelCheckpoint('models/model.h5', monitor='val_acc', verbose=1, save_best_only=True, mode='auto'),
    # Halve the learning rate when validation accuracy has not improved for 50 epochs.
    ReduceLROnPlateau(monitor='val_acc', factor=0.5, patience=50, verbose=1, mode='auto')
]

history = model.fit(x_train,
                   y_train,  # was misspelled `y_trian`, which raised NameError
                   validation_data=(x_val, y_val),
                   epochs=200,
                   callbacks=training_callbacks)

In [None]:
import matplotlib.pyplot as plt

# Plot loss (left axis) and accuracy (right axis) per epoch on one figure.
metrics = history.history

fig, loss_ax = plt.subplots(figsize=(16,10))
acc_ax = loss_ax.twinx()

for key, fmt, label in (('loss', 'y', 'train loss'),
                        ('val_loss', 'r', 'val loss')):
    loss_ax.plot(metrics[key], fmt, label=label)
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(loc='upper left')

for key, fmt, label in (('acc', 'b', 'train acc'),
                        ('val_acc', 'g', 'val_acc')):
    acc_ax.plot(metrics[key], fmt, label=label)
acc_ax.set_ylabel('accuracy')
acc_ax.legend(loc='upper left')

plt.show()



In [None]:
from sklearn.metrics import multilabel_confusion_matrix
from tensorflow.keras.models import load_model

In [None]:
# Reload the checkpointed model and score it on the validation split.
model = load_model('models/model.h5')

y_pred = model.predict(x_val)

# Turn one-hot targets and predicted probabilities into class indices.
true_classes = np.argmax(y_val, axis=1)
predicted_classes = np.argmax(y_pred, axis=1)

multilabel_confusion_matrix(true_classes, predicted_classes)

In [None]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model

In [None]:
# Real-time gesture recognition from the webcam.
#
# This cell used to be a copy of the data-collection loop with fragments of
# inference code pasted in. It could not run: several names were misspelled
# or undefined, the model was never called, and the window-length check was
# inverted. It is now a plain inference loop that records nothing to disk.

# Class names; a prediction index is looked up in this list.
# NOTE(review): assumes the loaded model uses this label order - confirm.
actions = ['come', 'away', 'spin']
seq_length = 30  # number of most recent frames fed to the model

model = load_model('models/model2_1.0.h5')

#Mediapipe hands model
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1,
                      min_detection_confidence=0.5,
                      min_tracking_confidence=0.5)

cap = cv2.VideoCapture(0)

seq = []  # per-frame feature vectors, oldest first
action_seq = []  # confident predictions, oldest first

try:
    while cap.isOpened():
        ret, img = cap.read()
        if not ret:
            # A failed read yields no image; stop instead of crashing in cv2.flip.
            break

        img = cv2.flip(img, 1)
        # The frame is converted to RGB for hands.process and back to BGR
        # for drawing and display.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result = hands.process(img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        if result.multi_hand_landmarks is not None:
            for res in result.multi_hand_landmarks:
                # One row per landmark: x, y, z, visibility.
                joint = np.zeros((21,4))
                for j, lm in enumerate(res.landmark):
                    joint[j] = [lm.x, lm.y, lm.z, lm.visibility]

                #Compute angles between joints
                v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19], :3]#parent joint
                v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], :3]#child joint
                v = v2 - v1 #[20, 3]
                #Normalize v
                v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]

                #Get angle using arccos of dot product
                cos_angle = np.einsum('nt,nt->n',
                                      v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:],
                                      v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:]) # [15,]
                # Rounding can push the dot product of two unit vectors
                # slightly outside [-1, 1], where arccos returns NaN.
                angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
                angle = np.degrees(angle) # Convert radian to degree

                # 84 landmark values + 15 angles. No label is appended here:
                # there is no ground truth at inference time.
                # NOTE(review): assumes the model takes 99 features per frame - confirm.
                d = np.concatenate([joint.flatten(), angle])

                seq.append(d)
                # Only the newest seq_length frames are ever used.
                del seq[:-seq_length]

                mp_drawing.draw_landmarks(img, res, mp_hands.HAND_CONNECTIONS)

                # Wait until a full window of frames is available.
                if len(seq) < seq_length:
                    continue

                input_data = np.expand_dims(np.array(seq[-seq_length:], dtype=np.float32), axis=0)

                y_pred = model.predict(input_data, verbose=0).squeeze()

                i_pred = int(np.argmax(y_pred))
                conf = y_pred[i_pred]

                # Ignore low-confidence predictions.
                if conf < 0.9:
                    continue

                action = actions[i_pred]
                action_seq.append(action)
                # Only the last three predictions are compared below.
                del action_seq[:-3]

                if len(action_seq) < 3:
                    continue

                # Show a gesture only when the last three confident
                # predictions agree; otherwise show '?'.
                this_action = '?'
                if action_seq[-1] == action_seq[-2] == action_seq[-3]:
                    this_action = action

                # Draw the result just below landmark 0 of the hand.
                cv2.putText(img, f'{this_action.upper()}',
                            org=(int(res.landmark[0].x * img.shape[1]),
                                 int(res.landmark[0].y * img.shape[0] + 20)),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                            color=(255,255,255), thickness=2)

        cv2.imshow('img', img)
        # Press 'q' to quit.
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always hand the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()
