In [18]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

### Preprocess Data 

In [19]:
# Behaviour classes the model distinguishes; an action's position in the
# array is its integer class id.
actions = np.array(['good', 'weaving', 'red_light', 'cross_yellow'])
label_map = dict(zip(actions, range(len(actions))))

In [20]:
# Display the action-name -> class-index mapping.
label_map

{'good': 0, 'weaving': 1, 'red_light': 2, 'cross_yellow': 3}

In [21]:
# Build one window of per-frame feature vectors for every recording, plus the
# recording's class id.
#
# Each frame contributes, in this order:
#   1. a CROP_SIDE x CROP_SIDE x 3 patch centred on the bounding box, scaled to
#      [0, 1] and flattened (regions outside the frame are left as zeros),
#   2. the box corners (xtl, ytl, xbr, ybr) divided by the frame size,
#   3. an (x, y) pair per keypoint, written as (0, 0) when the keypoint's third
#      value is 0,
#   4. the first integer on the first line of the frame's sensor file.
sequences, labels = [], []
starting_label = 220  # number of the first label file; advanced once per frame

kill = 0  # running count of processed frames (handy for ad-hoc debugging)

# Label coordinates are multiplied by these to get pixels.
# NOTE(review): assumes every recording is 1920x1080 - confirm.
FRAME_WIDTH, FRAME_HEIGHT = 1920, 1080
CROP_HALF = 150
CROP_SIDE = 2 * CROP_HALF
SEQUENCES_PER_ACTION = 15
FRAMES_PER_SEQUENCE = 10

cropped_img_size = CROP_SIDE * CROP_SIDE * 3


def _box_and_keypoint_features(keypoints):
    """Turn one label row into box/keypoint features and the box centre.

    Args:
        keypoints: 1-D array read from a label file. Indices 1-4 are used as
            the box centre x, centre y, width and height (fractions of the
            frame); values from index 5 on are read as (x, y, flag) triplets.

    Returns:
        (features, (cx, cy)): the list of box-corner and keypoint values, and
        the integer pixel centre of the box.
    """
    xc, yc, w, h = keypoints[1], keypoints[2], keypoints[3], keypoints[4]
    w = int(w * FRAME_WIDTH)
    h = int(h * FRAME_HEIGHT)
    xtl = int((xc * FRAME_WIDTH) - (w / 2))
    ytl = int((yc * FRAME_HEIGHT) - (h / 2))
    xbr = xtl + w
    ybr = ytl + h

    features = [
        xtl / FRAME_WIDTH,
        ytl / FRAME_HEIGHT,
        xbr / FRAME_WIDTH,
        ybr / FRAME_HEIGHT,
    ]

    points = keypoints[5:]
    for i in range(0, len(points), 3):
        flag_idx = i + 2
        # '>=' (not '>'): an index equal to the length is already out of
        # range, so a truncated trailing triplet must stop the loop here.
        if flag_idx >= len(points):
            break
        if points[flag_idx] == 0:
            features.extend((0, 0))
        else:
            features.extend((points[i], points[i + 1]))

    return features, (xtl + int(w / 2), ytl + int(h / 2))


def _centred_crop(img, cx, cy):
    """Return a CROP_SIDE x CROP_SIDE x 3 float32 patch of img centred on (cx, cy).

    Pixel values are scaled to [0, 1]. The requested window is clipped to the
    image and pasted at the matching offset into a zero canvas, so a window
    that hangs over an edge keeps its row layout, and a negative start index
    cannot wrap around to the far side of the image.
    """
    canvas = np.zeros((CROP_SIDE, CROP_SIDE, 3), dtype=np.float32)
    height, width = img.shape[:2]
    y0, x0 = cy - CROP_HALF, cx - CROP_HALF
    src_y0, src_y1 = max(y0, 0), min(cy + CROP_HALF, height)
    src_x0, src_x1 = max(x0, 0), min(cx + CROP_HALF, width)
    if src_y0 < src_y1 and src_x0 < src_x1:
        patch = img[src_y0:src_y1, src_x0:src_x1].astype(np.float32) / 255.0
        canvas[src_y0 - y0:src_y1 - y0, src_x0 - x0:src_x1 - x0] = patch
    return canvas


for action in actions:
    for sequence in range(SEQUENCES_PER_ACTION):
        recording_dir = os.path.join('../../data/recordings', action, str(sequence))
        window = []
        for frame_num in range(FRAMES_PER_SEQUENCE):
            keypoints = np.loadtxt('../../data/cars/labels/all/%s.txt' % starting_label, dtype=float)

            img_path = os.path.join(recording_dir, 'videos/%s.png' % frame_num)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise OSError('cv2.imread could not load frame image: %s' % img_path)

            # 'with' guarantees the handle is closed even if parsing fails.
            with open(os.path.join(recording_dir, 'sensors/%s.txt' % frame_num)) as sensor_file:
                red_light_sensor = np.array([int(sensor_file.readline().split()[0])])

            keypoints_norm, (cx, cy) = _box_and_keypoint_features(keypoints)
            cropped_img_norm_flattened = _centred_crop(img, cx, cy).flatten()

            kill += 1
            starting_label += 1

            res = np.concatenate([cropped_img_norm_flattened, keypoints_norm, red_light_sensor])
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [22]:
# Stack the per-recording windows into a single array.
X = np.asarray(sequences)

In [23]:
# Show the dimensions of the stacked feature array.
X.shape

(60, 10, 270037)

In [24]:
# Show the dimensions of the label list once converted to an array.
np.array(labels).shape

(60,)

In [25]:
# One-hot encode the integer class ids. num_classes is pinned to the number of
# actions so the encoding width always equals the size of the softmax output
# layer, even if the highest class id happened to be absent from `labels`.
y = to_categorical(labels, num_classes=len(actions)).astype(int)

In [26]:
# Hold out 5% of the windows for testing. The fixed seed makes the split
# repeatable across kernel restarts; without it a different set of windows is
# held out on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [27]:
# Show the dimensions of the training split.
X_train.shape

(57, 10, 270037)

### Build and Train LSTM

In [28]:
# TensorBoard logs go to the relative directory 'Logs'.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [29]:
# Three stacked LSTM layers followed by two dense layers and a softmax over
# the action classes. The input shape is taken from the training data rather
# than hard-coded, so the model keeps matching the preprocessing if the window
# length or per-frame feature count changes.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=X_train.shape[1:]),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer emits only its final step for the dense head.
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])




In [32]:
# Configure training: categorical cross-entropy pairs with the one-hot targets
# and the softmax output layer.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [33]:
# Train on the training split, logging to TensorBoard. With the 57 training
# windows shown in the recorded shape above, batch_size=64 puts every window
# in a single batch per epoch.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=3000,
    callbacks=[tb_callback],
)

Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
Epoch 8/3000
Epoch 9/3000
Epoch 10/3000
Epoch 11/3000
Epoch 12/3000
Epoch 13/3000
Epoch 14/3000
Epoch 15/3000
Epoch 16/3000
Epoch 17/3000
Epoch 18/3000
Epoch 19/3000
Epoch 20/3000
Epoch 21/3000
Epoch 22/3000
Epoch 23/3000
Epoch 24/3000
Epoch 25/3000
Epoch 26/3000
Epoch 27/3000
Epoch 28/3000
Epoch 29/3000
Epoch 30/3000
Epoch 31/3000
Epoch 32/3000
Epoch 33/3000
Epoch 34/3000
Epoch 35/3000
Epoch 36/3000
Epoch 37/3000
Epoch 38/3000
Epoch 39/3000
Epoch 40/3000
Epoch 41/3000
Epoch 42/3000
Epoch 43/3000
Epoch 44/3000
Epoch 45/3000
Epoch 46/3000
Epoch 47/3000
Epoch 48/3000
Epoch 49/3000
Epoch 50/3000
Epoch 51/3000
Epoch 52/3000
Epoch 53/3000
Epoch 54/3000
Epoch 55/3000
Epoch 56/3000
Epoch 57/3000
Epoch 58/3000
Epoch 59/3000
Epoch 60/3000
Epoch 61/3000
Epoch 62/3000
Epoch 63/3000
Epoch 64/3000
Epoch 65/3000
Epoch 66/3000
Epoch 67/3000
Epoch 68/3000
Epoch 69/3000
Epoch 70/3000
Epoch 71/3000
Epoch 72/3000
E

<keras.src.callbacks.History at 0x21e622a9ad0>

In [34]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 64)            69146112  
                                                                 
 lstm_1 (LSTM)               (None, 10, 128)           98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 4)                 132       
                                                                 
Total params: 69300708 (264.36 MB)
Trainable params: 693

In [35]:
# Run the model on the held-out windows; each row holds the softmax scores
# for one test window.
res = model.predict(X_test)



In [36]:
# Action name with the highest predicted score for the test window at index 2.
actions[np.argmax(res[2])]

'cross_yellow'

In [37]:
# Action name encoded by the one-hot target of the test window at index 2.
actions[np.argmax(y_test[2])]

'cross_yellow'

#### Evaluation

In [38]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [39]:
# Predict class scores for the held-out windows for use in the evaluation.
yhat = model.predict(X_test)



In [40]:
# Collapse the one-hot targets and the predicted scores to class indices.
# Predictions are recomputed here rather than read back from `yhat`, so the
# cell can be re-run safely: feeding its own already-collapsed index list to
# argmax(axis=1) would fail.
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(model.predict(X_test), axis=1).tolist()

In [41]:
# One 2x2 confusion matrix per class, comparing true and predicted indices.
multilabel_confusion_matrix(ytrue, yhat)

array([[[2, 0],
        [1, 0]],

       [[2, 1],
        [0, 0]],

       [[2, 0],
        [0, 1]],

       [[2, 0],
        [0, 1]]], dtype=int64)

In [42]:
# Fraction of test windows whose predicted class equals the true class.
accuracy_score(ytrue, yhat)

0.6666666666666666

In [43]:
# Write the trained model to 'car_action_v1.h5' in the working directory.
model.save('car_action_v1.h5')

  saving_api.save_model(
