In [1]:
import tensorflow as tf
import os
import numpy as np
import pandas as pd

In [2]:
def process_csv_file(file):
    csv_data = pd.read_csv(file)
    csv_data = csv_data.drop("class", axis = 1)
    return (csv_data.to_numpy())

In [3]:
def set_padding(arr, max_length):
    arr = np.append(arr, np.zeros((max_length-arr.shape[0],300)), axis=0)
    return np.expand_dims(arr, 0)

In [4]:
def prepare_data(folder_path, labels):
    total = 0
    max_length = 0
    # find max length
    for folder in os.listdir(folder_path):
        if folder != ".DS_Store":
            total += len(os.listdir(os.path.join(folder_path, folder)))
            for file in os.listdir(os.path.join(folder_path, folder)):
                data = process_csv_file(os.path.join(folder_path, folder, file))
                if data.shape[0] > max_length:
                    max_length = data.shape[0]
    # Make the arrays
    X = np.empty((0, max_length, 300))
    Y = np.empty((0,), int)
    for folder in os.listdir(folder_path):
        print(folder)
        if folder != ".DS_Store":
            for file in os.listdir(os.path.join(folder_path, folder)):
                data = process_csv_file(os.path.join(folder_path, folder, file))
                data = set_padding(data, max_length)
                X = np.vstack((X, data))
                Y = np.append(Y, labels[folder])
    print(total, max_length)
    return X, Y


In [5]:
folder_path = "/Users/aly/Documents/Programming/Apps/Machine Learning/ASL Converter/training_models/mediapipe/reformatting-the-data/data_four_labels/"
labels= {"coffee": 0, 'dog': 1, 'milk': 2, 'door': 3}
X, Y = prepare_data(folder_path, labels)

.DS_Store
dog
milk
coffee
door
143 211


In [6]:
padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(
    [
    [711, 632, 71],
    [73, 8, 3215, 55, 927],
    [83, 91, 1, 645, 1253, 927],
], padding="post"
)
# print(X[0][1])
print(((padded_inputs)))

[[ 711  632   71    0    0    0]
 [  73    8 3215   55  927    0]
 [  83   91    1  645 1253  927]]


In [7]:
Y = np.expand_dims(Y, axis=0)
Y= Y.T
Y

array([[1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
       [2],
    

In [8]:
# split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

In [9]:
X_train.shape, y_train.shape

((114, 211, 300), (114, 1))

In [11]:
from tensorflow.keras.layers import Dense, LSTM,RepeatVector ,TimeDistributed, Flatten
from tensorflow.keras import Sequential

# define model
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(211,300), return_sequences=True))
model.add(LSTM(64, activation='relu', return_sequences=False))
model.add(RepeatVector(211))
# model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(LSTM(128, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(4, activation='softmax')))

# compile model



In [12]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

model.compile(loss=SparseCategoricalCrossentropy(), optimizer=Adam(), metrics=["accuracy"])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 211, 300)          721200    
                                                                 
 lstm_4 (LSTM)               (None, 64)                93440     
                                                                 
 repeat_vector (RepeatVector  (None, 211, 64)          0         
 )                                                               
                                                                 
 lstm_5 (LSTM)               (None, 211, 64)           33024     
                                                                 
 lstm_6 (LSTM)               (None, 211, 128)          98816     
                                                                 
 flatten (Flatten)           (None, 27008)             0         
                                                      

In [13]:
history = model.fit(X_train,y_train, epochs=10, validation_split=0.1)

Epoch 1/10


2023-03-18 02:13:10.377603: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-03-18 02:13:11.903697: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




KeyboardInterrupt: 

In [38]:
x = np.array([[4], [2]])

In [41]:
x.T.shape

(1, 2)

In [42]:
x.T

array([[4, 2]])