## 1.1 **Dataset Preprocessing:** Load, Clean, Preprocess data

In [1]:
# =======================================================
# ============== Load the dataset =======================
# =======================================================

import pandas as pd
import os
import warnings

warnings.filterwarnings('ignore')
directory = 'dataset/' # local directory
directory = '/kaggle/input/jules-varne/dataset/' # kaggle directory

df = pd.DataFrame()

for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        currFile = pd.read_csv(os.path.join(directory, filename))
        df = pd.concat([df, currFile], ignore_index=True)

print("1.1 - 1: Dataset Loaded")
# =======================================================
# =============== Cleaning the dataset ==================
# =======================================================

columns = [
    "RecordingTime [ms]",
    "Time of Day [h:m:s:ms]",
    "Participant", 
    "Pupil Diameter Right [mm]",
    "Pupil Diameter Left [mm]",
    "Point of Regard Right X [px]",
    "Point of Regard Right Y [px]",
    "Point of Regard Left X [px]", 
    "Point of Regard Left Y [px]", 
    "Category Right", 
    "Category Left",

]


df = df[columns] # DONE: remove these extra variables. They are eating up memory.

# --------------------------------------------------

# df = df_0_columns_separated 
df["RecordingTime [ms]"].describe()

is_null_in_column = df['RecordingTime [ms]'].isnull().any()
is_null_in_column 

# df_1_rec_time_processed = df # TODO: remove these extra variables. They are eating up memory.

# --------------------------------------------------


# df = df_1_rec_time_processed
df["Time of Day [h:m:s:ms]"].describe()

is_null_in_column = df['Time of Day [h:m:s:ms]'].isnull().any()
is_null_in_column # same, no null

# df_2_time_of_day = df
# --------------------------------------------------

# df = df_2_time_of_day
df = df[(df['Participant'] != 'Unidentified(Neg)') & (df['Participant'] != 'Unidentified(Pos)')]
df["Participant"] = pd.to_numeric(df["Participant"], errors='coerce')

# df_3_participant = df # DONE: remove these extra variables. They are eating up memory.
# --------------------------------------------------
# df = df_3_participant


column_name = "Pupil Diameter Right [mm]"

df[column_name] = df[column_name].astype(str) 
df = df[pd.to_numeric(df[column_name], errors='coerce').notnull()]
df[column_name] = pd.to_numeric(df[column_name], errors='coerce') 
df = df[df[column_name] != 0] 

column_name = "Pupil Diameter Left [mm]"

df[column_name] = df[column_name].astype(str)
df = df[pd.to_numeric(df[column_name], errors='coerce').notnull()]  
df[column_name] = pd.to_numeric(df[column_name], errors='coerce') 
df = df[df[column_name] != 0] 


# df_5_pupil_both = df # TODO: remove these extra variables. They are eating up memory.
# --------------------------------------------------
point_of_regard_idx = [
    "Point of Regard Right X [px]",
    "Point of Regard Right Y [px]",
    "Point of Regard Left X [px]",
    "Point of Regard Left Y [px]",
]


for point in point_of_regard_idx:
    df.dropna(subset=[point], inplace=True)
    df = df.drop(df[df[point] == '-'].index)
    a = pd.to_numeric(df[point])

# df_9_point_of_regard = df_5_pupil_both # TODO: remove these extra variables. They are eating up memory.
# --------------------------------------------------

stays = [
    "Fixation",
    "Saccade",
    "Blink"
]

goes = [
    "Separator",
    "-",
    "Left Click",
    # Null
]


# df = df_9_point_of_regard

df = df[df['Category Left'].isin(stays)]
df = df[df['Category Right'].isin(stays)]

df = pd.get_dummies(df, columns=['Category Left'], prefix=['Category Left'])
df = pd.get_dummies(df, columns=['Category Right'], prefix=['Category Right'])
df.head()

# df_final = df
# df = df_final
# --------------------------------------------------
metadata_dir = "./Metadata_Participants.csv" #local dir
metadata_dir = "/kaggle/input/junes-verne-metadata/Metadata_Participants.csv" #kaggle dir

metadata = pd.read_csv(metadata_dir)
subjectClass = dict()
for idx, row in metadata.iterrows():
    subject = (row["ParticipantID"])
    category = (row["Class"])
    subjectClass[subject] = 1 if category == 'ASD' else 0

def getClass(subjectId):
    return subjectClass[subjectId]

print("1.1 - 2: Dataset Cleaning Complete")
# --------------------------------------------------

1.1 - 1: Dataset Loaded
1.1 - 2: Dataset Cleaning Complete


In [2]:
df.head()

Unnamed: 0,RecordingTime [ms],Time of Day [h:m:s:ms],Participant,Pupil Diameter Right [mm],Pupil Diameter Left [mm],Point of Regard Right X [px],Point of Regard Right Y [px],Point of Regard Left X [px],Point of Regard Left Y [px],Category Left_Blink,Category Left_Fixation,Category Left_Saccade,Category Right_Blink,Category Right_Fixation,Category Right_Saccade
194500,4780020.865,13:22:17:260,50,4.3785,4.5431,595.6325,375.6149,595.6325,375.6149,False,True,False,False,True,False
194501,4780040.758,13:22:17:279,50,4.405,4.5283,597.3023,375.7919,597.3023,375.7919,False,True,False,False,True,False
194502,4780080.503,13:22:17:319,50,4.4273,4.6036,596.9685,378.2868,596.9685,378.2868,False,True,False,False,True,False
194503,4780120.36,13:22:17:359,50,4.3514,4.5827,596.4349,378.5467,596.4349,378.5467,False,True,False,False,True,False
194504,4780140.235,13:22:17:379,50,4.3538,4.5399,599.7004,379.7574,599.7004,379.7574,False,True,False,False,True,False


After grouping by sessions is done, we can do this.

- Consider each individual session as a singular data unit.
- From each units of data, compute all the feautures, and feed it into the classifier for proper patterns recognition
- Goal is to find as many features, correlated to ASD vs TD as possible.
- Hypothesis is there should be subtle patterns in eye movement speeds, fixation duration, and other similar charecteristics
- Eye pupil diameters reveal one's interest in a certain object / event / topic. Based on this, we can calculate how long a subject is able to hold his/her interest, and possibility is, that people with ASD will have different duration they can hold interests, ideally more on objects, less on people, and maybe similar to ADHD, there is some other patterns that otherwise would've been overlooked, in the topic of interest. Possible patterns:
    - How long can a subject hold interest
    - How often does he/she change it
    - Fatigue? How long till he loses interest, in a given session (is this correlated?)
    - Saccade movement, and pupil diameter, is there any correlation / patterns
- Eye movement is typically faster in ASD (ref. - )
- Ability to focus on an object right after fast switching of gaze, is slower among ASD (ref. - )
- Eye movements should be far less among TD than ASD, ASD people's gaze movement should be faster, little erratic, and possibly a bit jittery. Unable to focus.
- 

## 1.2. **Feature Engineering**: Separate Sessions, Compute Outputs, Compute Features, Remove Unnecessary Columns

In [3]:
# =======================================================
# =========== The Feature Compute Functions =============
# =======================================================

import math
import numpy as np
import time

def getDistanceBetweenPoints(row, prevRow):
    leftX = float(row["Point of Regard Left X [px]"])
    leftY = float(row["Point of Regard Left Y [px]"])
    rightX = float(row["Point of Regard Right X [px]"])
    rightY = float(row["Point of Regard Right Y [px]"])

    x1 = (leftX + rightX) / 2
    y1 = (leftY + rightY) / 2

    prevLeftX = float(prevRow["Point of Regard Left X [px]"])
    prevLeftY = float(prevRow["Point of Regard Left Y [px]"])
    prevRightX = float(prevRow["Point of Regard Right X [px]"])
    prevRightY = float(prevRow["Point of Regard Right Y [px]"])

    x2 = (prevLeftX + prevRightX) / 2
    y2 = (prevLeftY + prevRightY) / 2

    distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)

    return distance


def getChangesInDistanceBetweenPoints(currDist, prevDist):
    return abs(currDist - prevDist)

def getGazeSpeed(dist, duration):
    return dist / duration

def getChangesInPupilDiameter(curr, prev):
    return abs(curr - prev)

### Additional Features:

# Compound features I'm going to compute:
# 1. Changes in pupil diameter (left, right) - easy
# 2. Gaze Vectors?
# What kinds of patterns am I looking for?
# 1. Fast gaze movmenets - requires gaze speed calculation
# 2. Gaze distance covered - requires compound / cumulitive distance computation
# 3. Gaze speed changes - compute from the 1st feature here.
# 4. Flactuations in gaze speed
# Okay! Let's compute these 4 features as well!! See if it improves the accuracy!


# ==========================================================
# ================== Separate Sessions =====================
# ==========================================================
computed = False # Hardcoded false
# computed = True # Hardcoded

allSessions = []

if True:
    print("---- Started separating sessions -----")
    y = []
    currSession = pd.DataFrame()

    prevId = -1
    cnt = 0

    start_time = time.time()

    for index, row in df.iterrows():
        if row["Participant"] != prevId:
            allSessions.append(currSession.T)
            y.append(row["Participant"])
            currSession = pd.DataFrame()
        prevId = row["Participant"]
        currSession = pd.concat([currSession, row], axis=1)

    print("--- %s seconds ---" % (time.time() - start_time))
  
    X = allSessions[1:]

print("2.1 - 3: Sessions Seperated")


# ==================================================================
# ============== Compute the features, and add them  ===============
# ============== Add remove the unnecessary features ===============
# ==================================================================

X = allSessions[1:]

for j in range(len(X)):
    df = X[j]    
    # ---------------- Step 1 ---------------        
    prevRow = None
    prevDist = 0
    
    feat1_dist = []
    feat2_dist_diffs = []

    for i, row in df.iterrows():
        if prevRow is None:
            prevRow = row
        dist = getDistanceBetweenPoints(row, prevRow)
        dist_diff = getChangesInDistanceBetweenPoints(dist, prevDist)
        # speed = 

        prevRow = row
        prevDist = dist

        feat1_dist.append(dist)
        feat2_dist_diffs.append(dist_diff)
    # ---------------- Step 2 ---------------
    # To DataFrame
    feat1_dist_df = pd.DataFrame(feat1_dist)
    feat2_dist_diffs_df = pd.DataFrame(feat2_dist_diffs)

    # Set column names / feature names
    feat1_dist_df.columns = ["Distance"]
    feat2_dist_diffs_df.columns = ["Distance Difference"]
    
    # ---------------- Step 3 ---------------
    as_is = [
        "Participant",
        "Category Left_Blink",
        "Category Left_Fixation",
        "Category Left_Saccade",
        "Category Right_Blink",
        "Category Right_Fixation",
        "Category Right_Saccade",
        "Pupil Diameter Right [mm]",
        "Pupil Diameter Left [mm]",
        
        "Point of Regard Right X [px]",
        "Point of Regard Right Y [px]",
        "Point of Regard Left X [px]", 
        "Point of Regard Left Y [px]", 
    ]

    df_as_is = df[as_is]
    df_as_is.reset_index(drop=True, inplace=True)
    feat1_dist_df.reset_index(drop=True, inplace=True)
    feat2_dist_diffs_df.reset_index(drop=True, inplace=True)


    result = pd.concat([df_as_is, feat1_dist_df, feat2_dist_diffs_df], axis=1, join='outer')
#     result = pd.concat([df_as_is, feat1_dist_df], axis=1, ignore_index=True)
#     print(result.shape, " == ", df.shape)
    X[j] = result
    
print("2.1 - 4: Feature Computation Complete")

# =========================================================
# =============== Pad the dataset, to make  ===============
# =============== all of them same sized    ===============
# =========================================================

for i in range(len(X)):
    X[i] = np.array(X[i])

def pad_dataset(data, target_length):
    num_padding_rows = target_length - data.shape[0]
    padding = np.zeros((num_padding_rows, data.shape[1]))
    padded_data = np.concatenate((data, padding))
    return padded_data

# debug_X = X[:]
# print("debug: X at this point, stored as `debug_X`")
max_shape = max([data.shape[0] for data in X])
print("debug: Max Shape = ", max_shape)
padded_X = [pad_dataset(data, max_shape) for data in X]
padded_X = np.array(padded_X)

X = np.array(padded_X)
y = np.array(y)

print("2.1 - 5: Dataset padding for uniform shape - complete.")

# ==================================================================
# ============== Compute the output variables ======================
# ==================================================================

y = []
for x in X:
    y.append(getClass(x[0][0]))
    continue
# len(y)

print("2.1 - 6: Output Variable Computed")

---- Started separating sessions -----
--- 978.3524844646454 seconds ---
2.1 - 3: Sessions Seperated
2.1 - 4: Feature Computation Complete
debug: Max Shape =  12055
2.1 - 5: Dataset padding for uniform shape - complete.
2.1 - 6: Output Variable Computed


## **NOTE**: (categories, ASD = 0, TD = 1)
ASD = 0

TD = 1

# 2. Training

In [4]:
import numpy as np
from sklearn.model_selection import train_test_split

# Assuming X and y are your original data arrays

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ensure correct data types
X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.int32)

# Rest of your code...

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Define the input shape
input_shape = (12055, 15)

# Create the model
model = Sequential()

# Add an LSTM layer
model.add(LSTM(64, input_shape=input_shape))

# Add a dense output layer with sigmoid activation for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Fit the model
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Save the model
model.save('lstm_model.h5')


2024-06-08 14:52:58.417818: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-08 14:52:58.417913: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-08 14:52:58.542442: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 433ms/step - accuracy: 0.6959 - loss: 0.6825
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 432ms/step - accuracy: 0.6620 - loss: 0.6515
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 428ms/step - accuracy: 0.6880 - loss: 0.6210
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 428ms/step - accuracy: 0.6680 - loss: 0.6359
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 429ms/step - accuracy: 0.6869 - loss: 0.6223
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 429ms/step - accuracy: 0.6560 - loss: 0.6451
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 431ms/step - accuracy: 0.6796 - loss: 0.6308
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 428ms/step - accuracy: 0.6703 - loss: 0.6348
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━

In [6]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Assuming you have your test data X_test and y_test
X_test = np.array(X_test, dtype=np.float32)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Convert predictions to binary class labels
y_pred_classes = np.round(y_pred).astype(int)

# Print the classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

# Generate and print the confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 239ms/step
Classification Report:
              precision    recall  f1-score   support

           0       0.68      1.00      0.81        78
           1       0.00      0.00      0.00        36

    accuracy                           0.68       114
   macro avg       0.34      0.50      0.41       114
weighted avg       0.47      0.68      0.56       114

Confusion Matrix:
[[78  0]
 [36  0]]


In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout

# Define the input shape
input_shape = (12055, 15)

model = Sequential()
model.add(LSTM(128, input_shape=input_shape))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, epochs=10, batch_size=32)
model.save('lstm_model.h5')

Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 495ms/step - accuracy: 0.6833 - loss: 0.6853
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 495ms/step - accuracy: 0.7020 - loss: 0.6509
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 495ms/step - accuracy: 0.6357 - loss: 0.6689
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 495ms/step - accuracy: 0.6537 - loss: 0.6542
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 497ms/step - accuracy: 0.6833 - loss: 0.6293
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 495ms/step - accuracy: 0.6719 - loss: 0.6326
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 495ms/step - accuracy: 0.6956 - loss: 0.6157
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 495ms/step - accuracy: 0.7121 - loss: 0.6070
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Dropout

# Define the input shape
input_shape = (12055, 15)

model = Sequential()
model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, epochs=10, batch_size=32)
model.save('lstm_model.h5')

Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step - accuracy: 0.5641 - loss: 0.6750
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6665 - loss: 0.6606
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6362 - loss: 0.6513
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6954 - loss: 0.6129
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.7126 - loss: 0.6110
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.7252 - loss: 0.5965
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6855 - loss: 0.6276
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6623 - loss: 0.6396
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [9]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

# Define the input shape
input_shape = (12055, 15)

# Create the model
model = Sequential()
model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(X_train, y_train, epochs=10, batch_size=32)
model.save('lstm_model.h5')

Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1s/step - accuracy: 0.6750 - loss: 0.6926
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6817 - loss: 0.6794
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6801 - loss: 0.6368
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6817 - loss: 0.6378
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - accuracy: 0.6975 - loss: 0.6247
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6654 - loss: 0.6443
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6545 - loss: 0.6432
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.6729 - loss: 0.6494
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [10]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Bidirectional

# Define the input shape
input_shape = (12055, 15)

# Create the model
model = Sequential()
model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=input_shape))
# model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
# model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(128, return_sequences=True)))
# model.add(Dropout(0.2))
model.add(Bidirectional(LSTM(128)))
# model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
# model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
# model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()
model.fit(X_train, y_train, epochs=10, batch_size=32)
model.save('lstm_model.h5')

Epoch 1/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 4s/step - accuracy: 0.7196 - loss: 0.5943
Epoch 2/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - accuracy: 0.7985 - loss: 0.4982
Epoch 3/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4s/step - accuracy: 0.8208 - loss: 0.3739
Epoch 4/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - accuracy: 0.7842 - loss: 0.3917
Epoch 5/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 4s/step - accuracy: 0.8859 - loss: 0.3411
Epoch 6/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - accuracy: 0.9408 - loss: 0.1891
Epoch 7/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - accuracy: 0.8839 - loss: 0.3067
Epoch 8/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 4s/step - accuracy: 0.9344 - loss: 0.1883
Epoch 9/10
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[