In [1]:
import os
import scipy.io
import pandas as pd
import numpy as np


# Directory containing the training .mat files
folder_path = 'TFA/Train/'

# Every entry in the directory; os.listdir returns them in arbitrary order.
files = os.listdir(folder_path)

# Keep only the .mat files, sorted by name so that the order in which the
# recordings are loaded and concatenated is the same on every run and machine.
mat_files = sorted(file for file in files if file.endswith('.mat'))


### Splitting the data into train and test sets: data from 8 persons for training and data from 2 persons for testing

In [2]:
# One labelled DataFrame per training file; they are stacked after the loop.
dfs = []

for file in mat_files:
    mat_data = scipy.io.loadmat(os.path.join(folder_path, file))
    # The array to use is read from the 'tfaOut' variable of the .mat file.
    data = mat_data["tfaOut"]
    print(np.shape(data))
    # Move axis 1 to the front and flatten the two remaining axes, so every
    # entry along the original axis 1 becomes one row of the table.
    n_rows = data.shape[1]
    reshaped_data = np.transpose(data, (1, 0, 2)).reshape(n_rows, -1)
    df = pd.DataFrame(reshaped_data)
    print(np.shape(df))
    # Labels: 0 for the first 5000 and the last 5000 rows, 1 for every row in
    # between. Starting from zeros, only the middle slice has to be written.
    label_column = np.zeros(len(df))
    label_column[5000:-5000] = 1
    df['label'] = label_column
    dfs.append(df)

# Stack the per-file frames into one frame with a fresh 0..N-1 index.
df_train = pd.concat(dfs, ignore_index=True)


(41, 20258, 10)
(20258, 410)
(41, 89167, 10)
(89167, 410)
(41, 85351, 10)
(85351, 410)
(41, 18127, 10)
(18127, 410)
(41, 21829, 10)
(21829, 410)
(41, 77912, 10)
(77912, 410)
(41, 33906, 10)
(33906, 410)
(41, 48740, 10)
(48740, 410)


In [3]:
# Display the concatenated training frame (feature columns followed by 'label').
df_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,401,402,403,404,405,406,407,408,409,label
0,0.000035,0.000019,0.000025,7.008969e-07,9.499883e-05,0.001664,0.000027,0.000035,0.000575,0.000053,...,1.100189e-25,1.433261e-25,3.995573e-27,5.415557e-25,9.486691e-24,1.544730e-25,1.997394e-25,3.277730e-24,3.037160e-25,0.0
1,0.000029,0.000100,0.000027,4.948846e-07,2.699616e-05,0.002179,0.000141,0.000135,0.000781,0.000122,...,5.432920e-25,1.477424e-25,2.689030e-27,1.466876e-25,1.184130e-23,7.653895e-25,7.316077e-25,4.244378e-24,6.611510e-25,0.0
2,0.000019,0.000222,0.000024,3.986104e-06,5.100606e-07,0.002535,0.000308,0.000274,0.000936,0.000194,...,1.037810e-24,1.134149e-25,1.860307e-26,2.380443e-27,1.183297e-23,1.439358e-24,1.276510e-24,4.370623e-24,9.056497e-25,0.0
3,0.000010,0.000349,0.000017,8.104923e-06,1.082350e-05,0.002651,0.000478,0.000411,0.001003,0.000248,...,1.222853e-24,5.901307e-26,2.841358e-26,3.794415e-26,9.293149e-24,1.674689e-24,1.440337e-24,3.517460e-24,8.697190e-25,0.0
4,0.000004,0.000439,0.000008,9.835971e-06,4.195622e-05,0.002500,0.000599,0.000505,0.000965,0.000268,...,9.375238e-25,1.626744e-26,2.098587e-26,8.951713e-26,5.334137e-24,1.277743e-24,1.077082e-24,2.057989e-24,5.717150e-25,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395285,0.000520,0.000311,0.000435,5.806670e-04,6.472682e-04,0.000886,0.000476,0.000614,0.000964,0.000142,...,2.594366e-25,3.622873e-25,4.837371e-25,5.392206e-25,7.380861e-25,3.964984e-25,5.117189e-25,8.026859e-25,1.184336e-25,0.0
395286,0.000575,0.000270,0.000535,7.006664e-04,8.236793e-04,0.001062,0.000815,0.000781,0.001238,0.000121,...,5.768042e-25,1.140592e-24,1.494167e-24,1.756492e-24,2.263834e-24,1.739039e-24,1.665312e-24,2.640725e-24,2.576752e-25,0.0
395287,0.000561,0.000192,0.000582,7.519064e-04,9.366418e-04,0.001136,0.001217,0.000885,0.001411,0.000083,...,6.734301e-25,2.039456e-24,2.635312e-24,3.282780e-24,3.981134e-24,4.266449e-24,3.101012e-24,4.944687e-24,2.917922e-25,0.0
395288,0.000481,0.000102,0.000562,7.200209e-04,9.593292e-04,0.001088,0.001631,0.000901,0.001436,0.000043,...,4.761704e-25,2.623168e-24,3.359977e-24,4.476709e-24,5.076395e-24,7.609251e-24,4.204713e-24,6.700812e-24,1.985795e-25,0.0


In [3]:
# Directory containing the test .mat files
folder_path = 'TFA/Test/'

# Every entry in the directory; os.listdir returns them in arbitrary order.
files = os.listdir(folder_path)

# Keep only the .mat files, sorted by name so that the order in which the
# recordings are loaded and concatenated is the same on every run and machine.
mat_files = sorted(file for file in files if file.endswith('.mat'))
# Load every test .mat file into its own labelled DataFrame.
dfs = []

for file in mat_files:
    mat_data = scipy.io.loadmat(os.path.join(folder_path, file))
    # The array to use is read from the 'tfaOut' variable of the .mat file.
    data = mat_data["tfaOut"]
    print(np.shape(data))
    # Move axis 1 to the front and flatten the two remaining axes, so every
    # entry along the original axis 1 becomes one row of the table.
    n_rows = data.shape[1]
    reshaped_data = np.transpose(data, (1, 0, 2)).reshape(n_rows, -1)
    df = pd.DataFrame(reshaped_data)
    print(np.shape(df))
    # Labels: 0 for the first 5000 and the last 5000 rows, 1 for every row in
    # between. Starting from zeros, only the middle slice has to be written.
    label_column = np.zeros(len(df))
    label_column[5000:-5000] = 1
    df['label'] = label_column
    dfs.append(df)

# Stack the per-file frames into one frame with a fresh 0..N-1 index.
df_test = pd.concat(dfs, ignore_index=True)



(41, 22249, 10)
(22249, 410)
(41, 25325, 10)
(25325, 410)


In [5]:
# Display the concatenated test frame (feature columns followed by 'label').
df_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,401,402,403,404,405,406,407,408,409,label
0,0.000004,0.000303,5.713750e-07,1.040541e-05,9.201673e-07,9.036281e-06,9.542928e-05,3.761829e-05,0.000011,4.727181e-04,...,1.726341e-24,3.257212e-27,5.931768e-26,5.245557e-27,5.151273e-26,5.440095e-25,2.144489e-25,6.238846e-26,2.694804e-24,0.0
1,0.000006,0.000218,9.589845e-07,1.131843e-05,7.895691e-07,8.519349e-06,8.149424e-05,1.157464e-04,0.000012,1.598766e-04,...,1.182759e-24,5.210946e-27,6.150229e-26,4.290374e-27,4.629259e-26,4.428248e-25,6.289450e-25,6.321423e-26,8.687405e-25,0.0
2,0.000006,0.000137,1.267444e-06,1.090369e-05,6.051599e-07,7.468137e-06,6.170625e-05,2.077713e-04,0.000011,1.557719e-05,...,6.397000e-25,5.915991e-27,5.089464e-26,2.824677e-27,3.485869e-26,2.880234e-25,9.698046e-25,5.356860e-26,7.270894e-26,0.0
3,0.000005,0.000073,1.387696e-06,9.281459e-06,3.885006e-07,6.163789e-06,4.011502e-05,2.827980e-04,0.000011,1.358481e-05,...,2.545122e-25,4.866958e-27,3.255213e-26,1.362557e-27,2.161777e-26,1.406922e-25,9.918349e-25,3.719183e-26,4.764492e-26,0.0
4,0.000004,0.000030,1.283622e-06,6.909502e-06,1.837786e-07,4.848268e-06,2.100184e-05,3.178725e-04,0.000009,9.498834e-05,...,6.318398e-26,2.741910e-27,1.475920e-26,3.925644e-28,1.035625e-26,4.486146e-26,6.789986e-25,1.971091e-26,2.029020e-25,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47569,0.000017,0.000003,6.952939e-05,2.463632e-06,3.954706e-06,6.739955e-10,1.596383e-06,3.265827e-08,0.000005,5.031395e-07,...,2.375570e-27,5.777654e-26,2.047194e-27,3.286226e-27,5.600672e-31,1.326540e-27,2.713791e-29,4.000078e-27,4.180917e-28,0.0
47570,0.000019,0.000006,1.071067e-04,1.387205e-06,2.486203e-06,4.907807e-08,1.226702e-06,2.026941e-06,0.000004,3.772047e-08,...,1.284550e-26,2.281608e-25,2.955053e-27,5.296163e-27,1.045471e-28,2.613146e-27,4.317833e-27,9.190610e-27,8.035294e-29,0.0
47571,0.000020,0.000010,1.466876e-04,4.450132e-07,1.044123e-06,1.712960e-07,7.219777e-07,9.901923e-06,0.000003,1.756362e-07,...,3.509195e-26,5.138670e-25,1.558944e-27,3.657710e-27,6.000738e-28,2.529189e-27,3.468782e-26,1.203328e-26,6.152783e-28,0.0
47572,0.000018,0.000014,1.811655e-04,5.676815e-09,1.342265e-07,3.434736e-07,2.673473e-07,2.339456e-05,0.000002,1.121066e-06,...,6.535044e-26,8.452465e-25,2.648578e-29,6.262479e-28,1.602512e-27,1.247337e-27,1.091498e-25,1.094505e-26,5.230450e-27,0.0


In [4]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader


In [5]:
import tensorflow as tf

def split_features_target(data):
    """Split a 2-D array into feature columns and the trailing target column.

    Args:
        data: 2-D array supporting NumPy-style slicing; the last column is
            taken as the target and all preceding columns as features.

    Returns:
        A ``(features, target)`` pair of ``torch`` tensors: ``features`` holds
        every column except the last, ``target`` holds the last column as a
        1-D tensor.
    """
    target_col = -1
    return torch.tensor(data[:, :target_col]), torch.tensor(data[:, target_col])

# Split features and target
# 'label' was appended as the last column of each frame, so it is the last
# column of the array passed in; split_features_target returns torch tensors.
X_train, y_train = split_features_target(df_train.to_numpy())
# Add a trailing axis so the labels become a column of shape (N, 1).
# NOTE(review): this hands a torch tensor to a TensorFlow op, so y_train ends
# up as a TensorFlow tensor while X_train stays a torch tensor — confirm that
# mixing the two frameworks here is intended.
y_train = tf.expand_dims(y_train, axis=1)

# Same split and label reshaping for the test frame.
X_test,y_test = split_features_target(df_test.to_numpy())
y_test = tf.expand_dims(y_test, axis=1)


2024-04-09 15:06:43.078533: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-09 15:06:43.126851: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 15:06:43.126909: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 15:06:43.126965: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-09 15:06:43.137057: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

In [6]:
# Print the shapes of the training features and labels (row counts should match).
print(X_train.shape , y_train.shape)

torch.Size([395290, 410]) (395290, 1)


### CNN

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Each sample is a flat vector of 410 features.
input_shape = (410,)

# 1-D CNN: the 410 features are treated as a length-410 sequence with a
# single channel.
cnn_layers = [
    # (410,) -> (410, 1): add the channel axis that Conv1D operates on.
    layers.Reshape(target_shape=input_shape + (1,), input_shape=input_shape),
    # 32 filters of width 3 slid along the feature axis.
    layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    # Halve the sequence length.
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(units=128, activation='relu'),
    # Drop half of the dense activations during training.
    layers.Dropout(rate=0.5),
    # Single sigmoid unit for the binary label.
    layers.Dense(units=1, activation='sigmoid'),
]
model = models.Sequential(cnn_layers)

# Binary classification set-up: Adam optimiser, cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model.
model.summary()
import numpy as np
from sklearn.model_selection import train_test_split

# Convert the feature and label tensors to NumPy arrays before passing them to Keras.
X_train_np, y_train_np = np.array(X_train), np.array(y_train)

# NOTE(review): the features are passed in unscaled and no validation data is
# given; the displayed frames show values ranging from ~1e-3 down to ~1e-25 —
# confirm whether normalisation was intended.
history = model.fit(
    x=X_train_np,
    y=y_train_np,
    batch_size=32,
    epochs=2,
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape (Reshape)           (None, 410, 1)            0         
                                                                 
 conv1d (Conv1D)             (None, 408, 32)           128       
                                                                 
 max_pooling1d (MaxPooling1D  (None, 204, 32)          0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 6528)              0         
                                                                 
 dense (Dense)               (None, 128)               835712    
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                        

2024-04-09 15:04:41.341002: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 648275600 exceeds 10% of free system memory.




KeyboardInterrupt: 

In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score

# Materialise the test features and labels as NumPy arrays.
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# The model ends in a single sigmoid unit, so predict() yields one value in
# [0, 1] per row; values above 0.5 are mapped to label 1, the rest to 0.
y_pred_prob = model.predict(X_test_np)
y_pred = (y_pred_prob > 0.5).astype(int)

# Score the thresholded predictions against the true labels.
precision = precision_score(y_test_np, y_pred)
recall = recall_score(y_test_np, y_pred)
# scikit-learn convention: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test_np, y_pred)
accuracy = accuracy_score(y_test_np, y_pred)

# Print the results
for caption, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("Confusion Matrix:\n", conf_matrix),
    ("Accuracy:", accuracy),
):
    print(caption, value)


Precision: 0.9587043274821376
Recall: 0.9993775869210836
Confusion Matrix:
 [[   1564   80436]
 [   1163 1867371]]
Accuracy: 0.9581658151049918


### Simple NN

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Each sample is a flat vector of 410 features.
input_shape = (410,)

# Fully connected network: 410 -> 128 -> 64 -> 1.
dense_layers = [
    # First hidden layer: 128 ReLU units; also declares the input shape.
    layers.Dense(units=128, activation='relu', input_shape=input_shape),
    # Second hidden layer: 64 ReLU units.
    layers.Dense(units=64, activation='relu'),
    # Drop 10% of the activations during training.
    layers.Dropout(rate=0.1),
    # Single sigmoid unit for the binary label.
    layers.Dense(units=1, activation='sigmoid'),
]
model = models.Sequential(dense_layers)

# Binary classification set-up: Adam optimiser, cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary of the model.
model.summary()

import numpy as np
from sklearn.model_selection import train_test_split

# Convert the feature and label tensors to NumPy arrays before passing them to Keras.
X_train_np, y_train_np = np.array(X_train), np.array(y_train)

# NOTE(review): the features are passed in unscaled and no validation data is
# given; the displayed frames show values ranging from ~1e-3 down to ~1e-25 —
# confirm whether normalisation was intended.
history = model.fit(
    x=X_train_np,
    y=y_train_np,
    batch_size=32,
    epochs=20,
)



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 128)               52608     
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 1)                 65        
                                                                 
Total params: 60,929
Trainable params: 60,929
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20


2024-04-09 15:07:43.872360: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 648275600 exceeds 10% of free system memory.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score

# Materialise the test features and labels as NumPy arrays.
X_test_np = np.array(X_test)
y_test_np = np.array(y_test)

# The model ends in a single sigmoid unit, so predict() yields one value in
# [0, 1] per row; values above 0.5 are mapped to label 1, the rest to 0.
y_pred_prob = model.predict(X_test_np)
y_pred = (y_pred_prob > 0.5).astype(int)

# Score the thresholded predictions against the true labels.
precision = precision_score(y_test_np, y_pred)
recall = recall_score(y_test_np, y_pred)
# scikit-learn convention: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test_np, y_pred)
accuracy = accuracy_score(y_test_np, y_pred)

# Print the results
for caption, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("Confusion Matrix:\n", conf_matrix),
    ("Accuracy:", accuracy),
):
    print(caption, value)


Precision: 0.5794520259898648
Recall: 0.9993834771886559
Confusion Matrix:
 [[    0 20000]
 [   17 27557]]
Accuracy: 0.5792449657375878
