In [22]:
import numpy as np
import scipy.io
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch.nn import Flatten
from torch.nn import Flatten
from torch.nn import Dropout
from torch.nn import Sigmoid

In [23]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [46]:
class DataLoader(Dataset):
    def __init__(self, data_mat, labels):
        self.data_mat = data_mat
        self.labels = labels

    def __len__(self):
        return self.data_mat.shape[2]

    def __getitem__(self, idx):
        mat = np.array(self.data_mat[:,:,idx], dtype=np.float32)
        mat_pad = np.zeros((43,13), dtype=np.float32)
        mat_pad[1:42, 1:11] = mat
        adj_mat = np.expand_dims(mat_pad, 0)
        label = [self.labels[idx], np.abs(1-self.labels[idx])]
        return torch.from_numpy(adj_mat), torch.from_numpy(np.array(label, dtype=np.float32))

In [47]:
class Model(nn.Module):
    def __init__(self, numChannels=1, classes=2):
        super().__init__()
        
        self.layer1 = nn.Sequential(
                            Conv2d(in_channels=numChannels, out_channels=32, kernel_size=(2,2)),
                            ReLU(),
                            MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
        
        self.layer2 = nn.Sequential(
                            Conv2d(in_channels=32, out_channels=64,kernel_size=(2,2)),
                            ReLU(),
                            MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))

        self.flatten1 = Flatten(0,2)

        self.fc1 = nn.Sequential(
                            Linear(in_features=1280, out_features=128),
                            ReLU())

        self.dropout = Dropout(p=0.2)

        self.fc2 = nn.Sequential(
                            Linear(in_features=128, out_features=classes),
                            Sigmoid())

    def forward(self, x):
      # print("x: ", x.shape)
      x1 = self.layer1(x)
      # print("x1:", x1.shape)
      x2 = self.layer2(x1)
      # print("x2:", x2.shape)
      f1 = self.flatten1(x2)
      # print("f1:", f1.shape)
      f2 = self.fc1(f1)
      # print("f2:", f2.shape)
      f3 = self.dropout(f2)
      # print("f3:", f3.shape)
      out = self.fc2(f3)
      # print("out:", out.shape)
      return out

In [48]:
def train(dataloader, model, loss_fn, optimizer):
    size = dataloader.__len__
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        #print("pred: ", pred, "y: ", y)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss = loss.item()
        #print(f"loss: {loss:>7f}")

In [49]:
def test(dataloader, model, loss_fn):
    size = len(dataloader)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            out = pred
            if(y[0] == 1):
              if(out[0] >= out[1]):
                tp += 1
              elif(out[0] < out[1]):
                #print(out)
                fn += 1
            elif(y[0] == 0):
              if(out[0] >= out[1]):
                fp += 1
              elif(out[0] < out[1]):
                tn += 1
    #print(test_loss, num_batches)
    test_loss /= num_batches
    correct /= size
    #print(f"Test Error: \n Metrics: {[tp, fp, tn, fn]}, Avg loss: {test_loss:>8f} \n")
    return [tp, fp, tn, fn], test_loss

In [20]:
import os
import scipy.io
import pandas as pd
import numpy as np

# Directory containing .mat files
folder_path = 'TFA/Train/'

# List all files in the directory
files = os.listdir(folder_path)

# Filter out .mat files


mat_files = [file for file in files if file.endswith('.mat')]

dfs_train = []
train_labels = []

for file in mat_files:
    file_path = os.path.join(folder_path, file)
    mat_data = scipy.io.loadmat(file_path)
    data = mat_data["tfaOut"]
    data = np.transpose(data, (1, 0, 2))
    
    # Creating labels
    label_column = np.zeros(len(data))
    label_column[:5000] = 0
    label_column[-5000:] = 0
    label_column[5000:-5000] = 1
    
    # Append data and labels to lists
    dfs_train.append(data)
    train_labels.append(label_column)

# Concatenate the lists into single numpy arrays
dfs_train = np.concatenate(dfs_train, axis=0)
train_labels = np.concatenate(train_labels, axis=0)
dfs_train = np.transpose(dfs_train, (1, 2, 0))
print("Data shape:", dfs_train.shape)
print("Labels shape:", train_labels.shape)


Data shape: (41, 10, 395290)
Labels shape: (395290,)


In [21]:
import os
import scipy.io
import pandas as pd
import numpy as np

# Directory containing .mat files
folder_path = 'TFA/Test/'

# List all files in the directory
files = os.listdir(folder_path)

# Filter out .mat files
mat_files = [file for file in files if file.endswith('.mat')]

dfs_test = []
test_labels = []

for file in mat_files:
    file_path = os.path.join(folder_path, file)
    mat_data = scipy.io.loadmat(file_path)
    data = mat_data["tfaOut"]
    data = np.transpose(data, (1, 0, 2))
    
    # Creating labels
    label_column = np.zeros(len(data))
    label_column[:5000] = 0
    label_column[-5000:] = 0
    label_column[5000:-5000] = 1
    
    # Append data and labels to lists
    dfs_test.append(data)
    test_labels.append(label_column)

# Concatenate the lists into single numpy arrays
dfs_test = np.concatenate(dfs_test, axis=0)
test_labels = np.concatenate(test_labels, axis=0)
dfs_test = np.transpose(dfs_test, (1, 2, 0))
print("Data shape:", dfs_test.shape)
print("Labels shape:", test_labels.shape)


Data shape: (41, 10, 47574)
Labels shape: (47574,)


In [18]:
dfs_test

array([[[4.46432128e-06, 3.02832006e-04, 5.71374953e-07, ...,
         3.76182902e-05, 1.09440838e-05, 4.72718120e-04],
        [2.08653004e-06, 1.41537322e-04, 2.67048657e-07, ...,
         1.75819991e-05, 5.11503501e-06, 2.20938525e-04],
        [9.54753126e-07, 6.47645604e-05, 1.22195960e-07, ...,
         8.04516027e-06, 2.34053456e-06, 1.01096914e-04],
        ...,
        [5.70243174e-25, 3.86817780e-23, 7.29836959e-26, ...,
         4.80511412e-24, 1.39792562e-24, 6.03819180e-23],
        [1.21697802e-25, 8.25522793e-24, 1.55757330e-26, ...,
         1.02547800e-24, 2.98336716e-25, 1.28863388e-23],
        [2.54495614e-26, 1.72634119e-24, 3.25721224e-27, ...,
         2.14448944e-25, 6.23884607e-26, 2.69480354e-24]],

       [[5.74683736e-06, 2.17666275e-04, 9.58984523e-07, ...,
         1.15746436e-04, 1.16334851e-05, 1.59876648e-04],
        [2.68362615e-06, 1.01644586e-04, 4.47821259e-07, ...,
         5.40506268e-05, 5.43254018e-06, 7.46583081e-05],
        [1.22697242e-06, 

In [65]:
# # mat = scipy.io.loadmat('dataset/theta_A_W_uni11_100ms_080.mat')["W"]

# # # Calculate the number of samples in the data
# # num_samples = mat.shape[2]

# # # Labels initialization
# # labels = np.zeros(num_samples)

# # # Set first 250 and last 250 labels to 0
# # labels[:250] = 0
# # labels[-250:] = 0

# # # Calculate the number of '1's for the middle part
# # num_ones_middle = num_samples - 500

# # # Set the middle part labels to 1
# # labels[250:250 + num_ones_middle] = 1

# # # Indices setup
# # a1 = np.arange(0, 250)
# # a2 = np.arange(250, 250 + num_ones_middle)
# # a3 = np.arange(250 + num_ones_middle, num_samples)


# # t1 = list(np.random.choice(a1, int(len(a1)*0.1), replace=False))
# # tr1 = list(set(a1)-set(t1))
# # t2 = list(np.random.choice(a2, int(len(a2)*0.1), replace=False))
# # tr2 = list(set(a2)-set(t2))
# # t3 = list(np.random.choice(a3, int(len(a3)*0.1), replace=False))
# # tr3 = list(set(a3)-set(t3))

# # t = t1 + t2 + t3
# # tr = tr1 + tr2 + tr3
# # test_data = mat[:,:,t]
# # train_data = mat[:,:,tr]
# # test_labels = labels[t]
# # train_labels = labels[tr]

# # print(train_data.shape)

# # print(len(t), len(tr))

# import os
# import numpy as np
# import scipy.io
# import networkx as nx

# # function to calculate various centrality measures of a graph with given adjacency matrix
# def calculate_centrality_measures(adjacency_matrix):
#     # Convert adjacency matrix to a networkx graph
#     G = nx.from_numpy_array(adjacency_matrix)

#     # # Calculate clustering coefficient for each node
#     # clustering_coefficients = nx.clustering(G)

#     # # Convert clustering coefficients to an array in the same order as nodes in the graph
#     # coefficients_array = np.array([clustering_coefficients[node] for node in range(len(G))])

#     # return coefficients_array

#     # Calculate eigenvector centrality for each node
#     eigenvector_centrality = nx.eigenvector_centrality_numpy(G)

#     # Convert eigenvector centralities to an array in the same order as nodes in the graph
#     centralities_array = np.array([eigenvector_centrality[node] for node in range(len(G))])

#     return centralities_array

# # Path to the folder containing the files
# folder_path = 'dataset'

# # Lists to append the data and labels
# train_data_all = []
# train_labels_all = []
# test_data_all = []
# test_labels_all = []

# # List all files in the folder
# file_names = os.listdir(folder_path)

# # Loop through only one file
# for file_name in file_names:
#     # Load data from each file
#     mat1 = scipy.io.loadmat(os.path.join(folder_path, file_name))["W"]

#     mat = np.zeros((10, 11, mat1.shape[2]))
#     # append the clustering coefficient of the graph to the adjacency matrix
#     for i in range(mat.shape[2]):
#         # if i == 0:
#         #     print(mat1[:,:,i].shape)
#         #     print(calculate_centrality_measures(mat1[:,:,i]).flatten().shape)
#         clustering_coefficients = calculate_centrality_measures(mat1[:,:,i]).flatten()
#         # convert (48,48) adjacency matrix to (48,49) matrix
#         mat[:,:,i] = np.concatenate((mat1[:,:,i], clustering_coefficients.reshape(10,1)), axis=1)
#         # if i == 0:
#         #     print(mat[:,:,i].shape)

    
#     # Calculate the number of samples in the data
#     num_samples = mat.shape[2]
    
#     # Labels initialization
#     labels = np.zeros(num_samples)
    
#     # Set first 250 and last 250 labels to 0
#     labels[:250] = 0
#     labels[-250:] = 0
    
#     # Calculate the number of '1's for the middle part
#     num_ones_middle = num_samples - 500
    
#     # Set the middle part labels to 1
#     labels[250:250 + num_ones_middle] = 1
    
#     # Randomly shuffle the labels
#     np.random.shuffle(labels)
    
#     # Indices setup
#     a1 = np.arange(0, 250)
#     a2 = np.arange(250, 250 + num_ones_middle)
#     a3 = np.arange(250 + num_ones_middle, num_samples)

#     # Splitting indices for test and train sets
#     t1 = list(np.random.choice(a1, int(len(a1) * 0.1), replace=False))
#     tr1 = list(set(a1) - set(t1))
#     t2 = list(np.random.choice(a2, int(len(a2) * 0.1), replace=False))
#     tr2 = list(set(a2) - set(t2))
#     t3 = list(np.random.choice(a3, int(len(a3) * 0.1), replace=False))
#     tr3 = list(set(a3) - set(t3))
    
#     # Combine indices
#     t = t1 + t2 + t3
#     tr = tr1 + tr2 + tr3
    
#     # Split data
#     test_data = mat[:, :, t]
#     train_data = mat[:, :, tr]
#     train_labels = labels[tr]
#     test_labels = labels[t]

#     print(train_data.shape)

#     # Append the data and labels to the lists
#     train_data_all.append(train_data)
#     train_labels_all.append(train_labels)
#     test_data_all.append(test_data)
#     test_labels_all.append(test_labels)
#     break


# print("=====================================")
# # Find the maximum number of samples among accumulated data
# max_samples = max(data.shape[2] for data in train_data_all)

# # # Pad zeros at the end of each array to make them all 48x48x(max_samples)
# # train_data_padded = []
# # test_data_padded = []
# # for data in train_data_all:
# #     size = 48 - data.shape[0]
# #     size//=2
# #     padded_data = np.pad(data, ((size, size), (size, size), (0, 0)), mode='constant')
# #     train_data_padded.append(padded_data)
# #     print(padded_data.shape)
# # for data in test_data_all:
# #     size = 48 - data.shape[0]
# #     size//=2
# #     padded_data = np.pad(data, ((size, size), (size, size), (0, 0)), mode='constant')
# #     test_data_padded.append(padded_data)

# # Concatenate accumulated data and labels
# train_data_concatenated = np.concatenate(train_data_all, axis=2)
# train_labels_concatenated = np.concatenate(train_labels_all, axis=0)
# test_data_concatenated = np.concatenate(test_data_all, axis=2)
# test_labels_concatenated = np.concatenate(test_labels_all, axis=0)

# # Print the shape of concatenated data
# print("Shape of concatenated training data:", train_data_concatenated.shape)
# print("Shape of concatenated training labels:", train_labels_concatenated.shape)
# print("Shape of concatenated testing data:", test_data_concatenated.shape)
# print("Shape of concatenated testing labels:", test_labels_concatenated.shape)

(10, 11, 2194)
Shape of concatenated training data: (10, 11, 2194)
Shape of concatenated training labels: (2194,)
Shape of concatenated testing data: (10, 11, 243)
Shape of concatenated testing labels: (243,)


In [50]:
model = Model().to(device)
print(model)

Model(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (flatten1): Flatten(start_dim=0, end_dim=2)
  (fc1): Sequential(
    (0): Linear(in_features=1280, out_features=128, bias=True)
    (1): ReLU()
  )
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Sequential(
    (0): Linear(in_features=128, out_features=2, bias=True)
    (1): Sigmoid()
  )
)


In [52]:
train_dataloader = DataLoader(dfs_train, train_labels)
test_dataloader = DataLoader(dfs_test, test_labels)
model = Model().to(device)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20], gamma=0.001)

epochs = 1
max_c = -1
max_c_epoch = -1
min_tl = 9999
min_tl_epoch = 9999

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    model.train()  # set the model to train mode
    total_loss = 0.0
    
    for i, (data, labels) in enumerate(train_dataloader):
        data, labels = data.to(device), labels.to(device)
        
        optimizer.zero_grad()  # clear gradients
        
        outputs = model(data)
        loss = loss_fn(outputs, labels)
        loss.backward()  # backpropagation
        optimizer.step()  # update weights
        
        total_loss += loss.item()
    
    average_train_loss = total_loss / len(train_dataloader)
    print(f"Training Loss: {average_train_loss}")
    
    # testing
    model.eval()  # set the model to evaluation mode
    c, tl = test(test_dataloader, model, loss_fn)
    acc = (c[0] + c[2])/np.sum(c)
    pre = c[0]/(c[0]+c[1])
    sen = c[0]/(c[0]+c[3])
    spe = c[2]/(c[1]+c[2])
    f_score = 2*(pre*sen)/(pre+sen)
    
    print("acc: ", acc, "pre: ", pre, "rec: ", sen, "spe: ", spe, "f_score: ", f_score, "c: ", c)
    print(f"Testing Loss: {tl}")
    
print("Done!")


Epoch 1
-------------------------------
Training Loss: 1.627036329504261
acc:  0.5796023037793753 pre:  0.5796023037793753 rec:  1.0 spe:  0.0 f_score:  0.7338585191888007 c:  [27574, 20000, 0, 0]
Testing Loss: 2.9018201285639633
Done!
