In [4]:
import numpy as np
import scipy.io
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch.nn import Flatten
from torch.nn import Flatten
from torch.nn import Dropout
from torch.nn import Sigmoid

In [5]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [6]:
class DataLoader(Dataset):
    def __init__(self, data_mat, labels):
        self.data_mat = data_mat
        self.labels = labels

    def __len__(self):
        return self.data_mat.shape[2]

    def __getitem__(self, idx):
        mat = np.array(self.data_mat[:,:,idx], dtype=np.float32)
#         mat_pad = np.zeros((12,13), dtype=np.float32)
#         mat_pad[1:11, 1:12] = mat
        mat_pad = np.zeros((12,12), dtype=np.float32)
        mat_pad[1:11, 1:11] = mat
        adj_mat = np.expand_dims(mat_pad, 0)
        label = [self.labels[idx], np.abs(1-self.labels[idx])]
        return torch.from_numpy(adj_mat), torch.from_numpy(np.array(label, dtype=np.float32))

In [7]:
class Model(nn.Module):
    def __init__(self, numChannels=1, classes=2):
        super().__init__()
        
        self.layer1 = nn.Sequential(
                            Conv2d(in_channels=numChannels, out_channels=32, kernel_size=(2,2)),
                            ReLU(),
                            MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
        
        self.layer2 = nn.Sequential(
                            Conv2d(in_channels=32, out_channels=64,kernel_size=(2,2)),
                            ReLU(),
                            MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))

        self.flatten1 = Flatten(0,2)

        self.fc1 = nn.Sequential(
                            Linear(in_features=256, out_features=128),
                            ReLU())

        self.dropout = Dropout(p=0.2)

        self.fc2 = nn.Sequential(
                            Linear(in_features=128, out_features=classes),
                            Sigmoid())

    def forward(self, x):
      #print("x: ", x.shape)
      x1 = self.layer1(x)
      #print("x1:", x1.shape)
      x2 = self.layer2(x1)
      #print("x2:", x2.shape)
      f1 = self.flatten1(x2)
      #print("f1:", f1.shape)
      f2 = self.fc1(f1)
      #print("f2:", f2.shape)
      f3 = self.dropout(f2)
      #print("f3:", f3.shape)
      out = self.fc2(f3)
      #print("out:", out.shape)
      return out

In [8]:
def train(dataloader, model, loss_fn, optimizer):
    size = dataloader.__len__
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        #print("pred: ", pred, "y: ", y)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss = loss.item()
        #print(f"loss: {loss:>7f}")

In [9]:
def test(dataloader, model, loss_fn):
    size = len(dataloader)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            out = pred
            if(y[0] == 1):
              if(out[0] >= out[1]):
                tp += 1
              elif(out[0] < out[1]):
                #print(out)
                fn += 1
            elif(y[0] == 0):
              if(out[0] >= out[1]):
                fp += 1
              elif(out[0] < out[1]):
                tn += 1
    #print(test_loss, num_batches)
    test_loss /= num_batches
    correct /= size
    #print(f"Test Error: \n Metrics: {[tp, fp, tn, fn]}, Avg loss: {test_loss:>8f} \n")
    return [tp, fp, tn, fn], test_loss

In [23]:
# mat = scipy.io.loadmat('dataset/theta_A_W_uni11_100ms_080.mat')["W"]

# # Calculate the number of samples in the data
# num_samples = mat.shape[2]

# # Labels initialization
# labels = np.zeros(num_samples)

# # Set first 250 and last 250 labels to 0
# labels[:250] = 0
# labels[-250:] = 0

# # Calculate the number of '1's for the middle part
# num_ones_middle = num_samples - 500

# # Set the middle part labels to 1
# labels[250:250 + num_ones_middle] = 1

# # Indices setup
# a1 = np.arange(0, 250)
# a2 = np.arange(250, 250 + num_ones_middle)
# a3 = np.arange(250 + num_ones_middle, num_samples)


# t1 = list(np.random.choice(a1, int(len(a1)*0.1), replace=False))
# tr1 = list(set(a1)-set(t1))
# t2 = list(np.random.choice(a2, int(len(a2)*0.1), replace=False))
# tr2 = list(set(a2)-set(t2))
# t3 = list(np.random.choice(a3, int(len(a3)*0.1), replace=False))
# tr3 = list(set(a3)-set(t3))

# t = t1 + t2 + t3
# tr = tr1 + tr2 + tr3
# test_data = mat[:,:,t]
# train_data = mat[:,:,tr]
# test_labels = labels[t]
# train_labels = labels[tr]

# print(train_data.shape)

# print(len(t), len(tr))

import os
import numpy as np
import scipy.io
import networkx as nx

# Path to the folder containing the files
folder_path = '/kaggle/input/dpcn-dataset/dataset'
# folder_path = '/kaggle/input/dpcn-dataset-euclidean'

# function to calculate various centrality measures of a graph with given adjacency matrix
def calculate_centrality_measures(adjacency_matrix):
    # Convert adjacency matrix to a networkx graph
    G = nx.from_numpy_array(adjacency_matrix)

#     # Calculate clustering coefficient for each node
#     centrality_measure = nx.clustering(G)

#     # Calculate eigenvector centrality for each node
#     centrality_measure = nx.eigenvector_centrality_numpy(G)

#     # Calculate degree centrality for each node
#     centrality_measure = nx.degree_centrality(G)

#     # Calculate closeness centrality for each node
#     centrality_measure = nx.closeness_centrality(G)

    # Calculate betweenness centrality for each node
    centrality_measure = nx.betweenness_centrality(G)

    # Convert eigenvector centralities to an array in the same order as nodes in the graph
    centralities_array = np.array([centrality_measure[node] for node in range(len(G))])

    return centralities_array

# Lists to append the data and labels
train_data_all = []
train_labels_all = []
test_data_all = []
test_labels_all = []

# List all files in the folder
file_names = os.listdir(folder_path)

# Loop through only one file
for file_name in file_names:
    # Load data from each file
#     mat1 = scipy.io.loadmat(os.path.join(folder_path, file_name))["W"]
    mat = scipy.io.loadmat(os.path.join(folder_path, file_name))["W"]

#     mat = np.zeros((10, 11, mat1.shape[2]))
#     # append the clustering coefficient of the graph to the adjacency matrix
#     for i in range(mat.shape[2]):
#         if i == 0:
#             print(mat1[:,:,i].shape)
#             print(calculate_centrality_measures(mat1[:,:,i]).flatten().shape)
#         centrality_measure = calculate_centrality_measures(mat1[:,:,i]).flatten()
#         # convert (48,48) adjacency matrix to (48,49) matrix
#         mat[:,:,i] = np.concatenate((mat1[:,:,i], centrality_measure.reshape(10,1)), axis=1)
#         if i == 0:
#             print(mat[:,:,i].shape)
    # Calculate the number of samples in the data
    num_samples = mat.shape[2]
    
    # Labels initialization
    labels = np.zeros(num_samples)
    
    # Set first 250 and last 250 labels to 0
    labels[:500] = 0
    labels[-250:] = 0
    
    # Calculate the number of '1's for the middle part
    num_ones_middle = num_samples - 750
    
    # Set the middle part labels to 1
    labels[500:-250] = 1
    
#     # Randomly shuffle the labels
#     np.random.shuffle(labels)
    
    # Indices setup
    a1 = np.arange(0, 500)
    a2 = np.arange(500, 500 + num_ones_middle)
    a3 = np.arange(500 + num_ones_middle, num_samples)

    # Splitting indices for test and train sets
    t1 = list(np.random.choice(a1, int(len(a1) * 0.1), replace=False))
    tr1 = list(set(a1) - set(t1))
    t2 = list(np.random.choice(a2, int(len(a2) * 0.1), replace=False))
    tr2 = list(set(a2) - set(t2))
    t3 = list(np.random.choice(a3, int(len(a3) * 0.1), replace=False))
    tr3 = list(set(a3) - set(t3))
    
    # Combine indices
    t = t1 + t2 + t3
    tr = tr1 + tr2 + tr3
    
    # Split data
    test_data = mat[:, :, t]
    train_data = mat[:, :, tr]
    train_labels = labels[tr]
    test_labels = labels[t]

    print(train_data.shape)

    # Append the data and labels to the lists
    train_data_all.append(train_data)
    train_labels_all.append(train_labels)
    test_data_all.append(test_data)
    test_labels_all.append(test_labels)
    break


print("=====================================")
# Find the maximum number of samples among accumulated data
max_samples = max(data.shape[2] for data in train_data_all)

# # Pad zeros at the end of each array to make them all 48x48x(max_samples)
# train_data_padded = []
# test_data_padded = []
# for data in train_data_all:
#     size = 48 - data.shape[0]
#     size//=2
#     padded_data = np.pad(data, ((size, size), (size, size), (0, 0)), mode='constant')
#     train_data_padded.append(padded_data)
#     print(padded_data.shape)
# for data in test_data_all:
#     size = 48 - data.shape[0]
#     size//=2
#     padded_data = np.pad(data, ((size, size), (size, size), (0, 0)), mode='constant')
#     test_data_padded.append(padded_data)

# Concatenate accumulated data and labels
train_data_concatenated = np.concatenate(train_data_all, axis=2)
train_labels_concatenated = np.concatenate(train_labels_all, axis=0)
test_data_concatenated = np.concatenate(test_data_all, axis=2)
test_labels_concatenated = np.concatenate(test_labels_all, axis=0)

# Print the shape of concatenated data
print("Shape of concatenated training data:", train_data_concatenated.shape)
print("Shape of concatenated training labels:", train_labels_concatenated.shape)
print("Shape of concatenated testing data:", test_data_concatenated.shape)
print("Shape of concatenated testing labels:", test_labels_concatenated.shape)

(10, 10, 982)
Shape of concatenated training data: (10, 10, 982)
Shape of concatenated training labels: (982,)
Shape of concatenated testing data: (10, 10, 109)
Shape of concatenated testing labels: (109,)


In [24]:
model = Model().to(device)
print(model)

Model(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (flatten1): Flatten(start_dim=0, end_dim=2)
  (fc1): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU()
  )
  (dropout): Dropout(p=0.2, inplace=False)
  (fc2): Sequential(
    (0): Linear(in_features=128, out_features=2, bias=True)
    (1): Sigmoid()
  )
)


In [26]:
train_dataloader = DataLoader(train_data_concatenated, train_labels_concatenated)
test_dataloader = DataLoader(test_data_concatenated, test_labels_concatenated)
model = Model().to(device)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20], gamma=0.001)

epochs = 50
max_c = -1
max_c_epoch = -1
min_tl = 9999
min_tl_epoch = 9999

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    #test(train_dataloader, model, loss_fn)
    c, tl = test(test_dataloader, model, loss_fn)
    acc = (c[0] + c[2])/np.sum(c)
    pre = c[0]/(c[0]+c[1]+1)
    sen = c[0]/(c[0]+c[3]+1)
    spe = c[2]/(c[1]+c[2])
    f_score = 2*(pre*sen)/(pre+sen +1)
    print("acc: ", acc, "pre: ", pre, "rec: ", sen, "spe: ", spe, "f_score: ", f_score, "c: ", c, "loss:", tl)
print("Done!")

Epoch 1
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6244773093713533
Epoch 2
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6221659902585756
Epoch 3
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6226406986013465
Epoch 4
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6235991289856238
Epoch 5
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6224864133454244
Epoch 6
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0.0 spe:  1.0 f_score:  0.0 c:  [0, 0, 75, 34] loss: 0.6224473274629051
Epoch 7
-------------------------------
acc:  0.6880733944954128 pre:  0.0 rec:  0