In [39]:
import os
import sys
import torch
import torch.nn.functional as F
from imports.ABIDEDataset import ABIDEDataset
from sklearn.metrics._regression import r2_score
import dgl


In [40]:
# Load the "ABCD" dataset through the project's ABIDEDataset class.
dataset = ABIDEDataset("data/data/Output","ABCD")
# Zero out infinite feature values in place. torch.isinf matches both +inf and
# -inf, whereas comparing against float('inf') would leave any -inf untouched.
dataset.data.x[torch.isinf(dataset.data.x)] = 0

In [41]:
# Show the dataset repr as a quick sanity check on what was loaded.
dataset

ABCD(8836)

In [42]:
# Shape of the node-feature tensor `x` held by the dataset.
dataset.data.x.shape

torch.Size([468308, 53])

In [43]:
# Regroup the stacked feature rows into one square matrix per subject.
# The matrix side is read from the tensor's column count and the subject count
# is inferred (-1), so nothing here needs editing if the dataset size changes.
n_rois = dataset.data.x.shape[1]
reshape_dataset = dataset.data.x.reshape(-1, n_rois, n_rois)
reshape_dataset.shape

torch.Size([8836, 53, 53])

In [44]:
# Inspect the first subject's feature matrix.
reshape_dataset[0]

tensor([[ 0.0000,  0.1797,  0.0746,  ..., -0.0875, -0.0709, -0.1091],
        [ 0.1797,  0.0000,  0.4158,  ..., -0.2620, -0.1035, -0.1899],
        [ 0.0746,  0.4158,  0.0000,  ..., -0.4851, -0.2242, -0.5243],
        ...,
        [-0.0875, -0.2620, -0.4851,  ...,  0.0000,  0.2448,  0.8426],
        [-0.0709, -0.1035, -0.2242,  ...,  0.2448,  0.0000,  0.7434],
        [-0.1091, -0.1899, -0.5243,  ...,  0.8426,  0.7434,  0.0000]])

In [45]:
# Regroup the stacked `pos` rows into one square matrix per subject.
# The matrix side is read from the tensor's column count and the subject count
# is inferred (-1) instead of being hard-coded.
n_pos = dataset.data.pos.shape[1]
reshape_pos = dataset.data.pos.reshape(-1, n_pos, n_pos)
reshape_pos

tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        ...,

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0., 

In [46]:
# Summarise the tensors held by the dataset's underlying Data object.
dataset.data

Data(x=[468308, 53], edge_index=[2, 24352016], edge_attr=[24352016, 1], y=[8836, 1], pos=[468308, 53])

In [47]:
# Re-check the shape of the per-subject feature tensor.
reshape_dataset.shape

torch.Size([8836, 53, 53])

In [48]:
from torch_geometric.data import Data
from torch_geometric.utils import add_self_loops


def extract_subgraphs(edge_index, edge_attr, num_subjects, num_nodes_per_subject, edges_per_subject):
    """Split a concatenated edge list into one self-looped PyG graph per subject.

    Subject ``s`` is assumed to own the contiguous edge slice
    ``[s * edges_per_subject, (s + 1) * edges_per_subject)``. Any columns
    beyond ``num_subjects * edges_per_subject`` are ignored.

    Args:
        edge_index: Tensor of shape ``(2, total_edges)`` with the edges of all
            subjects concatenated along the second dimension.
        edge_attr: Tensor whose first dimension is ``total_edges``, holding the
            attribute(s) of each edge in the same order as ``edge_index``.
        num_subjects: Number of subjects (graphs) to extract.
        num_nodes_per_subject: Node count recorded on every output graph and
            used to decide which self-loops to add.
        edges_per_subject: Number of consecutive edges belonging to each subject.

    Returns:
        list[Data]: ``num_subjects`` graphs, each carrying ``edge_index``,
        ``edge_attr`` and ``num_nodes`` (no node features).

    Notes:
        Self-loop attributes take ``add_self_loops``' default fill value
        (1.0 per the PyG docs -- confirm for the installed version).
        NOTE(review): passing ``num_nodes=num_nodes_per_subject`` assumes node
        ids are already local to each subject (0..num_nodes_per_subject-1);
        confirm against how the dataset stores ``edge_index``.
    """
    subgraphs = []

    for subject in range(num_subjects):
        start_idx = subject * edges_per_subject
        end_idx = start_idx + edges_per_subject

        # Extract the edge index and attributes for the current subject
        subject_edge_index = edge_index[:, start_idx:end_idx]
        subject_edge_attr = edge_attr[start_idx:end_idx]

        # Add one self-loop per node; edge_attr is extended to match
        subject_edge_index, subject_edge_attr = add_self_loops(
            subject_edge_index,
            edge_attr=subject_edge_attr,
            num_nodes=num_nodes_per_subject,
        )

        # Create a PyG Data object for the subgraph
        data = Data(
            edge_index=subject_edge_index,
            edge_attr=subject_edge_attr,
            num_nodes=num_nodes_per_subject,
        )
        subgraphs.append(data)

    return subgraphs

# Extract subgraphs for each subject.
# All sizes are read from the loaded dataset instead of being typed in by hand,
# so this cell stays correct if the dataset is regenerated.
num_subjects = dataset.data.y.size(0)  # one label row per subject
num_nodes_per_subject = dataset.data.x.size(0) // num_subjects
total_edges = dataset.data.edge_index.size(1)
# Fixed-stride slicing in extract_subgraphs is only valid when every subject
# has the same number of edges; fail loudly rather than truncate silently.
if total_edges % num_subjects != 0:
    raise ValueError(
        f"total_edges ({total_edges}) is not divisible by num_subjects ({num_subjects}); "
        "subjects do not all have the same number of edges."
    )
edges_per_subject = total_edges // num_subjects
subgraphs = extract_subgraphs(dataset.data.edge_index, dataset.data.edge_attr, num_subjects, num_nodes_per_subject, edges_per_subject)

# Check the first subgraph
print(subgraphs[0])

Data(edge_index=[2, 2809], edge_attr=[2809, 1], num_nodes=53)


In [49]:
# Spot-check the feature-matrix shape of one subject (index 6209).
reshape_dataset[6209].shape

torch.Size([53, 53])

In [50]:
# Edge-attribute shape of the first subgraph (self-loops included).
subgraphs[0].edge_attr.shape


torch.Size([2809, 1])

In [51]:
import dgl
import numpy as np
import torch

# Build one DGL graph per subject, keeping only the strongest edges.
#
# For every subject the edges whose (signed) attribute is at or above the
# EDGE_PERCENTILE-th percentile are kept, i.e. roughly the top 30%. The
# absolute value is taken only afterwards, when the attribute is stored on the
# graph. Labels are not attached to the graphs in this cell.
EDGE_PERCENTILE = 70

graphs = []
for i in range(num_subjects):
    x = reshape_dataset[i]
    edge_index = subgraphs[i].edge_index
    edge_attr = subgraphs[i].edge_attr

    # np.percentile needs a flat NumPy array; edge_attr is stored as (num_edges, 1)
    edge_attr_np = edge_attr.numpy().flatten()

    # Ensure edge_attr_np has at least one element
    if len(edge_attr_np) == 0:
        raise ValueError("edge_attr_np is empty. Check your edge_attr data.")

    # Threshold separating the top 30% of edge attributes from the rest
    threshold = np.percentile(edge_attr_np, EDGE_PERCENTILE)

    # Boolean mask over edges; from_numpy avoids an extra copy of the array
    mask = torch.from_numpy(edge_attr_np >= threshold)

    # Filter edge_index and edge_attr using the mask
    filtered_edge_index = edge_index[:, mask]
    filtered_edge_attr = edge_attr[mask]

    # Pass num_nodes explicitly: without it DGL infers the node count from the
    # largest node id present, so a trailing node that lost all of its edges to
    # the threshold would shrink the graph and break the ndata assignments below.
    g = dgl.graph(
        (filtered_edge_index[0], filtered_edge_index[1]),
        num_nodes=x.shape[0],
    )
    g.ndata['x'] = x
    g.edata['edge_attr'] = filtered_edge_attr.abs()
    g.ndata['pos'] = reshape_pos[i]
    graphs.append(g)


In [52]:
# Number of graphs that were built.
len(graphs)

8836

In [57]:
# Edge-attribute shape of the first graph after thresholding.
graphs[0].edata['edge_attr'].shape

torch.Size([843, 1])

In [58]:
# Shape of the target tensor `y` held by the dataset.
dataset.data.y.shape



torch.Size([8836, 1])

In [59]:
# Persist the list of DGL graphs. torch.save pickles the objects, so the file
# should only be loaded from a trusted source with dgl importable.
torch.save(obj=graphs, f="abcd_graphs_crystallized_sparse_30_positive.pt")

In [60]:
# Persist the dataset's target tensor alongside the graphs file.
torch.save(obj=dataset.data.y, f="abcd_graphs_crystallized_labels.pt")