In [32]:
import os
import sys
import torch
import torch.nn.functional as F
from imports.ABIDEDataset import ABIDEDataset
from sklearn.metrics._regression import r2_score
import dgl


In [33]:
dataset = ABIDEDataset("data/data/Output","ABCD")
dataset.data.x[dataset.data.x == float('inf')] = 0

In [34]:
dataset

ABCD(8836)

In [35]:
dataset.data.x.shape

torch.Size([468308, 53])

In [36]:
reshape_dataset = dataset.data.x.reshape(8836, 53, 53)
reshape_dataset.shape

torch.Size([8836, 53, 53])

In [37]:
reshape_dataset[0]

tensor([[ 0.0000,  0.1797,  0.0746,  ..., -0.0875, -0.0709, -0.1091],
        [ 0.1797,  0.0000,  0.4158,  ..., -0.2620, -0.1035, -0.1899],
        [ 0.0746,  0.4158,  0.0000,  ..., -0.4851, -0.2242, -0.5243],
        ...,
        [-0.0875, -0.2620, -0.4851,  ...,  0.0000,  0.2448,  0.8426],
        [-0.0709, -0.1035, -0.2242,  ...,  0.2448,  0.0000,  0.7434],
        [-0.1091, -0.1899, -0.5243,  ...,  0.8426,  0.7434,  0.0000]])

In [38]:
reshape_pos = dataset.data.pos.reshape(8836, 53, 53)
reshape_pos

tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]],

        ...,

        [[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0., 

In [39]:
dataset.data

Data(x=[468308, 53], edge_index=[2, 24352016], edge_attr=[24352016, 1], y=[8836, 1], pos=[468308, 53])

In [40]:
reshape_dataset.shape

torch.Size([8836, 53, 53])

In [41]:
from torch_geometric.data import Data
def extract_subgraphs(edge_index, edge_attr, num_subjects, num_nodes_per_subject, edges_per_subject):
    subgraphs = []
    
    for subject in range(num_subjects):
        start_idx = subject * edges_per_subject
        end_idx = start_idx + edges_per_subject
        
        # Extract the edge index and attributes for the current subject
        subject_edge_index = edge_index[:, start_idx:end_idx]
        subject_edge_attr = edge_attr[start_idx:end_idx]
        
        # Create a PyG Data object for the subgraph
        data = Data(edge_index=subject_edge_index, edge_attr=subject_edge_attr, num_nodes=num_nodes_per_subject)
        subgraphs.append(data)
    
    return subgraphs

# Extract subgraphs for each subject
num_subjects = 8836
num_nodes_per_subject = 53
total_edges = 24352016
edges_per_subject = total_edges // num_subjects
subgraphs = extract_subgraphs(dataset.data.edge_index, dataset.data.edge_attr, num_subjects, num_nodes_per_subject, edges_per_subject)

# Check the first subgraph
print(subgraphs[0])

Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53)


In [42]:
subgraphs

[Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2756, 1], num_nodes=53),
 Data(edge_index=[2, 2756], edge_attr=[2

In [43]:

# Create a list of DGLGraph objects
graphs = []
for i in range(num_subjects):
    # Replace with your actual data
    x = reshape_dataset[i]
    edge_index =subgraphs[i].edge_index
    edge_attr = subgraphs[i].edge_attr

    g = dgl.graph((edge_index[0], edge_index[1]))
    g.ndata['x'] = x
    g.edata['edge_attr'] = edge_attr
    g.ndata['pos'] = reshape_pos[i]
    # g.graph_attr['label'] = dataset.data.y[i]
    graphs.append(g)

In [44]:
len(graphs)

8836

In [45]:
graphs[0].ndata['x']

tensor([[ 0.0000,  0.1797,  0.0746,  ..., -0.0875, -0.0709, -0.1091],
        [ 0.1797,  0.0000,  0.4158,  ..., -0.2620, -0.1035, -0.1899],
        [ 0.0746,  0.4158,  0.0000,  ..., -0.4851, -0.2242, -0.5243],
        ...,
        [-0.0875, -0.2620, -0.4851,  ...,  0.0000,  0.2448,  0.8426],
        [-0.0709, -0.1035, -0.2242,  ...,  0.2448,  0.0000,  0.7434],
        [-0.1091, -0.1899, -0.5243,  ...,  0.8426,  0.7434,  0.0000]])

In [46]:
graphs[0]

Graph(num_nodes=53, num_edges=2756,
      ndata_schemes={'x': Scheme(shape=(53,), dtype=torch.float32), 'pos': Scheme(shape=(53,), dtype=torch.float32)}
      edata_schemes={'edge_attr': Scheme(shape=(1,), dtype=torch.float32)})

In [47]:
dataset.data.y.shape

torch.Size([8836, 1])

In [48]:
torch.save(graphs, "abcd_graphs_crystallized.pt" )

In [49]:
torch.save(dataset.data.y, "abcd_graphs_crystallized_labels.pt" )