Download dataset

In [None]:
import os
import io
import shutil
import argparse
import pandas as pd
import numpy as np
from construct_graph import brain_graph
from nilearn.datasets import fetch_abide_pcp
from nilearn.datasets.utils import _fetch_file

# Argument parser for the download step; its options are registered in the next cell.
parser = argparse.ArgumentParser()

In [None]:
def _str2bool(value):
    """
    Convert a command-line token into a real boolean.
    ``type=bool`` does not work for this: ``bool('False')`` is True because every
    non-empty string is truthy, so ``--verbose False`` could never disable the flag.
    :param value: the raw token (or an actual bool).
    :return: the parsed boolean; the empty string maps to False, as it did with ``type=bool``.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


parser.add_argument('--root', type=str, default='./data', help='Path to store the brain graphs')
parser.add_argument('--verbose', type=_str2bool, default=True, help='Print the download details')

# An explicit empty argument list makes argparse ignore the kernel's own argv and use the defaults.
args = parser.parse_args([])

In [None]:
def delete_path(root):
    """
    Recursively delete a file or a directory tree on a best-effort basis.
    Entries that cannot be removed are left in place without raising, and a path
    that is neither a file nor a directory (e.g. it does not exist) is ignored.
    :param root: path of the file or directory to delete.
    :return: None.
    """
    if os.path.isfile(root):
        try:
            os.remove(root)
        except OSError:
            # best effort: keep going when a file cannot be removed
            pass
    elif os.path.isdir(root):
        for item in os.listdir(root):
            delete_path(os.path.join(root, item))
        # Remove the directory once, after all of its children were processed.
        # Doing this inside the loop never ran for an empty directory, so empty
        # folders (and every parent containing one) were left behind.
        try:
            os.rmdir(root)
        except OSError:
            # best effort: the directory is not empty or cannot be removed
            pass




In [None]:
def load_text(data_path, text):
    """
    Build the table of non-imaging information for the downloaded samples.
    Every ``.1D`` file found in ``data_path`` contributes one row; the rows are
    left-joined with ``text`` on ``FILE_ID``, so files without a matching entry
    are kept with missing values.
    :param data_path: path of the downloaded data.
    :param text: DataFrame with a ``FILE_ID`` column (e.g. the phenotypic CSV).
    :return: DataFrame with ``FILE_ID``, ``file_name`` and the columns of ``text``.
    """
    roi_files = []
    subject_ids = []
    for entry in os.listdir(data_path):
        if not entry.endswith('.1D'):
            continue
        roi_files.append(entry)
        stem = entry.split('.')[0]  # drop the .1D extension
        subject_ids.append(stem[:-8])  # drop the trailing _rois_ho
    index_frame = pd.DataFrame({'FILE_ID': subject_ids, 'file_name': roi_files})
    return index_frame.merge(text, how='left', on='FILE_ID')

In [None]:
# Fetch the 'rois_ho' derivative of the CPAC pipeline into ./temp, requesting both
# band-pass filtering and global signal regression; the next cells read the result
# from ./temp/ABIDE_pcp/cpac/filt_global.
print('Downloading the ABIDE I dataset preprocessed by CPAC...')
fetch_abide_pcp(data_dir='./temp', derivatives='rois_ho', verbose=args.verbose,
                pipeline='cpac', band_pass_filtering=True, global_signal_regression=True)




In [None]:
# path generated by fetch abide
path = os.path.join('./temp', 'ABIDE_pcp', 'cpac', 'filt_global')

# phenotypic information
info_path = os.path.join(args.root, 'phenotypic')
# exist_ok=True replaces the check-then-create pattern: it is idempotent on re-runs
# and cannot race with another process creating the same folder.
os.makedirs(info_path, exist_ok=True)
print('Loading phenotypic information')
phenotypic = pd.read_csv(os.path.join('./temp', 'ABIDE_pcp', 'Phenotypic_V1_0b_preprocessed1.csv'))
# one row per downloaded .1D file, joined with its phenotypic record
logs = load_text(path, phenotypic)



In [None]:
# Derive the label as 2 - DX_GROUP (a DX_GROUP of 1 becomes 1, a DX_GROUP of 2 becomes 0)
# and persist the table next to the other phenotypic files.
logs['label'] = 2 - logs['DX_GROUP']
logs.to_csv(os.path.join(args.root, 'phenotypic', 'log.csv'))



In [None]:
# download HO atlas labels
print('Downloading Harvard-Oxford Atlas')
src_path = 'https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Resources/ho_labels.csv'
# NOTE(review): _fetch_file is a private nilearn helper (leading underscore); presumably it
# saves the file into info_path under its original name — confirm it exists in the installed nilearn version.
_fetch_file(src_path, info_path)
# Lines starting with '#' are skipped and the file has no header row, so the two columns are named here.
atlas = pd.read_csv(os.path.join(info_path, 'ho_labels.csv'), comment='#', header=None, names=['index', 'area'])



In [None]:
# construct graph representation of the brain
# brain_graph comes from the project module construct_graph; it receives the sample table,
# the atlas labels, the output folder <root>/ABIDE/raw and the folder holding the .1D files.
# NOTE(review): presumably it writes the graph files into the output folder — confirm in construct_graph.
print('Constructing the graph representation of the brain...')
brain_graph(logs, atlas, os.path.join(args.root, 'ABIDE', 'raw'), path)



In [None]:
# delete all the downloaded data
# print('Deleting the raw ABIDE I data...')
# delete_path('./temp')

In [26]:
import argparse
import torch
import os
import pandas as pd
from construct_graph import population_graph
from kfold_eval import kfold_mlp, kfold_gcn
from training import graph_pooling, extract

# Fresh parser for the training options; this rebinds `parser`, replacing the one used for the download step.
parser = argparse.ArgumentParser()

In [27]:
def _str2bool(value):
    """
    Convert a command-line token into a real boolean.
    ``type=bool`` does not work for this: ``bool('False')`` is True because every
    non-empty string is truthy, so ``--verbose False`` could never disable the flag.
    :param value: the raw token (or an actual bool).
    :return: the parsed boolean; the empty string maps to False, as it did with ``type=bool``.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


parser.add_argument('--seed', type=int, default=13, help='random seed')
parser.add_argument('--batch_size', type=int, default=128, help='batch size')
parser.add_argument('--lr', type=float, default=0.0001, help='learning rate')
parser.add_argument('--nhid', type=int, default=256, help='hidden size of MLP')
parser.add_argument('--pooling_ratio', type=float, default=0.05, help='pooling ratio')
parser.add_argument('--dropout_ratio', type=float, default=0.01, help='dropout ratio')
parser.add_argument('--data_dir', type=str, default='./data', help='root of all the datasets')
parser.add_argument('--device', type=str, default='cuda:0', help='specify cuda devices')
parser.add_argument('--check_dir', type=str, default='./checkpoints', help='root of saved models')
parser.add_argument('--result_dir', type=str, default='./results', help='root of classification results')
parser.add_argument('--verbose', type=_str2bool, default=True, help='print training details')

# An explicit empty argument list makes argparse ignore the kernel's own argv and use the defaults.
args = parser.parse_args([])

In [28]:
# Seed PyTorch's global random number generator; no other RNG is seeded in this cell.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x20ce29e03f0>

In [29]:
# Pooled features are cached per pooling ratio under <data_dir>/ABIDE_downsample.
downsample_file = os.path.join(args.data_dir, 'ABIDE_downsample',
                                   'ABIDE_pool_{:.3f}_.txt'.format(args.pooling_ratio))
# Only run graph pooling when the cache file for this ratio is missing.
# NOTE(review): presumably graph_pooling(args) writes exactly this file — confirm in training.py.
if not os.path.exists(downsample_file):
    print('Running graph pooling with pooling ratio = {:.3f}'.format(args.pooling_ratio))
    graph_pooling(args)

# load sparse brain networking
# (tab-separated, no header row; kept as a plain NumPy array)
downsample = pd.read_csv(downsample_file, header=None, sep='\t').values

kfold_mlp(downsample, args)

# use the best MLP model to extract further learned features
# from pooling results
extract(downsample, args)

1 times CV out of 3 on training MLP...
Training MLP on the 1 fold...
1 val set out of 10
 Epoch: 0116 loss_train: 0.373104 acc_train: 1.000000 time: 6.999903ssEpoch: 0094 loss_train: 0.453339 acc_train: 0.998580 time: 6.396903s
Optimization Finished! Total time elapsed: 7.000906
2 val set out of 10
 Epoch: 0106 loss_train: 0.448671 acc_train: 0.998580 time: 2.697001ss

KeyboardInterrupt: 

In [31]:
# locations of the cached population graph (edge list and edge attributes)
adj_path = os.path.join(args.data_dir, 'population graph', 'ABIDE.adj')
attr_path = os.path.join(args.data_dir, 'population graph', 'ABIDE.attr')

# build the population graph unless both cache files are already on disk
if not (os.path.exists(adj_path) and os.path.exists(attr_path)):
    population_graph(args)

# read the cached graph back; the edge attributes are flattened to one dimension
edge_index = pd.read_csv(adj_path, header=None).values
edge_attr = pd.read_csv(attr_path, header=None).values.reshape(-1)

# k-fold GCN run; the third argument is the number of rows of `downsample`
kfold_gcn(edge_index, edge_attr, downsample.shape[0], args)


Training GCN on the 1 fold


KeyboardInterrupt: 

In [1]:
import pandas as pd

In [20]:
# Load the node-attribute file from data/ABIDE/raw as a header-less, comma-separated table.
node_attributes = pd.read_csv("data/ABIDE/raw/ABIDE_node_attributes.txt",sep=",",lineterminator="\n",header=None)

In [21]:
# Display the frame; pandas abbreviates the rendering of large frames with '...'.
node_attributes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,68,69,70,71,72,73,74,75,76,77
0,-3.736645,-7.589883,-8.920371,-3.891416,5.987351,13.209374,10.229992,-3.293651,-19.336110,-27.782725,...,-1.434309,-2.863544,-0.336828,8.235963,16.624632,17.412071,10.107556,1.151430,-3.249882,-2.848707
1,0.891726,-2.532437,-7.425550,-8.894101,-4.457442,2.188095,4.196222,-1.163910,-9.110473,-12.120529,...,0.641146,-7.066960,-11.406058,-5.653504,6.805342,15.558355,13.488030,3.294748,-6.031457,-8.078596
2,-4.578270,-6.565946,-5.818296,-1.301829,5.348949,10.387771,10.928948,7.349622,2.447879,-1.491270,...,-2.298424,-7.915710,2.123940,19.853298,26.697444,13.861925,-7.621039,-18.470124,-10.997808,4.553665
3,-11.259179,-8.396744,-0.960917,4.780611,3.760062,-3.718630,-12.774533,-18.220829,-17.951688,-12.977178,...,1.031009,-3.268954,5.030508,21.198344,29.321371,19.473636,-1.049798,-14.783236,-11.514702,2.451499
4,-7.712941,-11.710429,-10.108960,-3.214908,3.609653,4.239905,-2.202108,-9.812311,-11.075350,-3.924096,...,-1.365793,-4.925235,-5.020392,-0.667567,5.524105,10.835551,14.282211,15.479047,13.486396,7.748561
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96676,18.192738,17.125673,3.463869,-14.832759,-24.259683,-18.270554,-4.618219,1.121180,-9.169209,-27.201320,...,-2.311464,24.705390,38.200444,26.160590,1.359563,-12.879697,-4.360073,19.584142,40.695344,43.915498
96677,-16.733135,-20.221566,-3.664311,19.644070,25.048601,4.365131,-19.875515,-14.838797,24.982796,64.868006,...,66.593431,34.870179,-1.301634,-15.401195,-10.117821,-6.831096,-17.815473,-34.485495,-40.938339,-32.223209
96678,0.204250,-4.004687,-7.982310,-7.837147,-3.476984,1.621057,4.931650,7.478133,10.839421,12.925819,...,8.163697,2.662432,-4.709224,-8.634936,-7.336008,-2.528198,2.731150,5.689378,4.871487,1.058272
96679,-23.996746,-41.407106,-33.238389,-4.008315,18.893731,15.410995,-2.418937,-2.623407,28.364694,65.193603,...,28.987393,42.514390,37.103104,25.808236,14.608091,-0.681397,-20.717948,-34.233646,-28.883626,-7.912153


In [22]:
# Read one downloaded .1D file as a tab-separated table; its first line is used as the column header.
df = pd.read_csv(filepath_or_buffer="./temp/ABIDE_pcp/cpac/filt_global/Caltech_0051461_rois_ho.1D",sep="\t",lineterminator='\n')

In [24]:
# First 78 rows, transposed: each displayed row is one column of the .1D file ('#10', '#11', ...).
# The leading rows shown here match the leading rows of node_attributes displayed earlier.
df.iloc[:78, :].T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,68,69,70,71,72,73,74,75,76,77
#10,-3.736645,-7.589883,-8.920371,-3.891416,5.987351,13.209374,10.229992,-3.293651,-19.336110,-27.782725,...,-1.434309,-2.863544,-0.336828,8.235963,16.624632,17.412071,10.107556,1.151430,-3.249882,-2.848707
#11,0.891726,-2.532437,-7.425550,-8.894101,-4.457442,2.188095,4.196222,-1.163910,-9.110473,-12.120529,...,0.641146,-7.066960,-11.406058,-5.653504,6.805342,15.558355,13.488030,3.294748,-6.031457,-8.078596
#12,-4.578270,-6.565946,-5.818296,-1.301829,5.348949,10.387771,10.928948,7.349622,2.447879,-1.491270,...,-2.298424,-7.915710,2.123940,19.853298,26.697444,13.861925,-7.621039,-18.470124,-10.997808,4.553665
#13,-11.259179,-8.396744,-0.960917,4.780611,3.760062,-3.718630,-12.774533,-18.220829,-17.951688,-12.977178,...,1.031009,-3.268954,5.030508,21.198344,29.321371,19.473636,-1.049798,-14.783236,-11.514702,2.451499
#17,-7.712941,-11.710429,-10.108960,-3.214908,3.609653,4.239905,-2.202108,-9.812311,-11.075350,-3.924096,...,-1.365793,-4.925235,-5.020392,-0.667567,5.524105,10.835551,14.282211,15.479047,13.486396,7.748561
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
#4602,8.880210,17.483394,36.408709,53.882401,52.916574,30.450051,4.645346,-1.807937,13.316423,27.153510,...,-14.431600,-11.990715,-1.367817,7.081625,8.869806,5.725998,0.545920,-4.504385,-6.241137,-1.393359
#4701,-12.701598,-3.369833,6.969579,1.306214,-22.448846,-47.175290,-54.384343,-43.695275,-30.984654,-28.129194,...,17.397292,28.670583,27.980818,11.970947,-10.347347,-25.238084,-26.972130,-22.141215,-20.811822,-24.394963
#4702,1.550715,4.909884,2.196238,-3.351472,-4.725982,0.022387,5.211281,4.634311,-0.647163,-3.689853,...,13.322079,17.961676,15.407743,7.171674,-2.591773,-8.396860,-6.323876,2.038572,9.091599,7.212100
#4801,-3.085262,-4.880674,-5.140999,-3.903228,-2.235251,-2.518293,-7.131203,-15.325522,-21.614845,-19.299310,...,-20.486530,-31.026886,-30.690454,-12.397126,14.375437,31.816328,32.357729,24.939371,21.606281,22.135647


In [18]:
# -1 does not compare equal to False (False compares equal to 0), so nothing is printed
if -1 == False:
    print("hello")

In [12]:
import numpy as np

In [17]:
# Split the integers 1..10 into 5 consecutive chunks of 2 elements each.
np.array_split(np.arange(1, 2*5 +1), 5)

[array([1, 2]), array([3, 4]), array([5, 6]), array([7, 8]), array([ 9, 10])]

In [11]:
import os
# Open one of the downloaded .1D files for reading in text mode.
# NOTE(review): no close() for this handle is visible in this part of the notebook —
# confirm it is closed later, or switch to a `with open(...)` block.
file = open("./temp/ABIDE_pcp/cpac/filt_global/Caltech_0051461_rois_ho.1D",'r')