In [27]:
from pathlib import Path
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch_geometric.data import Data
from sklearn.model_selection import train_test_split


In [28]:
from graphs import graph_to_data
from data import list_graphs, load_pheno, attach_age
from models import GraphSAGERegressor, GCNRegressor
from utils import device
from paths import PHENOTYPES_CSV, GRAPH_DIR

In [14]:
# Experiment: GCN model on the 86-node graphs, edge key number_of_fibers
# (30 epochs, hidden dim 32, 3 layers, no dropout, lr 1e-3, batch size 256).
# NOTE(review): the recorded outputs of this and the other train.py runs in this
# notebook all report saving curves to the same training_metrics.png, so earlier
# figures are presumably overwritten -- confirm, and consider a per-run filename.
# NOTE(review): --graph_resolution uses an underscore while the other flags use hyphens.
!python train.py --graph_resolution 86 --model gcn --epochs 30 --hidden-dim 32 --num-layers 3 --dropout 0.0 --edge-key number_of_fibers --learning-rate 1e-3 --batch-size 256

Loaded graphs from cache based on 86_nodes and number_of_fibers
Model: GCNRegressor, Hidden Dim: 32, Num Layers: 3, Dropout: 0.0
epoch   1 | train loss 30.881 | val loss 30.675 | train MAE 30.881 | val MAE 30.675
epoch  10 | train loss 25.686 | val loss 25.611 | train MAE 25.686 | val MAE 25.611
epoch  20 | train loss 7.814 | val loss 5.689 | train MAE 7.814 | val MAE 5.689
epoch  30 | train loss 3.347 | val loss 3.390 | train MAE 3.347 | val MAE 3.390

Test loss (L1): 3.249
Test MAE: 3.249 years
Training finished.
Saved training curves to /Users/cyprienrivier/kDrive/AI/Stanford/Graphs/Project/code/output/figures/training_metrics.png
Figure(1000x400)


In [19]:
# Experiment: SAGE model (`--model sage`) on the 86-node graphs, edge key number_of_fibers
# (50 epochs, hidden dim 32, 3 layers, dropout 0.1, lr 1e-4, batch size 256).
!python train.py --graph_resolution 86 --model sage --epochs 50 --hidden-dim 32 --num-layers 3 --dropout 0.1 --edge-key number_of_fibers --learning-rate 1e-4 --batch-size 256

Loaded graphs from cache based on 86_nodes and number_of_fibers
Model: SAGERegressor, Hidden Dim: 32, Num Layers: 3, Dropout: 0.1
epoch   1 | train loss 23.496 | val loss 23.367 | train MAE 23.496 | val MAE 23.367
epoch  10 | train loss 17.978 | val loss 17.924 | train MAE 17.978 | val MAE 17.924
epoch  20 | train loss 10.383 | val loss 10.457 | train MAE 10.383 | val MAE 10.457
epoch  30 | train loss 3.486 | val loss 3.699 | train MAE 3.486 | val MAE 3.699
epoch  40 | train loss 3.378 | val loss 3.645 | train MAE 3.378 | val MAE 3.645
epoch  50 | train loss 3.351 | val loss 3.658 | train MAE 3.351 | val MAE 3.658

Test loss (L1): 3.565
Test MAE: 3.565 years
Training finished.
Saved training curves to /Users/cyprienrivier/kDrive/AI/Stanford/Graphs/Project/code/output/figures/training_metrics.png
Figure(1000x400)


In [22]:
# Experiment: SAGE model on the higher-resolution 463-node graphs, edge key number_of_fibers
# (30 epochs, hidden dim 32, 2 layers, dropout 0.01, lr 5e-4, batch size 256).
!python train.py --graph_resolution 463 --model sage --epochs 30 --hidden-dim 32 --num-layers 2 --dropout 0.01 --edge-key number_of_fibers --learning-rate 5e-4 --batch-size 256

Loaded graphs from cache based on 463_nodes and number_of_fibers
Model: SAGERegressor, Hidden Dim: 32, Num Layers: 2, Dropout: 0.01
epoch   1 | train loss 32.303 | val loss 30.805 | train MAE 32.303 | val MAE 30.805
epoch  10 | train loss 7.970 | val loss 6.363 | train MAE 7.970 | val MAE 6.363
epoch  20 | train loss 3.360 | val loss 3.704 | train MAE 3.360 | val MAE 3.704
epoch  30 | train loss 3.289 | val loss 3.497 | train MAE 3.289 | val MAE 3.497

Test loss (L1): 3.505
Test MAE: 3.505 years
Training finished.
Saved training curves to /Users/cyprienrivier/kDrive/AI/Stanford/Graphs/Project/code/output/figures/training_metrics.png
Figure(1000x400)


In [24]:
# Experiment: SAGE model on the 86-node graphs with edge key fiber_length_mean
# (30 epochs, hidden dim 32, 2 layers, dropout 0.01, lr 5e-4, batch size 256).
# The recorded output shows a "Processing..." step before the cache is loaded,
# i.e. the graphs appear to be re-processed the first time a new edge key is used.
!python train.py --graph_resolution 86 --model sage --epochs 30 --hidden-dim 32 --num-layers 2 --dropout 0.01 --edge-key fiber_length_mean --learning-rate 5e-4 --batch-size 256

Processing...
Processing graphs from: data/HCP/86_nodes
Done!
Loaded graphs from cache based on 86_nodes and fiber_length_mean
Model: SAGERegressor, Hidden Dim: 32, Num Layers: 2, Dropout: 0.01
epoch   1 | train loss 20.994 | val loss 19.788 | train MAE 20.994 | val MAE 19.788
epoch  10 | train loss 3.669 | val loss 3.934 | train MAE 3.669 | val MAE 3.934
epoch  20 | train loss 3.250 | val loss 3.455 | train MAE 3.250 | val MAE 3.455
epoch  30 | train loss 3.213 | val loss 3.436 | train MAE 3.213 | val MAE 3.436

Test loss (L1): 3.354
Test MAE: 3.354 years
Training finished.
Saved training curves to /Users/cyprienrivier/kDrive/AI/Stanford/Graphs/Project/code/output/figures/training_metrics.png
Figure(1000x400)


In [25]:
# Experiment: SAGE model on the 86-node graphs with edge key FA_mean
# (30 epochs, hidden dim 32, 2 layers, dropout 0.01, lr 5e-4, batch size 256).
!python train.py --graph_resolution 86 --model sage --epochs 30 --hidden-dim 32 --num-layers 2 --dropout 0.01 --edge-key FA_mean --learning-rate 5e-4 --batch-size 256

Processing...
Processing graphs from: data/HCP/86_nodes
Done!
Loaded graphs from cache based on 86_nodes and FA_mean
Model: SAGERegressor, Hidden Dim: 32, Num Layers: 2, Dropout: 0.01
epoch   1 | train loss 23.978 | val loss 22.215 | train MAE 23.978 | val MAE 22.215
epoch  10 | train loss 4.487 | val loss 4.395 | train MAE 4.487 | val MAE 4.395
epoch  20 | train loss 3.410 | val loss 3.637 | train MAE 3.410 | val MAE 3.637
epoch  30 | train loss 3.401 | val loss 3.650 | train MAE 3.401 | val MAE 3.650

Test loss (L1): 3.677
Test MAE: 3.677 years
Training finished.
Saved training curves to /Users/cyprienrivier/kDrive/AI/Stanford/Graphs/Project/code/output/figures/training_metrics.png
Figure(1000x400)


In [8]:
# Resolution used to pick the graph sub-directory ("<graph_resolution>_nodes") below.
graph_resolution = 86

In [11]:
# Locate the graph files for the chosen resolution and look up an age for each.
# The directory is joined with `/` rather than string concatenation so this
# works whether GRAPH_DIR is a str or a Path.
graph_dir = Path(GRAPH_DIR) / f"{graph_resolution}_nodes"
files = list_graphs(graph_dir)
pheno = load_pheno(PHENOTYPES_CSV)
ages  = attach_age(files, pheno)
# Later cells index `files` and `ages` with the same positions, so a length
# mismatch would silently pair graphs with the wrong labels.
assert len(ages) == len(files), f"{len(ages)} ages for {len(files)} graph files"

In [18]:
# Sanity check: the number of ages should equal the number of graph files,
# because later cells index both with the same positions.
len(ages), len(files)

(1064, 1064)

In [21]:
# Two-stage random split over file positions: hold out 20% for test, then take
# 20% of the remainder for validation (about 64% / 16% / 20% overall).
# Both stages use the same fixed seed.
idx = np.arange(len(files))
split_opts = {"test_size": 0.2, "random_state": 0}
trainval, te = train_test_split(idx, **split_opts)
tr, va = train_test_split(trainval, **split_opts)

In [22]:
# Materialise per-split lists of graph files and ages from the index arrays,
# always in (train, val, test) order.
train_files, val_files, test_files = (
    [files[pos] for pos in part] for part in (tr, va, te)
)

train_ages, val_ages, test_ages = (
    [ages[pos] for pos in part] for part in (tr, va, te)
)

In [23]:
# Convert every (graph file, age) pair to a data object via graph_to_data,
# split by split, in (train, val, test) order.
train_data, val_data, test_data = (
    [
        graph_to_data(path, age, edge_key="number_of_fibers")
        for path, age in zip(paths, targets)
    ]
    for paths, targets in (
        (train_files, train_ages),
        (val_files, val_ages),
        (test_files, test_ages),
    )
)

In [28]:
# Inspect the first training graph.
# NOTE(review): the recorded output shows x=[83, 3], i.e. 83 nodes, although the
# graph comes from the "86_nodes" directory -- confirm whether this is expected.
train_data[0]

Data(x=[83, 3], edge_index=[2, 1400], y=[1], edge_weight=[1400])

In [30]:
# Edge weights of the first training graph (built with edge_key="number_of_fibers").
# NOTE(review): the recorded values are raw and non-integer, and appear as repeated
# pairs (e.g. 84.375, 84.375) -- presumably one entry per edge direction; verify.
train_data[0].edge_weight

tensor([ 84.3750,  84.3750,  27.5000,  ..., 477.6250,  75.6250,  75.6250])

In [29]:
# Load the age table directly for a quick look at the label distribution.
# NOTE(review): the path is hard-coded and relative to the working directory, while
# `pheno` was loaded from PHENOTYPES_CSV earlier -- confirm whether this is the same
# file and, if so, reuse that constant.
df= pd.read_csv("data/HCP/HCP_subjects_age_only.csv")

In [31]:
# Summary statistics of the `age` column over every row of the CSV.
# NOTE(review): the recorded output counts 1206 ages versus 1064 graph files, so this
# describes more subjects than were modelled. With std ~3.69 and a 22-37 range, a
# constant predictor would likely reach an MAE of roughly 3 years (estimate, not
# computed here) -- compare against the ~3.2-3.7 test MAEs logged by the train.py runs.
df.age.describe()

count    1206.000000
mean       28.837479
std         3.690534
min        22.000000
25%        26.000000
50%        29.000000
75%        32.000000
max        37.000000
Name: age, dtype: float64