<a href="https://colab.research.google.com/github/allnes/age_classifier/blob/master/train_age_pip.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Connect to Google Drive

In [2]:
from google.colab import drive
import os, natsort as nsrt, numpy as np, re
from scipy.sparse import coo_matrix, csgraph, csr_matrix
import matplotlib.pyplot as plt
!pip install ChebyGCN

drive.mount('/content/drive')
PATH_PROJECT='/content/drive/My Drive/DL_DATA_GRAPH/'
PATH_CNN_REPO=PATH_PROJECT + 'BUILD/cnn_graph/'
os.chdir(PATH_CNN_REPO)
from lib import models, graph, coarsening, utils
%ls

# !git clone https://github.com/mdeff/cnn_graph
!git pull origin master

os.chdir(PATH_PROJECT)
%ls
%matplotlib inline

Collecting ChebyGCN
  Downloading https://files.pythonhosted.org/packages/ce/5c/44ee685d452af7324c6685850ac8a2cc3e5d2a2bc2dea55cdd9a2816ab90/ChebyGCN-0.0.3.tar.gz
Building wheels for collected packages: ChebyGCN
  Building wheel for ChebyGCN (setup.py) ... [?25l[?25hdone
  Created wheel for ChebyGCN: filename=ChebyGCN-0.0.3-cp36-none-any.whl size=7656 sha256=b84feaff4a7c7e02704e6195169a2a28e1b355ab7d80d7339af7d020c79c16b4
  Stored in directory: /root/.cache/pip/wheels/56/d8/ca/b410394e2032a20ba9696feaab37698718f49af5e8a73690d8
Successfully built ChebyGCN
Installing collected packages: ChebyGCN
Successfully installed ChebyGCN-0.0.3


KeyboardInterrupt: ignored

## Preprocessing data

In [0]:
# Load every graph archive and relabel its vertices onto a compact index range.
#
# Produces, one entry per graph and in the same order:
#   list_of_rows / list_of_cols  - relabelled edge endpoints (arrays from 'E')
#   list_of_data                 - the 'D' array of the archive, unchanged
#   list_of_max_vertices         - largest vertex id after relabelling
# plus num_samples / num_features read from the first archive.
PATH_GRAPHS=PATH_PROJECT + 'DATA/mini_graphs/graphs/'

# Collect every file name found under PATH_GRAPHS.
# NOTE(review): os.walk also descends into sub-directories, but the names are later
# joined to PATH_GRAPHS directly - this only works for a flat directory.
list_grpahs = []
for (_, _, filenames) in os.walk(PATH_GRAPHS):
    list_grpahs.extend(filenames)

# Natural sort, then keep every second file.
# NOTE(review): presumably the directory holds two files per graph and only the
# first of each pair is wanted - confirm against the data layout.
list_grpahs = nsrt.natsorted(list_grpahs)[0::2]
if not list_grpahs:
    raise FileNotFoundError('no graph files found in ' + PATH_GRAPHS)

# Dataset-wide metadata is read from the first archive. The context manager
# closes the archive (previously it was opened twice and never closed).
with np.load(PATH_GRAPHS + list_grpahs[0]) as first_graph:
    num_samples = int(first_graph['num_samples'])
    num_features = int(first_graph['num_features'])

list_of_rows = []
list_of_cols = []
list_of_max_vertices = []
list_of_data = []

for graph_name in list_grpahs:
    with np.load(PATH_GRAPHS + graph_name) as raw_graph:
        # 'E' is transposed so that its first two rows are the edge endpoints.
        raw_edges = raw_graph['E'].transpose()
        rows = np.array(raw_edges[0])  # copies: relabelled in place below
        cols = np.array(raw_edges[1])
        raw_data = raw_graph['D']

    max_range = max(np.max(rows), np.max(cols))

    # Vertex ids that occur in some edge, highest first.
    used_indexes = np.unique(np.concatenate((rows, cols)))[::-1]
    # Ids below the maximum that occur in no edge ("holes"), lowest first.
    # setdiff1d replaces a per-index linear scan of rows/cols.
    unused_indexes = np.setdiff1d(np.arange(max_range), used_indexes)

    # Move the highest used ids into the lowest holes.
    for used_var, unused_var in zip(used_indexes, unused_indexes):
        if used_var < unused_var:
            # Every remaining hole lies above every remaining vertex: the ids are
            # already compact. Continuing would push low ids UP into those holes
            # (e.g. used {0, 1, 5} would become {2, 3, 4} instead of {0, 1, 2}).
            break
        np.place(rows, rows == used_var, unused_var)
        np.place(cols, cols == used_var, unused_var)
    max_range = max(np.max(rows), np.max(cols))

    list_of_rows.append(rows)
    list_of_cols.append(cols)
    list_of_max_vertices.append(max_range)
    list_of_data.append(raw_data)



In [0]:
# Build the dense feature matrix X (one flattened adjacency matrix per graph)
# and the label vector y parsed from the sample sheet.

# All graphs must share the same largest vertex id, otherwise their adjacency
# matrices could not be stacked into one array.
assert np.max(list_of_max_vertices) == np.min(list_of_max_vertices), \
    'graphs disagree on their largest vertex id'
size_matrix = np.max(list_of_max_vertices) + 1

X = []
for raw_data, rows, cols in zip(list_of_data, list_of_rows, list_of_cols):
    sparse_graph = coo_matrix((raw_data, (rows, cols)),
                              shape=(size_matrix, size_matrix))
    # toarray() returns a plain ndarray; todense() would return the legacy
    # np.matrix type. The stacked result is the same.
    dense_graph = sparse_graph.toarray()
    X.append(dense_graph)
X = np.array(X)
# Flatten each (size_matrix, size_matrix) matrix into a single feature row.
X = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))

PATH_LABELS=PATH_PROJECT + 'DATA/mini_graphs/GSE87571_samples.txt'

# Parse one label per matching line; the third captured number group is the label.
# The file is opened in a context manager so the handle is always closed.
# NOTE(review): the character class [M,F] also accepts a literal comma - confirm
# against the file format before tightening it to [MF].
y = []
with open(PATH_LABELS, 'r') as raw_file:
    for line in raw_file:
        match_obj = re.match(r'(GSM[0-9]*)\s*([M,F])\s*([0-9]*)\s*([0-9]*)', line)
        if match_obj is not None:
            y.append(int(match_obj.group(3)))
y = np.array(y)

assert len(y) == num_samples, \
    'parsed %d labels, expected %d' % (len(y), num_samples)
assert len(X) == num_samples, \
    'built %d graphs, expected %d' % (len(X), num_samples)

# raw_graph is the last archive opened by the preprocessing cell; .files lists
# the arrays it contains.
print(raw_graph.files)
print(X.shape)
print(y.shape)

## Train

In [0]:
# Split the data, build a k-NN graph with its coarsened Laplacians, and start
# assembling a graph-convolution classifier.
#
# NOTE(review): this import rebinds `coarsening`, which the first cell imported
# from `lib`; every `coarsening.*` call below uses the ChebyGCN module.
from ChebyGCN import layers, coarsening

print('--> Preprocessing data')
# Sequential (unshuffled) split: first 3/4 train, next 1/10 validation, rest test.
n_train = (num_samples * 3) // 4
n_val = num_samples // 10

X_train = X[:n_train]
X_val   = X[n_train:n_train+n_val]
X_test  = X[n_train+n_val:]

# Labels are integer-divided by 10, i.e. bucketed into bins of width 10.
y_train = y[:n_train] // 10
y_val   = y[n_train:n_train+n_val] // 10
y_test  = y[n_train+n_val:] // 10

print('--> Get distance graph')
# NOTE(review): X holds one row per sample, so this call receives samples as rows -
# TODO confirm whether a graph over features (X.T) was intended, given that `perm`
# is later applied to X_train / X_test.
dist, idx = graph.distance_sklearn_metrics(X, k=2, metric='euclidean')
A = graph.adjacency(dist, idx).astype(np.float32)

print('--> Get laplacian matrix')
A = csr_matrix(A)
graphs, perm = coarsening.coarsen(A, levels=3, self_connections=True) #produce graph coarsenings 
# NOTE(review): only X_train and X_test are passed through perm_data; X_val is
# left as-is - confirm this is intended before using X_val with the model.
X_train = coarsening.perm_data(X_train, perm)
X_test = coarsening.perm_data(X_test, perm)
# One Laplacian per entry of `graphs`.
L = [coarsening.laplacian(A, normalized=True) for A in graphs]

print('--> Get model')
# NOTE(review): Input, Reshape, Flatten and Dense are not imported in any cell
# visible here - presumably Keras layers; confirm they are imported elsewhere.
x_input = Input(shape=(X_train.shape[1],))
# Add a trailing axis of size 1 to each input row.
x = Reshape((X_train.shape[1],1))(x_input)
x = layers.GraphConvolution( 8, 2, 20, L[0])(x)
x = layers.GraphConvolution( 8, 4, 10, L[2])(x)
x = Flatten()(x)
# NOTE(review): 66 output units, while the labels above are `y // 10` - confirm
# the class count matches the bucketed labels.
x = Dense(66, activation='softmax')(x)