# Graph ConvNet for cosmology: part of sphere classification (whole data)

[Nathanaël Perraudin](http://perraudin.info), [Michaël Defferrard](http://deff.ch), Tomasz Kacprzak

In [None]:
# IPython setup: load the autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os
import shutil

import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from tqdm import tqdm_notebook
import healpy as hp
import pygsp

from scnn import models, utils
from scnn.data import LabeledDatasetWithNoise, LabeledDataset

In [None]:
# Default size for every figure created below.
plt.rcParams.update({'figure.figsize': (17, 5)})

In [None]:
# Experiment configuration.
#   data_path: directory scanned for the input FITS maps.
#   Nside:     HEALPix resolution the maps are resampled to.
#   order:     passed to utils.hp_split / utils.nside2indexes.
#   EXP_NAME:  tag used for the summary, checkpoint and result folders.
data_path = 'data/same_psd/'
Nside, order = 256, 4
EXP_NAME = '40sim_{}sides_{}order'.format(Nside, order)

## 1 Load spherical data 

Load the maps and group them into one dataset per class.

In [None]:
# One list of maps per class; the class is identified by a tag in the filename.
ds1, ds2 = [], []
tag_to_dataset = (('0p26', ds1), ('0p31', ds2))

for filename in tqdm_notebook(os.listdir(data_path)):
    # Only FITS files are maps; anything else in the directory is ignored.
    if filename.endswith('fits'):
        img = hp.read_map(os.path.join(data_path, filename), verbose=False)
        # Convert RING -> NESTED ordering, then resample to the working Nside.
        img = hp.reorder(img, r2n=True)
        img = hp.ud_grade(img, nside_out=Nside, order_in='NESTED')

        # The first matching tag wins; maps matching no tag are dropped.
        for tag, dataset in tag_to_dataset:
            if tag in filename:
                dataset.append(img)
                break

## 2 Data preparation

### 2.1 Samples creation

Here we create samples by dividing each complete sphere into patches (based on the HEALPix sampling).

In [None]:
# Split every map with utils.hp_split and stack the pieces, one array per class.
data = {
    'class1': np.vstack([utils.hp_split(sphere, order=order) for sphere in ds1]),
    'class2': np.vstack([utils.hp_split(sphere, order=order) for sphere in ds2]),
}
# Drop the references to the map lists; only the stacked arrays are used below.
del ds1, ds2

print('The data is of shape {}'.format(data['class1'].shape))

### 2.2 Normalization and train / test split 

Let us split the data into training and testing sets. The raw data is stored in `x_raw` and the histograms in `x_trans`. We cannot use the power spectral density as a transformation, so we compute a histogram of the data instead.

In [None]:
# Stack both classes (class 1 first) and rescale by the global mean square.
# This is a pure scaling: no mean is subtracted from the data.
x_raw = np.vstack((data['class1'], data['class2']))
x_raw = x_raw / np.mean(np.square(x_raw))

# Feature extraction: histogram over the global value range, then standardize.
# NOTE(review): presumably one histogram per sample -- confirm in utils.histogram.
cmin, cmax = x_raw.min(), x_raw.max()
x_hist = utils.histogram(x_raw, cmin, cmax)
x_trans = preprocessing.scale(x_hist)

# Labels follow the stacking order: 0 for class 1, 1 for class 2.
n_class1 = len(data['class1'])
labels = np.concatenate((
    np.zeros(n_class1, dtype=int),
    np.ones(x_raw.shape[0] - n_class1, dtype=int),
))

# Random split: ntrain samples for training, all remaining ones for testing.
ntrain = 2000
(x_raw_train, x_raw_test,
 x_trans_train, x_trans_test,
 labels_train, labels_test) = train_test_split(
    x_raw, x_trans, labels, test_size=len(x_raw)-ntrain, shuffle=True)

# Report the class balance of each split.
train_counts = np.bincount(labels_train, minlength=2)
test_counts = np.bincount(labels_test, minlength=2)
print('Class 1 VS class 2')
print('  Training set: {} / {}'.format(train_counts[0], train_counts[1]))
print('  Test set: {} / {}'.format(test_counts[0], test_counts[1]))

## 3  Classification using SVM

Let us try to classify our data using an SVM classifier.

While running an SVM classifier on the raw data will fail because of its dimensionality, we observe that we can correctly classify our dataset using the histogram.

In [None]:
# clf = SVC(kernel='rbf')
# clf.fit(x_raw_train, labels_train)
# 
# utils.print_error(clf, x_raw_train, labels_train, 'Training')
# utils.print_error(clf, x_raw_test, labels_test, 'Test')

In [None]:
# clf = SVC(kernel='rbf')
# clf.fit(x_trans_train, labels_train) 
# 
# utils.print_error(clf, x_trans_train, labels_train, 'Training')
# utils.print_error(clf, x_trans_test, labels_test, 'Test')

## 4 Classification using a spherical CNN

Let us now classify our data using a spherical convolutional neural network.

In [None]:
# Wrap the raw train / test samples and their labels for the model.
training, testing = (
    LabeledDataset(x_raw_train, labels_train),
    LabeledDataset(x_raw_test, labels_test),
)

In [None]:
# Resolution at each layer: halve Nside twice, then one more halving capped at 128.
nsides = [Nside // factor for factor in (1, 2, 4)]
nsides.append(min(Nside//8, 128))
# Alternative configurations:
# nsides = [2048, 1024, 256, 64]
# nsides = [128, 32, 16]
print('#sides: {}'.format(nsides))

# Index sets matching each resolution, computed by utils.nside2indexes.
indexes = utils.nside2indexes(nsides, order)

In [None]:
# Remove the summaries and checkpoints of a previous run of this experiment.
# Missing directories are not an error (ignore_errors=True).
for stale_dir in ('summaries/{}/', 'checkpoints/{}/'):
    shutil.rmtree(stale_dir.format(EXP_NAME), ignore_errors=True)

In [None]:
C = 2  # Number of classes.

params = {
    # Run bookkeeping.
    'dir_name': EXP_NAME,
    'num_epochs': 100,
    'batch_size': 20,
    'eval_frequency': 10,

    # Building blocks.
    'brelu': 'b1relu',  # Activation.
    'pool': 'apool1',  # Pooling.

    # Architecture.
    'nsides': nsides,  # Resolution of the graph at each layer.
    'indexes': indexes,  # Index sets from utils.nside2indexes, one per resolution.
    'F': [10, 40, 160, 10],  # Number of graph convolutional filters.
    'K': [10, 10, 10, 10],  # Polynomial orders.
    'batch_norm': [True, True, True, True],  # Batch norm.
    'M': [100, C],  # Output dimensionality of fully connected layers.

    # Optimization.
    'regularization': 5e-4,
    'dropout': 0.5,
    'learning_rate': 1e-4,
    'decay_rate': 0.95,
    'momentum': 0.9,
    'adam': True,
}
# Number of batches in one pass over the training set (true division, so a float).
params['decay_steps'] = ntrain / params['batch_size']

model = models.scnn(**params)

In [None]:
# Train on the training set, passing the test set as the second dataset.
fit_outputs = model.fit(training, testing)
accuracy, loss, t_step = fit_outputs

In [None]:
# Report the error of the trained model on each split.
for features, targets, split_name in (
        (x_raw_train, labels_train, 'Training'),
        (x_raw_test, labels_test, 'Test')):
    utils.print_error(model, features, targets, split_name)

## 5 Discussion


## 6 Filters visualization

In [None]:
# Output directory for the filter figures of this experiment.
folder = 'results/filters/{}/'.format(EXP_NAME)
# The path is nested, so create missing parents too: os.mkdir would raise
# FileNotFoundError when 'results/filters' does not exist yet.
# exist_ok=True keeps re-running this cell harmless.
os.makedirs(folder, exist_ok=True)

In [None]:
# Convolutional layer to inspect (1-based).
layer = 1

K = params['K'][layer-1]
Fout = params['F'][layer-1]

# Stored as a (Fin*K x Fout) matrix; unfold it to Fin x K x Fout.
trained_weights = model.get_var('conv{}/weights'.format(layer))
trained_weights = trained_weights.reshape((-1, K, Fout))
if layer > 1:
    # The input features of this layer are the filters of the previous one.
    assert trained_weights.shape == (params['F'][layer-2], K, Fout)
Fin, K, Fout = trained_weights.shape

# Reorder the axes: Fin x K x Fout => K x Fout x Fin.
trained_weights = trained_weights.transpose((1, 2, 0))
trained_weights.shape

In [None]:
# Plot the coefficients of all Fin*Fout filters, one row per polynomial order.
coefficients = trained_weights.reshape((K, Fin*Fout))

fig, ax = plt.subplots()
ax.plot(coefficients, '.')
ax.set_title('Learned Chebyshev coefficients')
fig.savefig('{}/layer{}_coefficients.png'.format(folder, layer), dpi=100)

In [None]:
# Sanity check (replace the learned weights by an identity matrix).
# trained_weights = np.identity(5)

# Build the graph at the resolution of the inspected layer.
nside = params['nsides'][layer-1]
G = utils.healpix_graph(nside=nside)
G.estimate_lmax()

# Chebyshev filter bank defined by the learned coefficients.
filters = pygsp.filters.Chebyshev(G, trained_weights)

# Sanity check (use a known filter bank instead).
# filters = pygsp.filters.Heat(G, tau=[5, 10, 20, 50])
# filters = filters.approximate('Chebyshev', order=4)

# Figure 1: filters in the spectral domain.
fig, ax = plt.subplots()
filters.plot(sum=False, ax=ax)
spectral_png = '{}/layer{}_spectral.png'.format(folder, layer)
fig.savefig(spectral_png, dpi=100)

# Figure 2: the figure returned by utils.plot_filters_section.
fig = utils.plot_filters_section(filters)
section_png = '{}/layer{}_section.png'.format(folder, layer)
fig.savefig(section_png, dpi=100)