# Graph ConvNet for cosmology: whole sphere classification

[Nathanaël Perraudin](http://perraudin.info), [Michaël Defferrard](http://deff.ch), Tomasz Kacprzak

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import healpy as hp

from scnn import models

In [None]:
# Use a wide default canvas (17 x 5 inches) for every figure of this notebook.
plt.rc('figure', figsize=(17, 5))

## 1 Load some spherical data 

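The data consists of two classes of spherical maps (`class1` and `class2`, with the same number of samples in each), labelled with different values of `Omega_matter`.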

The produced maps have been down-sampled from `NSIDE=1024` to `NSIDE=64` using the `ud_grade` function of the `healpy` package.

In [None]:
# ``np.load`` on an ``.npz`` archive returns a lazy ``NpzFile``: it keeps the
# file open and re-reads an array from disk on every ``data[key]`` access.
# Materialise every array once into a plain dict and let the ``with`` block
# close the file handle.
with np.load('data/maps_downsampled_64.npz') as archive:
    data = {key: archive[key] for key in archive.files}

# Later cells label the samples by position, which requires both classes to
# hold the same number of maps.
assert len(data['class1']) == len(data['class2']), \
    'class1 and class2 must contain the same number of maps'

# NOTE: despite its name, ``nclass`` is the number of samples per class.
nclass = len(data['class1'])

Let us plot a map of each class. It is not easy to visually see a difference.

In [None]:
# Fetch each class once instead of indexing ``data`` repeatedly.
class1_maps, class2_maps = data['class1'], data['class2']

# Share one colour scale between both plots so that they are directly comparable.
cmin = min(np.min(class1_maps), np.min(class2_maps))
cmax = max(np.max(class1_maps), np.max(class2_maps))

# The class 1 title previously read Omega_matter=0.7, contradicting the PSD
# plots of this notebook which label class 1 with Omega_matter=0.3.
# NOTE(review): confirm 0.3 against the simulation parameters.
hp.mollview(class1_maps[0], title='class 1, Omega_matter=0.3', nest=True, cmap='jet', min=cmin, max=cmax)
hp.mollview(class2_maps[0], title='class 2, Omega_matter=0.5', nest=True, cmap='jet', min=cmin, max=cmax)

However, those maps have different power spectral densities.

In [None]:
def psd(x):
    """Return the spherical power spectral density of the map `x`.

    The map is first reordered from nested to ring ordering, then
    transformed to spherical harmonic coefficients, which are finally
    reduced to a power spectrum.
    """
    ring_map = hp.reorder(x, n2r=True)
    harmonic_coefficients = hp.map2alm(ring_map)
    return hp.alm2cl(harmonic_coefficients)

In [None]:
# Index of the map whose PSD is shown for each class.
SAMPLE = 0

# One curve per class, on a logarithmic x axis.
for class_key, label_template in (
        ('class1', 'class 1, Omega_matter=0.3, sample {}'),
        ('class2', 'class 2, Omega_matter=0.5, sample {}')):
    plt.semilogx(psd(data[class_key][SAMPLE]), label=label_template.format(SAMPLE))
plt.legend();

When averaging over all the samples, and hence getting a better estimate of the true PSD, the statistical difference becomes obvious.

In [None]:
# Fetch each class once, outside the loops: ``data`` may be a lazy ``NpzFile``
# which would otherwise reload the whole array on every ``data[key]`` access.
class1_maps = data['class1']
class2_maps = data['class2']

# One row per sample. The row length comes from the PSD itself instead of
# being hard-coded to 192, so this cell keeps working with maps of another
# resolution.
sample_psd_class1 = np.array([psd(sky_map) for sky_map in class1_maps[:nclass]])
sample_psd_class2 = np.array([psd(sky_map) for sky_map in class2_maps[:nclass]])

# Average over the samples to obtain one mean PSD per class.
psd_class1 = np.mean(sample_psd_class1, axis=0)
psd_class2 = np.mean(sample_psd_class2, axis=0)

In [None]:
# Plot the class-averaged PSDs on a logarithmic x axis.
for mean_psd, legend_entry in (
        (psd_class1, 'class 1, Omega_matter=0.3, mean'),
        (psd_class2, 'class 2, Omega_matter=0.5, mean')):
    plt.semilogx(mean_psd, label=legend_entry)
plt.legend();

## 2 Data preparation

Let us split the data into training and testing sets. The raw data is stored in `x_raw` and the power spectral densities in `x_psd`.

In [None]:
# Stack both classes into one matrix: the first `nclass` rows are class 1,
# the remaining rows are class 2.
x_raw = np.vstack((data['class1'], data['class2']))
# Rescale by the mean of the squared values. Nothing is subtracted, so the
# maps are not centred.
x_raw = x_raw / np.mean(np.square(x_raw))
# Stack the per-sample PSDs in the same order and pass them through
# `preprocessing.scale`.
x_psd = preprocessing.scale(np.vstack((sample_psd_class1, sample_psd_class2)))

# Label vector: 0 for the first `nclass` rows, 1 for all the others.
labels = (np.arange(x_raw.shape[0]) >= nclass).astype(int)

# Shuffled train / test split: every sample that is not among the `ntrain`
# training samples goes to the test set.
ntrain = 150
ret = train_test_split(
    x_raw, x_psd, labels,
    test_size=2 * nclass - ntrain,
    shuffle=True,
)
(x_raw_train, x_raw_test,
 x_psd_train, x_psd_test,
 labels_train, labels_test) = ret

# Report how many samples of each class ended up in each split.
print('Class 1 VS class 2')
for split_title, split_labels in (('Training set', labels_train), ('Test set', labels_test)):
    print('  {}: {} / {}'.format(split_title, np.sum(split_labels == 0), np.sum(split_labels == 1)))

## 3 Classification using SVM

As a baseline, let us classify our data using an SVM classifier.

* An SVM based on the raw features cannot discriminate the data because the dimensionality of the data is too large.
* However, we observe that the PSD features are linearly separable.

In [None]:
def print_error(model, x, labels, name):
    """Print the misclassification rate of `model` on the samples `x`.

    Parameters
    ----------
    model : object
        Any object exposing ``predict(x)`` that returns one label per sample.
    x : array-like
        Samples, passed unchanged to ``model.predict``.
    labels : array-like
        Ground-truth labels, one per sample.
    name : str
        Prefix of the printed line, e.g. ``'Training'`` or ``'Test'``.

    Raises
    ------
    ZeroDivisionError
        If `labels` is empty.
    """
    pred = np.asarray(model.predict(x))
    # Count mismatches instead of summing |pred - labels|: the absolute
    # difference is only an error count for 0/1 labels and over-counts as soon
    # as two labels differ by more than one.
    n_wrong = np.count_nonzero(pred != np.asarray(labels))
    error = n_wrong / len(labels)
    print('{} error: {:.2%}'.format(name, error))

In [None]:
# Baseline 1: SVM with an RBF kernel, trained directly on the raw maps.
clf = SVC(kernel='rbf')
clf.fit(x_raw_train, labels_train)

# Report the error on both splits.
for split_name, features, targets in (
        ('Training', x_raw_train, labels_train),
        ('Test', x_raw_test, labels_test)):
    print_error(clf, features, targets, split_name)

In [None]:
# Baseline 2: SVM with a linear kernel, trained on the PSD features.
clf = SVC(kernel='linear')
clf.fit(x_psd_train, labels_train)

# Report the error on both splits.
for split_name, features, targets in (
        ('Training', x_psd_train, labels_train),
        ('Test', x_psd_test, labels_test)):
    print_error(clf, features, targets, split_name)

## 4 Classification using a spherical CNN

Let us now classify our data using a spherical convolutional neural network.

In [None]:
# Hyper-parameters of the spherical CNN, passed as keyword arguments to the model.
params = {
    # Bookkeeping and training schedule.
    'dir_name': 'sphere_whole',
    'num_epochs': 5,
    'batch_size': 10,
    'eval_frequency': 10,

    # Building blocks.
    'brelu': 'b1relu',  # ReLU
    'pool': 'apool1',   # Average pooling

    # Architecture.
    'nsides': [64, 32, 16],            # Sizes of the Laplacians are nsides * nsides * 12.
    'F': [5, 10, 10],                  # Number of graph convolutional filters.
    'K': [10, 10, 10],                 # Polynomial orders.
    'batch_norm': [True, True, True],  # Batch norm.
    'M': [100, 2],                     # Output dimensionality of fully connected layers.

    # Optimization.
    'regularization': 1e-4,
    'dropout': 0.8,
    'learning_rate': 1e-3,
    'decay_rate': 0.95,
    'momentum': 0.9,
    'adam': True,
}
# Number of batches in one pass over the training set (a float: true division).
params['decay_steps'] = ntrain / params['batch_size']

In [None]:
# Build the spherical CNN from `scnn.models`, passing every entry of `params`
# as a keyword argument.
model = models.scnn(**params)

In [None]:
# Train on the training split. The test split is passed as the third and
# fourth arguments (presumably used for periodic evaluation during training;
# confirm in `scnn.models`). `fit` returns three values, kept here as
# `accuracy`, `loss` and `t_step`.
accuracy, loss, t_step = model.fit(x_raw_train, labels_train, x_raw_test, labels_test)

In [None]:
# Report the error of the trained spherical CNN on both splits.
for split_name, features, targets in (
        ('Training', x_raw_train, labels_train),
        ('Test', x_raw_test, labels_test)):
    print_error(model, features, targets, split_name)