In [None]:
%load_ext autoreload
%autoreload 2
%pylab inline

In [None]:
import healpy as hp
import matplotlib.pylab as pl

# Load some spherical data 

Let us load two maps that share the same power spectral density (PSD) but differ in their higher-order statistics.

In [None]:
# Read both convergence maps and convert them from RING to NESTED ordering,
# which is the ordering every later cell assumes (nest=True / order_in='NESTED').
map_paths = (
    'data/same_psd/kappa_omega_m_0p3.fits',
    'data/same_psd/kappa_omega_m_0p26.fits',
)
img1, img2 = (hp.reorder(hp.read_map(path), r2n=True) for path in map_paths)

Downsample the maps to a common resolution.

In [None]:
# Target resolution shared by both maps; the inputs are already in NESTED ordering.
Nside = 1024
img1, img2 = (
    hp.ud_grade(full_map, nside_out=Nside, order_in='NESTED')
    for full_map in (img1, img2)
)

Let us display the two maps

In [None]:
# Use one colour scale for both maps so that they can be compared visually.
cmin = min(np.min(img1), np.min(img2))
cmax = max(np.max(img1), np.max(img2))
shared_view = dict(nest=True, min=cmin, max=cmax)
hp.mollview(img1, title='Map 1, omega_m=0.31, pk_norm=0.82, h=0.7', **shared_view)
hp.mollview(img2, title='Map 2, omega_m=0.26, sigma_8=0.91, h=0.7', **shared_view)

Let us cut the sphere into 192 smaller sub-parts. We display 16 of them below.

In [None]:
# `order` is used as the nside of a coarse HEALPix grid: it has
# hp.nside2npix(order) = 12 * order**2 = 192 pixels, one per sub-part.
order = 4

n_parts = hp.nside2npix(order)

# Give every sub-part a distinct value starting at 2, then zero all but the
# first order**2 = 16 of them so only those are highlighted on the plot.
index = np.arange(n_parts) + 2
# Builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
mask = np.zeros(index.shape, dtype=int)
mask[:order**2] = 1
index *= mask
hp.mollview(index, title='Some sphere subparts', nest=True)

# Mark only the very first sub-part.
marker = np.zeros(n_parts)
marker[0] = 1
hp.mollview(marker, title='Selected indexes', nest=True)

## Split the data

In [None]:
def hp_split(img, order, nest=True):
    '''Split a full-sky HEALPix map into 12 * order**2 equally sized parts.

    The map is simply reshaped, so it must be stored in NESTED ordering: only
    then do consecutive pixels belong to the same coarse pixel of a grid with
    nside equal to `order`.

    Parameters
    ----------
    img : numpy array of length 12 * nside**2
        Full-sky map in NESTED ordering.
    order : int
        nside of the coarse grid defining the parts. It must divide the nside
        of `img`.
    nest : bool
        Must be True; RING-ordered input is not supported.

    Returns
    -------
    parts : array of shape (12 * order**2, npix // (12 * order**2))
        One row per part.
    index : integer array
        np.arange(npix // (12 * order**2)), i.e. the pixel indices covered by
        the first part (row 0) in the original map.

    Raises
    ------
    ValueError
        If `order` does not evenly divide the nside of `img`.
    NotImplementedError
        If `nest` is False.
    '''
    npix = len(img)
    nside = hp.npix2nside(npix)
    # The original check compared nside2order(nside) with nside and never
    # raised; what the reshape needs is that `order` divides `nside`.
    if order < 1 or order > nside or nside % order != 0:
        raise ValueError('Order not compatible with data')
    if not nest:
        raise NotImplementedError('Implement the change of coordidinate')
    nsample = 12 * (order**2)
    part_size = npix // nsample
    return img.reshape([nsample, part_size]), np.arange(part_size)
    

In [None]:
# Split each map into sub-parts; each row becomes one sample of its class.
# Reuse the notebook-level `order` (instead of a hard-coded 4) so the split stays
# consistent with the sub-part display and with the CNN index computation.
data = dict()
data['class1'], _ = hp_split(img1, order=order)
data['class2'], index = hp_split(img2, order=order)

In [None]:
# (number of sub-parts, pixels per sub-part) for one class
class1_shape = data['class1'].shape
print('The data is of shape {}'.format(class1_shape))

Let us display one data sample on the entire sphere.

In [None]:
# Keep only the pixels of the first sub-part of map 1 and flag every other
# pixel as UNSEEN, so that a single data sample can be shown on the sphere.
pixels_per_part = data['class1'].shape[1]
imgt = img1.copy()
imgt[pixels_per_part:] = hp.UNSEEN
imgt = hp.ma(imgt)

In [None]:
# Plot the masked map and keep the projected 2-D image (xsize=1600 pixels wide)
# so that a region of it can be cropped and shown with imshow.
projected_map = hp.mollview(imgt,nest=True, return_projected_map = True, xsize = 1600 )

In [None]:
# Zoom on a 140 x 140 pixel window of the projected image.
# NOTE(review): the window bounds are hard-coded; presumably they frame the
# unmasked sub-part for xsize=1600 -- confirm if xsize or the sample changes.
plt.imshow(projected_map[380:520, 530:670])

# Preparing data for the classifier
Let us split the data into training and testing sets. The raw data is stored in `x_raw` and the histograms in `x_trans`. Since both maps have the same power spectral density, the PSD cannot be used as a discriminative feature. Instead, we use a histogram of the pixel values of each sample.

In [None]:
def trans(x, cmin, cmax):
    """Turn samples into 100-bin histograms of their values.

    Parameters
    ----------
    x : numpy array
        Either a single sample (1-D) or a stack of samples, one per entry
        along the first axis.
    cmin, cmax : float
        Lower and upper edge of the histogram range; values outside are ignored.

    Returns
    -------
    numpy float array of bin counts: shape (100,) for a 1-D input, otherwise
    one row of 100 counts per entry along the first axis of `x`.
    """
    def _counts(values):
        # np.histogram flattens its input, so `values` may have any shape.
        hist, _ = np.histogram(values, bins=100, range=[cmin, cmax])
        return hist.astype(float)

    if len(x.shape) <= 1:
        return _counts(x)
    return np.array([_counts(sample) for sample in x])


In [None]:
from sklearn import preprocessing


# Put all the data in a single matrix: class-1 samples first, then class-2.
x_raw = np.vstack((data['class1'], data['class2']))
# Global rescaling by the mean squared value. A single scalar is used for all
# samples, so the mean of each sample is only rescaled, not shifted.
x_raw = x_raw / np.mean(x_raw*x_raw)
cmin = np.min(x_raw)
cmax = np.max(x_raw)
# Histogram features, then standardised with sklearn's preprocessing.scale.
x_trans = trans(x_raw, cmin, cmax)
x_trans = preprocessing.scale(x_trans)

# Create the label vector: 0 for class 1, 1 for class 2.
# Builtin `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
labels = np.zeros([x_raw.shape[0]], dtype=int)
labels[len(data['class1']):] = 1

# Random reordering. The generator is seeded so that the train/test split, and
# therefore the reported error rates, are the same on every run.
rng = np.random.RandomState(0)
p = rng.permutation(len(labels))
x_raw = x_raw[p]
x_trans = x_trans[p]
labels = labels[p]

# First `ntrain` shuffled samples are used for training, the rest for testing.
ntrain = 300
ntest = len(x_raw) - ntrain
x_raw_train = x_raw[:ntrain]
x_trans_train = x_trans[:ntrain]
labels_train = labels[:ntrain]
x_raw_test = x_raw[ntrain:]
x_trans_test = x_trans[ntrain:]
labels_test = labels[ntrain:]

print('Class 1 VS class 2 \n  Training set: {} / {}\n  Testing set: {} / {}'.format(
    ntrain-sum(labels_train), sum(labels_train), ntest-sum(labels_test), sum(labels_test) ))

# Classification using SVM
Let us try to classify our data using an SVM classifier.

Running an SVM classifier directly on the raw data fails because of its dimensionality. However, we observe that we can correctly classify our dataset using the histograms.

We first fit the SVM on the raw data, and then on the histogram features.

In [None]:
from sklearn.svm import SVC

# RBF-kernel SVM trained directly on the raw pixel values.
clf_raw = SVC(kernel='rbf')
clf_raw.fit(x_raw_train, labels_train)

# Labels are 0/1, so the summed absolute difference counts misclassified samples.
train_miss = sum(np.abs(clf_raw.predict(x_raw_train) - labels_train))
print('Errors on training: {}%'.format(train_miss / ntrain * 100))
test_miss = sum(np.abs(clf_raw.predict(x_raw_test) - labels_test))
print('Errors on testing: {}%'.format(test_miss / ntest * 100))

In [None]:
from sklearn.svm import SVC

# RBF-kernel SVM trained on the standardised histogram features.
# NOTE: the name `clf_raw` is reused here although this classifier sees the
# histograms, not the raw data.
clf_raw = SVC(kernel='rbf')
clf_raw.fit(x_trans_train, labels_train)

# Labels are 0/1, so the summed absolute difference counts misclassified samples.
train_miss = sum(np.abs(clf_raw.predict(x_trans_train) - labels_train))
print('Errors on training: {}%'.format(train_miss / ntrain * 100))
test_miss = sum(np.abs(clf_raw.predict(x_trans_test) - labels_test))
print('Errors on testing: {}%'.format(test_miss / ntest * 100))

# Plot the histogram features
Let us first plot the mean and then each feature individually

In [None]:
# Recompute the histograms from the shuffled raw data, without the
# standardisation applied before classification, so the curves are bin counts.
x_trans = trans(x_raw, cmin, cmax)

# Average histogram of each class.
fig = plt.figure()
plt.plot(np.mean(x_trans[labels == 0], axis=0))
plt.plot(np.mean(x_trans[labels == 1], axis=0))
plt.title('Mean of the classes')

# One curve per sample: class 0 in blue, class 1 in red.
fig = plt.figure()
plt.plot(x_trans[labels == 0].T, 'b')
plt.plot(x_trans[labels == 1].T, 'r')
# The trailing semicolon suppresses the text output of the cell; the previous
# `_, title(...)` tuple depended on IPython's output history.
plt.title('Individual samples');

# Classification using a spherical CNN
Let us now classify our data using a spherical convolutional neural network.

In [None]:
from scnn import models

In [None]:
# Resolution (nside) used at each level of the network.
nsides = [Nside, Nside, Nside//2, min(Nside//8, 128)]
# nsides = [2048, 1024, 256, 64]
# nsides = [128, 32, 16]

# For every resolution, keep the pixel indices 0 .. npix/nsample - 1, i.e. the
# pixels of one of the 12 * order**2 sub-parts at that resolution.
nsample = 12 * (order**2)
indexes = [np.arange(hp.nside2npix(nside) // nsample) for nside in nsides]


In [None]:
C = 2  # number of classes

# Keyword arguments later passed to models.scnn(**params).
# NOTE(review): the meaning of each key is defined by the scnn package; the
# comments below restate the notebook's own notes and should be checked against it.
params = {
    'dir_name': 'test',
    'num_epochs': 10,
    'batch_size': 20,
    'eval_frequency': 10,

    # Building blocks.
    'brelu': 'b1lrelu',  # activation block (noted as "Relu" in the original)
    'pool': 'apool1',  # average pooling

    # Architecture.
    'nsides': nsides,  # sizes of the laplacians are nsides * nsides * 12
    'indexes': indexes,  # pixel indexes kept at each resolution
    'F': [5, 20, 80, 10],  # number of graph convolutional filters
    'K': [10, 10, 10, 10],  # polynomial orders
    'batch_norm': [True, True, True, True],  # batch norm
    'M': [100, C],  # output dimensionality of fully connected layers

    # Optimization.
    'regularization': 2e-4,
    'dropout': 0.8,
    'learning_rate': 10e-4,
    'decay_rate': 0.95,
    'momentum': 0.9,
    'adam': True,
}
# Number of batches in one pass over the training set (a float under true division).
params['decay_steps'] = ntrain / params['batch_size']

In [None]:
# Build the spherical CNN from the hyper-parameters collected in `params`.
model = models.scnn(**params)

In [None]:
# Train on the raw (non-histogram) samples; the test split is passed as the
# second data/label pair.
# NOTE(review): presumably used for validation during training -- confirm in scnn.
accuracy, loss, t_step = model.fit(x_raw_train, labels_train, x_raw_test, labels_test)

In [None]:
# Labels are 0/1, so the summed absolute difference counts misclassified samples.
train_miss = sum(np.abs(model.predict(x_raw_train) - labels_train))
print('Errors on training: {}%'.format(train_miss / ntrain * 100))
test_miss = sum(np.abs(model.predict(x_raw_test) - labels_test))
print('Errors on testing: {}%'.format(test_miss / ntest * 100))

## Comments about the results
Without subsampling
I train the spherical CNN a few minutes on CPU and I obtain 96% validation accuracy. 

The SVM consistently fails with the raw data but succeeds with the histograms.

Conclusion: the spherical CNN is able to discriminate between data with the same mean and the same PSD using only 1/192 of the sphere per sample.

Effect of subsampling
 - N=512, errors on training/testing: 11.66%, 78.57% => complete fail
 - N=1024, errors on training/testing: 0%, 0-3% => partial success
 - N=2048, errors on training/testing: 0%, 3% => partial success

Maybe this is also due to the fact that the training/validation sets are not the same for each run.
    

## Some other plotting

In [None]:
# from scnn import utils
# nside_v = 32
# nsample = 12 * (order**2)
# ind = np.array(list(range(hp.nside2npix(nside_v)//nsample)))
# G = utils.healpix_graph(nside=nside_v, nest=True, indexes=ind)

# G.plot()
