In [43]:
import geomstats.backend as gs
import geomstats.datasets.utils as data_utils
from sklearn.model_selection import KFold
import numpy as np

In [36]:
data, patient_ids, labels = data_utils.load_connectomes()

In [37]:
print(data.shape)

(86, 28, 28)


In [38]:
labels

array([1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1])

In [40]:
from geomstats.learning.mdm import RiemannianMinimumDistanceToMeanClassifier
from sklearn.model_selection import RepeatedKFold

In [57]:
n = 28
n_classes = 2
from geomstats.geometry.spd_matrices import SPDMatrices, SPDMetricAffine, SPDMetricBuresWasserstein, SPDMetricLogEuclidean, SPDMetricEuclidean
metric_list = [SPDMetricAffine(n), SPDMetricAffine(n, power_affine=0.5), SPDMetricAffine(n, power_affine=-0.5), SPDMetricBuresWasserstein(n), SPDMetricLogEuclidean(n), SPDMetricEuclidean(n)]

rkf = RepeatedKFold(n_splits=2, n_repeats=100, random_state=2652124)

In [63]:
accuracy_list = []
rkf = RepeatedKFold(n_splits=2, n_repeats=100, random_state=2652124)
for train_index, test_index in rkf.split(data):
    X_train, X_test = data[train_index], data[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    metric_wise = []
    for metric in metric_list:
        mdm = RiemannianMinimumDistanceToMeanClassifier(metric, n_classes)
        mdm.fit(X_train, y_train)
        y=mdm.predict(X_test)
        acc = (y == y_test).mean()
        metric_wise.append(acc)
    accuracy_list.append(np.array(metric_wise))
accuracy = np.array(accuracy_list).mean(axis=0)

In [62]:
print(accuracy)

[0.70585796 0.70554187 0.70554187 0.69367406 0.70511494 0.67876847]


In [55]:
print("SPDMetricAffine", accuracy[0])
print("SPDMetricBuresWasserstein", accuracy[1])
print("SPDMetricLogEuclidean", accuracy[2])
print("SPDMetricEuclidean", accuracy[3])

SPDMetricAffine 0.6832558139534882
SPDMetricBuresWasserstein 0.6709302325581398
SPDMetricLogEuclidean 0.6833720930232559
SPDMetricEuclidean 0.6523255813953489


**what is done** :

86 examples was split into 2 folds and experiments is repeated with 100 random shuffles.

**Conclusion** :
 
Intrinsic Metric perform well than Euclidean. Affine, Log Euclidean are top. But This dataset is pretty small (just 86 examples) to make any reliable conclusion. 

In [68]:
import cv2 
gray_img = np.ones((10,10))
print(gray_img.shape)
dx = cv2.filter2D(gray_img, cv2.CV_64F, np.mat([-1,0,1]))

print(dx)

(10, 10)
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [73]:
mat = np.zeros((20,20))
print(mat.shape)
print(mat[1:-1,1:-1].shape)

(20, 20)
(18, 18)
