# Heat kernel

<font color=red>Something is wrong here! The kernel matrix shouldn't contain lots of zeros in the lower right corner according to my intuition.</font>

In [1]:
sigma = 0.0001

## Load packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sys
path = '../../scripts/'
sys.path.insert(0,path)
from RipserToDict import ripser_to_dict
from PlotPersistence import plot_persistence
import pickle
import random

## Functions

In [3]:
def heat_kernel(F,G,sigma):
    
    """Let F and G be PH matrices of dimensions n_points x 2, where the columns are birth and death respectively.
    The matrices are thus only for one dim each, e.g. 0 or 1 or 2.
    sigma is a scalar parameter which may be taken to be 0.1.
    The output is the heat kernel"""

    kernel = 0
    for z in G.copy():
        for y in F.copy():
            kernel += np.exp(-np.linalg.norm(y - z,ord=2)**2/(8.*sigma))
            kernel += -np.exp(-np.linalg.norm(y - np.flipud(z),ord=2)**2/(8.*sigma))
    kernel *= 1./(8.*np.pi*sigma)

    return kernel

In [4]:
def heat_kernel_distance(F,G,sigma):
    
    """Let F and G be PH matrices of dimensions n_points x 2, where the columns are birth and death respectively.
    The matrices are thus only for one dim each, e.g. 0 or 1 or 2.
    sigma is a scalar parameter which may be taken to be 0.1.
    The output is the heat kernel distance"""
    
    inner = heat_kernel(F,F,sigma) + heat_kernel(G,G,sigma) - 2*heat_kernel(F,G,sigma)
    distance = np.sqrt(heat_kernel(F,F,sigma) + heat_kernel(G,G,sigma) - 2*heat_kernel(F,G,sigma))
    
    return distance

## Create labels and train kernel

In [5]:
# load train set
train_seeds = range(0,10)
train_persistences = []
train_labels = []
for seed in train_seeds:
    
    # sphere
    with open('../../../heavy_files/exercises/ml_on_1_to_3/sphere_persistences/' + str(seed) + '.txt','rb') as f:
        sphere_temp = pickle.load(f)
    train_persistences.append(sphere_temp.copy())
    sphere_temp = {}
    train_labels.append(0)
    
    # torus
    with open('../../../heavy_files/exercises/ml_on_1_to_3/torus_persistences/' + str(seed) + '.txt','rb') as f:
        torus_temp = pickle.load(f)
    train_persistences.append(torus_temp.copy())
    torus_temp = {}
    train_labels.append(0)

In [6]:
# permute
combined = list(zip(train_persistences.copy(), train_labels))
random.shuffle(combined)
train_persistences[:], train_labels[:] = zip(*combined)

In [7]:
train_kernel = np.zeros((len(train_labels),len(train_labels)))
train_homology = [train_persistences[x][1] for x in train_seeds]
for x, i in enumerate(train_homology.copy()):
    for y, j in enumerate(train_homology.copy()):
        train_kernel[x,y] = heat_kernel(i,j,sigma)
print(train_kernel)

[[  3.05008619e+04   5.41424416e-07   4.88701557e+02   2.24465079e+04
    1.80279410e+04   1.95158185e+04   2.70886577e-03   1.97920995e+04
    2.05062617e+04   2.16623099e+04   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  5.41424416e-07   3.61617142e+04   1.85552780e+04   6.10299694e+01
    4.32461049e-07   1.03403923e+02   1.77534971e+04   6.25701456e+01
    1.43846618e+01   1.96786360e+02   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  4.88701557e+02   1.85552780e+04   6.39977740e+04   5.46425161e+01
    1.00341088e+01   4.54561688e+02   2.41043255e+04   2.53599164e+02
    4.72656598e+01   1.20934895e+01   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00

In [8]:
# export
with open('train_kernel.txt','wb') as f:
    pickle.dump(train_kernel,f)

with open('train_labels.txt','wb') as f:
    pickle.dump(train_labels,f)

## Create test kernel and test labels

In [9]:
# load test set
test_seeds = range(10,20)
test_persistences = []
test_labels = []
for seed in test_seeds:
    
    # sphere
    with open('../../../heavy_files/exercises/ml_on_1_to_3/sphere_persistences/' + str(seed) + '.txt','rb') as f:
        test_persistences.append(pickle.load(f))
    test_labels.append(0)
    
    # torus
    with open('../../../heavy_files/exercises/ml_on_1_to_3/torus_persistences/' + str(seed) + '.txt','rb') as f:
        test_persistences.append(pickle.load(f))
    test_labels.append(1)

In [10]:
# permute
combined2 = list(zip(test_persistences, test_labels))
random.shuffle(combined2)
test_persistences[:], test_labels[:] = zip(*combined2)

In [11]:
test_kernel = np.zeros((len(test_labels),len(test_labels)))
test_homology = [test_persistences[x][1] for x in range(len(test_seeds))]
for x, i in enumerate(test_homology):
    for y, j in enumerate(test_homology):
        test_kernel[x,y] = heat_kernel(i,j,sigma)
print(test_kernel)

[[  5.28399098e+04   8.93946328e+02   2.85564207e+02   1.77729476e+02
    1.30526949e+02   2.01900455e+04   2.79800914e+02   1.61159784e+02
    3.52082942e+04   7.89424820e+02   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  8.93946328e+02   4.89753933e+04   2.06700348e+04   2.24850625e+04
    1.54531275e+04   8.66585898e+02   1.96185813e+04   2.05106189e+04
    1.43452908e+03   2.06124684e+04   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00]
 [  2.85564207e+02   2.06700348e+04   4.41811139e+04   2.67901014e+04
    2.24771822e+04   3.59614753e-03   2.28310408e+04   2.66999939e+04
    2.75039393e+01   2.26288640e+04   0.00000000e+00   0.00000000e+00
    0.00000000e+00   0.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00

In [12]:
# export
with open('test_kernel.txt','wb') as f:
    pickle.dump(test_kernel,f)

with open('test_labels.txt','wb') as f:
    pickle.dump(test_labels,f)