# Numerical Checking of t-SNE algorithm [$\mathcal{O}(n^2)$ version]

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import struct
import os

In [2]:
def read_results(file_name):
    """ Read results file
    """
    with open(file_name, 'rb') as fil:
        N, D = struct.unpack('II', fil.read(8))
        print(f"N={N}\tD={D}")
        Y = np.array(struct.unpack("{}d".format(N * D), fil.read(8 * N * D)))
        Y = Y.reshape(N, D)
    return Y

## X normalized

In [3]:
file_name = "X_normalized"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))

N=1000	D=784
N=1000	D=784


In [4]:
diff = np.abs(X_1 - X_2)

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd: {diff.std()}")

Max diff: 0.0
Min diff: 0.0
Std: 0.0


## Pairwise affinities $P_{i|j}$

In [5]:
file_name = "P"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))

N=1000	D=1000
N=1000	D=1000


In [6]:
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {re.max()}\nMin diff: {re.min()}\nStd: {re.std()}")

Max diff: 0.0
Min diff: 0.0
Std: 0.0


## Symmetric affinities $P_{ij}$

In [7]:
file_name = "P_sym"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))

N=1000	D=1000
N=1000	D=1000


In [8]:
diff = np.abs(X_1 - X_2)

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd: {diff.std()}")

Max diff: 0.0
Min diff: 0.0
Std: 0.0


## Low dimensional affinities $Q_{ij}$

In [9]:
file_name = "Q_0"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=1000
N=1000	D=1000
Max diff: 4.2802955591003666e-07
Min diff: 0.0
Std diff: 3.9507874252386045e-08

Max relative error: 4.2802955591003666e-07
Min relative error: 0.0
Std relative error: 3.9507874252386045e-08


In [18]:
file_name = "Q_300"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=1000
N=1000	D=1000
Max diff: 3.504529999531769e-12
Min diff: 0.0
Std diff: 2.994292570633578e-13

Max relative error: 3.504529999531769e-12
Min relative error: 0.0
Std relative error: 2.994292570633578e-13


## Gradients $\frac{dC}{d\mathcal{Y}}$

In [11]:
file_name = "dC_0"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 7.381749032366355e-06
Min diff: 5.569589148451108e-12
Std diff: 1.063526546164025e-06

Max relative error: 7.381749032366355e-06
Min relative error: 5.569589148451108e-12
Std relative error: 1.063526546164025e-06


In [19]:
file_name = "dC_300"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 9.497611688383575e-10
Min diff: 3.128957258703107e-14
Std diff: 1.3294869535028852e-10

Max relative error: 9.497611688383575e-10
Min relative error: 3.128957258703107e-14
Std relative error: 1.3294869535028852e-10


## Output Embedding $\mathcal{Y}$

In [21]:
file_name = "Y_0"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 0.0012382185750860783
Min diff: 1.430077980801224e-08
Std diff: 0.00018109633930966237

Max relative error: 0.0012382185750860783
Min relative error: 1.430077980801224e-08
Std relative error: 0.00018109633930966237


In [20]:
file_name = "Y_0_normalized"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 0.001245202005563082
Min diff: 1.3511796938847393e-07
Std diff: 0.00018104555033411285

Max relative error: 0.001245202005563082
Min relative error: 1.3511796938847393e-07
Std relative error: 0.00018104555033411285


In [22]:
file_name = "Y_300"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 4.053361724314898e-06
Min diff: 1.52427347636616e-10
Std diff: 5.143516072383088e-07

Max relative error: 4.053361724314898e-06
Min relative error: 1.52427347636616e-10
Std relative error: 5.143516072383088e-07


In [23]:
file_name = "Y_300_normalized"

X_1 = read_results(os.path.join("../implementations/original", "datum", file_name))
X_2 = read_results(os.path.join("../implementations/naive_tsne", "datum", file_name))
diff = np.abs(X_1 - X_2)
re = diff / max(X_1.max(), X_2.max())

print(f"Max diff: {diff.max()}\nMin diff: {diff.min()}\nStd diff: {diff.std()}\n")
print(f"Max relative error: {diff.max()}\nMin relative error: {diff.min()}\nStd relative error: {diff.std()}")

N=1000	D=2
N=1000	D=2
Max diff: 4.052686322730438e-06
Min diff: 6.618787393569448e-10
Std diff: 5.140223577919975e-07

Max relative error: 4.052686322730438e-06
Min relative error: 6.618787393569448e-10
Std relative error: 5.140223577919975e-07
