# t-Distributed Stochastic Neighbor Embedding (t-SNE)

In [None]:
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import pandas as pd
import matplotlib.pyplot as plt
from dimensionality_red.styler import style_dataframe

# Synthetic high-dimensional input: 100 points with 50 features each, drawn
# with np.random.rand after fixing the seed so every run sees the same data.
np.random.seed(42)
X = np.random.rand(100, 50)


## Step 1 - Conditional Probabilities in High-Dimensional Space

In [None]:
# Pairwise Euclidean distances between every pair of rows of X.
distances = euclidean_distances(X, X)

In [None]:
def compute_p_ij(distances, sigma=1.0):
    """Build the symmetric joint-probability matrix P from pairwise distances.

    A Gaussian kernel is applied to the distances, each row is normalized
    into the conditional distribution p_{j|i}, and the result is symmetrized
    as p_ij = (p_{j|i} + p_{i|j}) / (2n).

    Args:
        distances: Square (n, n) array of pairwise distances.
        sigma: Bandwidth of the Gaussian kernel, shared by every point.

    Returns:
        Symmetric (n, n) float array with a zero diagonal. Its entries sum
        to 1 whenever every point has at least one neighbour with non-zero
        kernel weight.
    """
    distances = np.asarray(distances, dtype=float)
    P = np.exp(-distances ** 2 / (2 * sigma ** 2))

    # t-SNE defines p_{i|i} = 0. Without this the self-similarity exp(0) = 1
    # takes part in the row normalization and shrinks every real neighbour
    # probability.
    np.fill_diagonal(P, 0.0)

    # A row can be all zeros (single point, or every neighbour's kernel weight
    # underflowed); leave such rows at zero instead of producing NaNs.
    row_sums = np.sum(P, axis=1, keepdims=True)
    P = np.divide(P, row_sums, out=np.zeros_like(P), where=row_sums > 0)

    return (P + P.T) / (2 * len(P))

In [None]:
# High-dimensional similarity matrix for X, using the default sigma.
P = compute_p_ij(distances)
print("Shape of P:", P.shape)

## Step 2 - Compute similarities in 2D space using the Student t-distribution

In [None]:
def compute_q_dist(Y):
    """Return the low-dimensional similarity matrix Q for embedding Y.

    Every pair of rows of Y is scored with the heavy-tailed kernel
    (1 + d^2)^-1, where d is their Euclidean distance. Self-similarities on
    the diagonal are set to zero and the whole matrix is then scaled so that
    all entries together sum to 1.
    """
    pairwise = euclidean_distances(Y, Y)
    kernel = (1 + pairwise ** 2) ** -1
    # A point is not its own neighbour, so drop the diagonal before scaling.
    np.fill_diagonal(kernel, 0)
    total = np.sum(kernel)
    return kernel / total

In [None]:
# Initial 2-D embedding: 100 points drawn with np.random.randn. The seed is
# reset to 42 so the starting layout is identical on every run.
np.random.seed(42)
Y = np.random.randn(100, 2)

In [None]:
# Low-dimensional similarity matrix for the current embedding Y.
Q = compute_q_dist(Y)
print("Shape of Q:", Q.shape)

In [None]:
# NOTE(review): plot_similarity is an external helper from modelviz whose
# behaviour is not visible here; presumably it plots how similar the rows of
# X are to the chosen point_of_interest (the first row) — confirm against the
# modelviz documentation.
from modelviz.relationships import plot_similarity
plot_similarity(data=X, point_of_interest=X[0], 
                std_range=3, similarity_color='black', 
                curve_color='grey', seaborn_style='whitegrid')

## Step 3 - Use KL Divergence to compare distributions

In [None]:
def kl_divergence(P, Q):
    """Return the KL divergence sum(P * log(P / Q)) over all matrix entries.

    A small constant (1e-9) is added to both numerator and denominator so
    that zero entries do not produce log(0) or a division by zero.
    """
    eps = 1e-9
    ratio = (P + eps) / (Q + eps)
    return np.sum(P * np.log(ratio))

In [None]:
# KL divergence between the high-dimensional P and the current Q.
loss = kl_divergence(P=P, Q=Q)

In [None]:
# Report the most recently computed loss value.
print(f"KL divergence loss is: {loss}")

## Step 4 - Iterate to minimize the Cost Function (C)

In [None]:
def update_Y(Y, P, Q, learning_rate=0.1):
    """Take one gradient-descent step on the embedding Y.

    The gradient used for point i is

        4 * sum_j (P[i, j] - Q[i, j]) * (Y[i] - Y[j]) / (1 + ||Y[i] - Y[j]||^2)

    and is evaluated for all points at once with broadcasting instead of a
    Python loop over rows.

    Args:
        Y: (n, d) float array of embedded points. It is updated in place.
        P: (n, n) array of high-dimensional similarities.
        Q: (n, n) array of low-dimensional similarities.
        learning_rate: Step size applied to the gradient.

    Returns:
        The same array object as Y, after the update.
    """
    # diff[i, j] = Y[i] - Y[j], shape (n, n, d).
    diff = Y[:, None, :] - Y[None, :, :]
    inv_dist = 1.0 / (1.0 + np.sum(diff ** 2, axis=2))

    # Per-pair scalar weight; positive where P > Q (pull together),
    # negative where P < Q (push apart).
    weights = (P - Q) * inv_dist
    gradients = 4 * np.sum(weights[:, :, None] * diff, axis=1)

    # In-place step, so callers holding a reference to Y see the update.
    Y -= learning_rate * gradients
    return Y


In [None]:
ITERATION = 500

# Per-iteration history, consumed by the loss plot in the next step.
losses = []
iters = []
for idx in range(ITERATION):
    # Q and the loss both describe the embedding as it stands *before* this
    # iteration's update; the step is taken afterwards.
    Q = compute_q_dist(Y)
    loss = kl_divergence(P, Q)
    Y = update_Y(Y, P, Q)
    # Progress is reported for iterations 1, 51, 101, ...
    if idx % 50 == 0:
        print(f"\U0000231B ITERATION: {idx + 1} Loss: {loss:.4f}")
    losses.append(loss)
    iters.append(idx + 1)


## Step 5 - Visualize Gradient Descent over Cost Function (KL Divergence Loss)

In [None]:
import seaborn as sns

# Loss curve: one marker per recorded iteration, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(
    x=iters,
    y=losses,
    marker='o',
    color='black',
    label="KL Divergence Loss",
    ax=ax,
)
ax.set_xlabel("Iteration")
ax.set_ylabel("Loss Value")
ax.set_title("KL (Cost) Function - minimization")
ax.legend()
ax.grid(True)
plt.show()