In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import manifold

## Create 3D Gaussians

In [5]:
dim = 3
n_gauss = 2
n_pts_per_gauss = 300
np.random.seed(5)

# centers = np.zeros((n_gauss,dim))
# for i in range(1,n_gauss):
#     centers[i] = np.random.randint(0,2,3)
centers = np.random.uniform(-1,1,size=(n_gauss,3))
    
print(centers)

cov_m = [np.diag([0.01 for i in range(dim)]),np.diag([0.01 if i%2 !=0 else 0.01 for i in range(dim)])]

D = np.zeros((n_pts_per_gauss*n_gauss,dim))
c = np.zeros(n_pts_per_gauss*n_gauss)      # storage for labels
for i in range(n_gauss):
    k = np.random.randint(0,2,1)[0]
    D[i*n_pts_per_gauss:(i+1)*n_pts_per_gauss] = np.random.multivariate_normal(centers[i],cov_m[k],n_pts_per_gauss)
    c[i*n_pts_per_gauss:(i+1)*n_pts_per_gauss] = i 
D = (D-np.min(D,axis=0))/(np.max(D,axis=0)-np.min(D,axis=0))
print(D.shape)
print(c.shape)

[[-0.55601366  0.74146461 -0.58656169]
 [ 0.83722182 -0.02317762  0.22348773]]
(600, 3)
(600,)


In [6]:
%matplotlib qt

# colors = ['r', 'g', 'b']  # Red, Green, Blue
colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF']
# Create a figure and 3D axis
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection='3d')

# Define colors for each Gaussian distribution

# Loop through each Gaussian to plot points with corresponding color
for i in range(n_gauss):
    ax.scatter(D[c == i, 0], D[c == i, 1], D[c == i, 2], color=colors[i], label=f'Gaussian {i+1}')
    # ax.scatter(D[:,0], D[:,1], D[:,2], c=c)

# Set labels and title
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_zlabel('Z-axis')
ax.set_title('3D Scatter Plot of Data Points from Three Gaussian Distributions')

# Add a legend
ax.legend()

# Show the plot
plt.show()

## Project 3D to 2D using T-SNE

In [7]:
t_sne = manifold.TSNE(
    n_components=2,
    perplexity=30,
    init="random",
    random_state=0,
)

S = t_sne.fit_transform(D)

# Plot T-SNE Output (3D --> 2D )

In [8]:
# Plotting the t-SNE results with the same color scheme
%matplotlib qt

# colors = ['r', 'g', 'b','']  # Red, Green, Blue
plt.figure(figsize=(10, 8))
for i in range(n_gauss):
    plt.scatter(S[c == i, 0], S[c == i, 1], color=colors[i], label=f'Gaussian {i+1}')
    # plt.scatter(S[c == i, 0], S[c == i, 1], label=f'Gaussian {i+1}')

plt.title('t-SNE Visualization of 3D Gaussian Distributions into 2D')
plt.legend()
plt.grid(True)
plt.show()

# Inverse Projection

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import train_test_split

# Define the MLP inverse_model
class NNinv(nn.Module):
    def __init__(self, input_size, output_size):
        super(NNinv, self).__init__()
        
        # Define the layers
        self.layers = nn.Sequential(
            nn.Linear(input_size, 64),  # Input to first hidden layer
            nn.ReLU(),
            nn.Linear(64, 128),  # First hidden layer to second hidden layer
            nn.ReLU(),
            nn.Linear(128, 256),  # Second hidden layer to third hidden layer
            nn.ReLU(),
            nn.Linear(256, 512),  # Third hidden layer to fourth hidden layer
            nn.ReLU(),
            nn.Linear(512, output_size),  # Fifth hidden layer to output
            nn.Sigmoid()  # Output layer with sigmoid activation
        )
    
    def forward(self, x):
        return self.layers(x)
    

X_train, X_test, y_train, y_test, c_train, c_test = train_test_split(S, D,c, test_size=0.33, random_state=42, stratify=c)


In [10]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
print(c_train.shape)
print(c_test.shape)

(402, 2)
(198, 2)
(402, 3)
(198, 3)
(402,)
(198,)


In [11]:
# Example usage
input_size = 2  # Example input size (can be changed)
output_size = dim   # Binary classification (sigmoid output for single output)

# Create DataLoader for batch processing
batch_size = 64
t_X_train = torch.tensor(X_train)
t_y_train = torch.tensor(y_train)
dataset = TensorDataset(t_X_train, t_y_train)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Instantiate the inverse_model, loss function, and optimizer
inverse_model = NNinv(input_size, output_size)
loss_fn = nn.L1Loss()  # Mean Absolute Error (MAE)
optimizer = optim.Adam(inverse_model.parameters(), lr=0.001)

# Number of epochs to train
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(dataloader):
        # Forward pass
        outputs = inverse_model(inputs)
        loss = loss_fn(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    # Print the average loss for the epoch
    avg_loss = running_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

print("Training complete.")

t_X_test = torch.tensor(X_test)
t_y_test = torch.tensor(y_test)
outputs_test = inverse_model(t_X_test)
loss_test = loss_fn(outputs_test, t_y_test)
print(loss_test/y_test.shape[0])

Epoch [1/5], Loss: 0.1416
Epoch [2/5], Loss: 0.0841
Epoch [3/5], Loss: 0.0742
Epoch [4/5], Loss: 0.0720
Epoch [5/5], Loss: 0.0696
Training complete.
tensor(0.0003, dtype=torch.float64, grad_fn=<DivBackward0>)


In [12]:
%matplotlib qt

# Create a figure and 3D axis
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection='3d')

# Define colors for each Gaussian distribution
# colors = ['r', 'g', 'b']  # Red, Green, Blue


output_fin = outputs_test.detach().numpy()
# Loop through each Gaussian to plot points with corresponding color
for i in range(n_gauss):
    ax.scatter(t_y_test[c_test == i, 0], t_y_test[c_test == i, 1], t_y_test[c_test == i, 2], color=colors[i], label=f'Actual_Gaussian {i+1}')
    # ax.scatter(output_fin[c_test == i, 0], output_fin[c_test == i, 1], output_fin[c_test == i, 2], color='orange', label=f'Predicted_Gaussian {i+1}')

ax.scatter(output_fin[:, 0], output_fin[:, 1], output_fin[:, 2], color='orange', label=f'Predicted_Gaussians')

# Set labels and title
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_zlabel('Z-axis')
ax.set_title('TSNE \n Actual Vs Prediction')

# Add a legend
ax.legend()

# Show the plot
plt.show()

# Create interpolated points only

In [13]:
# Calculate centroids
centroid_1 = np.mean(S[c ==0], axis=0)
print(centroid_1)
centroid_2 = np.mean(S[c ==1], axis=0)
print(centroid_2)

[-22.76501   11.038555]
[ 19.923922  -12.6635275]


In [14]:
# Number of points to interpolate along the line
n_interpolated_points = 100

# Generate interpolated points along the line
interpolated_points = np.linspace(centroid_1, centroid_2, n_interpolated_points)

# Plot original points, centroids, and interpolated line
# plt.scatter(gaussian_1_2d[:, 0], gaussian_1_2d[:, 1], color='blue', label="Gaussian 1")
# plt.scatter(gaussian_2_2d[:, 0], gaussian_2_2d[:, 1], color='green', label="Gaussian 2")
# plt.plot(interpolated_points[:, 0], interpolated_points[:, 1], 'k--', label="Connecting Line")

plt.figure(figsize=(10, 8))
plt.scatter(S[c == 0, 0], S[c == 0, 1], color='blue', label="Gaussian 1")
plt.scatter(S[c == 1, 0], S[c == 1, 1], color='green', label="Gaussian 2")
# plt.plot(interpolated_points[:, 0], interpolated_points[:, 1], 'k--', label="Connecting Line")
plt.scatter(interpolated_points[:, 0], interpolated_points[:, 1], label="Connecting Line")

# Mark centroids
plt.scatter(centroid_1[0], centroid_1[1], color='blue', marker='X', s=100, label="Centroid 1")
plt.scatter(centroid_2[0], centroid_2[1], color='green', marker='X', s=100, label="Centroid 2")

plt.legend()
plt.xlabel("t-SNE Component 1")
plt.ylabel("t-SNE Component 2")
plt.title("Connecting Line with Interpolated Points Between Centroids")
plt.show()


In [15]:
new_points_test = torch.tensor(interpolated_points).float()
outputs_new_points = inverse_model(new_points_test)
outputs_new_points =outputs_new_points.detach().numpy()

In [16]:
%matplotlib qt

# Create a figure and 3D axis
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection='3d')

# # Define colors for each Gaussian distribution
# colors = ['r', 'g', 'b']  # Red, Green, Blue

# Loop through each Gaussian to plot points with corresponding color
for i in range(n_gauss):
    ax.scatter(D[c == i, 0], D[c == i, 1], D[c == i, 2], color=colors[i], alpha=0.7, label=f'Gaussian {i+1}')

# ax.scatter(outputs_new_points[:, 0], outputs_new_points[:, 1], outputs_new_points[:, 2], s=150, label=f'New_points_Gaussian')
ax.scatter(outputs_new_points[:, 0], outputs_new_points[:, 1], outputs_new_points[:, 2], label=f'New_points_Gaussian')

# ax.scatter(outputs_new_points[:, 0], outputs_new_points[:, 1], outputs_new_points[:, 2], color='k', label=f'Predicted_Gaussian {i+1}')

# Set labels and title
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_zlabel('Z-axis')
ax.set_title(' New points (2D TSNE) mapping into 3D Gaussian Distributions')

# Add a legend
ax.legend()

# Show the plot
plt.show()

# Create new points with labels

In [17]:
# import numpy as np
# import matplotlib.pyplot as plt
# from sklearn.manifold import TSNE
from scipy.spatial.distance import cdist

# # Generate two 3D Gaussian distributions
# gaussian_1 = np.random.multivariate_normal([1, 1, 1], np.eye(3) * 0.1, 100)
# gaussian_2 = np.random.multivariate_normal([3, 3, 3], np.eye(3) * 0.1, 100)

# # Concatenate the data for t-SNE
# data = np.vstack((gaussian_1, gaussian_2))
# labels = np.array([0] * 100 + [1] * 100)

# # Perform t-SNE to reduce to 2D
# tsne = TSNE(n_components=2, random_state=0)
# data_2d = tsne.fit_transform(data)

# # Calculate centroids in 2D
# centroid_1 = np.mean(data_2d[labels == 0], axis=0)
# centroid_2 = np.mean(data_2d[labels == 1], axis=0)

# Number of points to generate between centroids
n_new_points = 100

# # Generate points along the line between centroids
interpolated_points = np.random.uniform(0, 1, n_new_points)[:, None] * (centroid_2 - centroid_1) + centroid_1

# # Add noise to spread points around the line
noise = np.random.normal(0, 0.05, interpolated_points.shape)  # Adjust spread with the 0.05 parameter
new_points = interpolated_points + noise

# # Assign labels based on proximity to centroids
# new_points = interpolated_points
dist_to_centroid1 = cdist(new_points, centroid_1[None, :])
dist_to_centroid2 = cdist(new_points, centroid_2[None, :])

new_labels = np.where(dist_to_centroid1 < dist_to_centroid2, 0, 1)

# # Plot original points, centroids, and new points
# # Create a figure and 3D axis
fig = plt.figure(figsize=(10, 8))
plt.scatter(S[c == 0, 0], S[c == 0, 1], color='blue', label="Gaussian 1")
plt.scatter(S[c == 1, 0], S[c == 1, 1], color='green', label="Gaussian 2")
# plt.scatter(S[c == 0][:, 0], data_2d[labels == 0][:, 1], color='blue', alpha=0.5, label="Gaussian 1")
# plt.scatter(data_2d[labels == 1][:, 0], data_2d[labels == 1][:, 1], color='green', alpha=0.5, label="Gaussian 2")
plt.scatter(new_points[:, 0], new_points[:, 1], c=new_labels, cmap='coolwarm', alpha=0.6, label="New Points")
plt.scatter(centroid_1[0], centroid_1[1], color='blue', marker='X', s=100, label="Centroid 1")
plt.scatter(centroid_2[0], centroid_2[1], color='green', marker='X', s=100, label="Centroid 2")

plt.legend()
plt.xlabel("t-SNE Component 1")
plt.ylabel("t-SNE Component 2")
plt.title("New Points with Labels Based on Proximity to Centroids")
plt.show()

# # ##Save the new points and labels
# # ##np.save("new_generated_points.npy", new_points)
# # ##np.save("new_generated_labels.npy", new_labels)

# ##print("New points and labels saved.")


In [18]:
print(len(interpolated_points))
print(interpolated_points.shape)
print(dist_to_centroid2.shape)
print(new_labels.shape)

100
(100, 2)
(100, 1)
(100, 1)


In [19]:
new_labels

array([[1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
    

In [20]:
new_labels.flatten()

array([1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0])

In [21]:
new_points_test_with_labels = torch.tensor(new_points).float()
outputs_new_points_lbl = inverse_model(new_points_test_with_labels)
outputs_new_points_lbl =outputs_new_points_lbl.detach().numpy()

In [22]:
%matplotlib qt

# Create a figure and 3D axis
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(projection='3d')

# # Define colors for each Gaussian distribution
# colors = ['r', 'g', 'b']  # Red, Green, Blue
new_labels = new_labels.flatten()
# Loop through each Gaussian to plot points with corresponding color
for i in range(n_gauss):
    ax.scatter(D[c == i, 0], D[c == i, 1], D[c == i, 2], color=colors[i], alpha=0.7, label=f'Gaussian {i+1}')

    # ax.scatter(outputs_new_points_lbl[new_labels == i, 0], outputs_new_points_lbl[new_labels == i, 1], outputs_new_points_lbl[new_labels == i, 2],cmap='coolwarm', label=f'New_points_Gaussian')
ax.scatter(outputs_new_points_lbl[:, 0], outputs_new_points_lbl[:, 1], outputs_new_points_lbl[:, 2], c= new_labels, cmap='coolwarm', label=f'New_points_Gaussian')

# Set labels and title
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_zlabel('Z-axis')
ax.set_title(' New points with labels(2D TSNE) mapping into 3D Gaussian Distributions')

# Add a legend
ax.legend()

# Show the plot
plt.show()