Synthetic Data Set

In [36]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so every run yields the same dataset
np.random.seed(42)

# Dataset shape: how many samples, how many features, how many clusters
n_points = 100
n_dimensions = 128
n_clusters = 6

# One random centre per cluster, each coordinate scaled into [0, 10)
cluster_centers = 10 * np.random.rand(n_clusters, n_dimensions)

# Per-cluster spread in [0.5, 2.0) so clusters stay reasonably tight
cluster_std = 0.5 + 1.5 * np.random.rand(n_clusters)

# Pre-allocated matrix that receives one generated sample per row
points = np.zeros((n_points, n_dimensions))

# Unique identifier for every sample: Point_1 ... Point_<n_points>
labels = [f"Point_{idx}" for idx in range(1, n_points + 1)]

# Fill the matrix row by row. The draw order (cluster pick, then the Gaussian
# sample) is what makes the seeded output repeatable, so it must not change.
for i in range(n_points):
    # Uniformly pick which cluster this sample belongs to
    cluster_id = np.random.choice(n_clusters)

    # Gaussian sample around that cluster's centre, written straight into row i
    points[i] = np.random.normal(
        loc=cluster_centers[cluster_id],
        scale=cluster_std[cluster_id],
        size=n_dimensions,
    )

# Wrap in a DataFrame: feature columns Dim_1..Dim_<n_dimensions>, then the Label column
dim_columns = [f"Dim_{dim}" for dim in range(1, n_dimensions + 1)]
df_points = pd.DataFrame(points, columns=dim_columns).assign(Label=labels)

# Save to CSV (optional)
df_points.to_csv("synthetic_eeg_data.csv", index=False)

print("Sample of generated data:")
print(df_points.head())


Sample of generated data:
      Dim_1      Dim_2     Dim_3     Dim_4     Dim_5     Dim_6      Dim_7  \
0  8.623181  10.717884  1.429764 -0.063690  3.783480  8.012148  11.052118   
1 -1.211337   3.331281  0.328559  3.879778  3.395592  9.691704   5.421771   
2  0.054890   4.454458  0.477346  2.490225  0.534991  8.871353   6.491271   
3  3.805814   8.851717  8.027325  6.385234  2.467127  2.203452   1.644162   
4  9.562719   9.652684  0.842321  0.509438  2.600054  7.460114  10.596427   

      Dim_8     Dim_9    Dim_10  ...   Dim_120   Dim_121   Dim_122   Dim_123  \
0  1.861547  6.758327  4.056489  ...  8.978716  4.194471  5.237187  2.646680   
1  7.368510  9.667705  6.705576  ...  6.599820  2.389007  4.266338  2.861474   
2  6.571056  6.898760  6.052791  ...  7.986344 -0.001523  1.903675  3.492384   
3  8.119503  7.345832  7.280918  ...  6.596030  8.721514  7.802909  4.834901   
4  1.898137  6.440008  4.248116  ...  9.436881  5.829479  5.396163  1.391503   

    Dim_124   Dim_125   Dim_12

In [None]:
!pip install umap-learn matplotlib plotly



In [37]:
import pandas as pd
from sklearn.manifold import TSNE
import plotly.graph_objects as go

In [38]:
# Read the saved dataset back in, then keep every column except the
# per-point 'Label' identifier as the feature matrix
data = pd.read_csv('synthetic_eeg_data.csv')
embeddings = data.drop(columns=['Label'])

In [60]:
# Apply t-SNE: project the embeddings down to 3 components.
# - random_state pins the stochastic optimisation so that re-running the notebook
#   reproduces the same layout.
# - The iteration count is left at the library default (1000) instead of being
#   passed as `n_iter`: scikit-learn 1.5 renamed that argument to `max_iter` and
#   deprecated `n_iter`, so omitting it behaves the same on old and new versions.
tsne = TSNE(n_components=3, perplexity=3, learning_rate=200, random_state=42)
tsne_result = tsne.fit_transform(embeddings)

In [61]:
# Just Points Plot: the three t-SNE components drawn as small black markers
points_trace = go.Scatter3d(
    x=tsne_result[:, 0],
    y=tsne_result[:, 1],
    z=tsne_result[:, 2],
    mode='markers',
    marker={'size': 2, 'color': 'black'},
)

fig_points = go.Figure(data=[points_trace])

# Hide all three axes so only the point cloud is visible
hidden_axis = {'visible': False}
fig_points.update_layout(
    title="t-SNE: Points Only",
    scene={'xaxis': hidden_axis, 'yaxis': hidden_axis, 'zaxis': hidden_axis},
)
fig_points.show()


In [58]:
import numpy as np
import plotly.graph_objects as go

# Points-plus-connections plot. Requires `tsne_result`, the 3-column t-SNE output
# computed earlier in the notebook.
coords = np.asarray(tsne_result)

# Create a figure object
fig = go.Figure()

# Add the scatter plot for points
fig.add_trace(go.Scatter3d(
    x=coords[:, 0],
    y=coords[:, 1],
    z=coords[:, 2],
    mode='markers',
    marker=dict(size=2, color='black'),
    name='Points'
))

# Euclidean distance of every unordered pair (i < j), computed once with NumPy
# rather than twice with nested Python loops.
pair_i, pair_j = np.triu_indices(len(coords), k=1)
pair_distances = np.linalg.norm(coords[pair_i] - coords[pair_j], axis=1)

# Determine maximum distance to normalize line transparencies.
# With fewer than two points there are no pairs, so fall back to 0.
max_distance = pair_distances.max() if pair_distances.size else 0.0

# Normalize distance and apply exponential decay for transparency.
# If all points coincide (max_distance == 0) treat every pair as distance 0
# instead of dividing by zero.
if max_distance > 0:
    normalized_distances = pair_distances / max_distance
else:
    normalized_distances = np.zeros_like(pair_distances)
opacities = np.exp(-normalized_distances * 1.5)  # Adjust decay factor to tune visibility

# Group line segments by their rgba colour. Opacity is rounded to two decimals in
# the colour string, so only a few dozen distinct colours occur. Emitting one
# trace per colour (segments separated by None, which breaks the line) draws the
# same lines with far fewer traces than one trace per pair.
segments_by_color = {}
for start, end, opacity in zip(pair_i, pair_j, opacities):
    rgba_color = f'rgba(128, 128, 128, {float(opacity):.2f})'
    xs, ys, zs = segments_by_color.setdefault(rgba_color, ([], [], []))
    xs.extend((coords[start, 0], coords[end, 0], None))
    ys.extend((coords[start, 1], coords[end, 1], None))
    zs.extend((coords[start, 2], coords[end, 2], None))

# Add lines between points with fading based on distance
fig.add_traces([
    go.Scatter3d(
        x=xs,
        y=ys,
        z=zs,
        mode='lines',
        line=dict(color=rgba_color, width=0.5),
        showlegend=False
    )
    for rgba_color, (xs, ys, zs) in segments_by_color.items()
])

# Update layout to hide axes and add a title
fig.update_layout(
    scene=dict(
        xaxis_visible=False,
        yaxis_visible=False,
        zaxis_visible=False
    ),
    title="3D t-SNE Visualization with Enhanced Transparency Variance"
)

fig.show()
