In [13]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder

# Read the processed table from disk
DATA_SOURCE_PATH = '../../data/processed/pscalar/neural_gbc_nn.csv'
data = pd.read_csv(DATA_SOURCE_PATH)

# Gather every column whose name mentions PANSS, then build the feature
# table by dropping those columns together with 'Group' and 'id'.
PANSS_selector = [column for column in data.columns if 'PANSS' in column]
data_filtered = data.drop(columns=PANSS_selector + ['Group', 'id'])

In [28]:
# Project the filtered feature table onto its first 50 principal components.
# random_state is pinned because PCA's automatic solver selection can use a
# randomized SVD on large inputs; without a seed the components (and every
# embedding computed from them further down) may differ between runs.
PCA_model = PCA(n_components=50, random_state=42)
PCA_data = PCA_model.fit_transform(data_filtered)

# 2D Latent Space

In [49]:
# Embed the PCA-reduced data into 2 dimensions with t-SNE.
# t-SNE starts from a random state, so the seed is fixed to make the embedding
# (and the figure saved from it) reproducible under Restart & Run All.
# NOTE(review): perplexity=500 is ten times the value used for the 3D embedding
# and scikit-learn requires it to be smaller than the number of samples --
# confirm this is intentional.
# NOTE(review): scikit-learn 1.5 renamed `n_iter` to `max_iter`; switch the
# keyword if this call fails on a newer install.
TSNE_model = TSNE(n_components=2, perplexity=500, n_iter=2000, random_state=42)
TSNE_data = TSNE_model.fit_transform(PCA_data)

In [50]:
# Wrap the 2D embedding in a labelled frame and attach the 'Group' column of `data`
TSNE_data = pd.DataFrame(TSNE_data, columns=['x', 'y']).assign(Group=data['Group'])

In [51]:
# Pull the 2D coordinates back out as a plain array for plotting
embeddings_2d_shallow = TSNE_data.loc[:, ['x', 'y']].to_numpy()

# Turn the group names into integer codes; `le` is kept so the codes can be
# mapped back to the original names when labelling the plot.
le = LabelEncoder()
group_labels = le.fit_transform(data['Group'].to_numpy())

In [69]:
import matplotlib.pyplot as plt
import seaborn as sns

# Draw on a dedicated figure/axes instead of pyplot's implicit "current"
# figure, so re-running this cell can never paint onto a figure left over
# from another cell.
fig, ax = plt.subplots()

# One colour per group actually present in the data. Taking the groups from
# the fitted encoder (rather than a hard-coded count of 4) avoids an error
# when fewer groups exist and avoids silently dropping extra ones.
group_names = le.classes_
palette = sns.color_palette("hls", len(group_names))

# Scatter each group separately so it gets its own colour and legend entry
for code, group_name in enumerate(group_names):
    # Rows of the embedding whose encoded label matches this group
    group_inputs = embeddings_2d_shallow[group_labels == code]
    ax.scatter(group_inputs[:, 0], group_inputs[:, 1], color=palette[code], label=f'Group {group_name}')

# Annotate, save and show the figure
ax.legend()
ax.set_xlabel(r'$\theta_1$')
ax.set_ylabel(r'$\theta_2$')
ax.set_title('2D Latent space using t-SNE')
fig.savefig('../final_report/figures/tsne_2d_encoder.svg')
plt.show()

# 3D Latent Space

In [70]:
# Embed the PCA-reduced data into 3 dimensions with t-SNE.
# t-SNE starts from a random state, so the seed is fixed to make the embedding
# reproducible under Restart & Run All.
# NOTE(review): scikit-learn 1.5 renamed `n_iter` to `max_iter`; switch the
# keyword if this call fails on a newer install.
TSNE_model = TSNE(n_components=3, perplexity=50, n_iter=2000, random_state=42)
TSNE_data_3d = TSNE_model.fit_transform(PCA_data)

In [71]:
# Wrap the 3D embedding in a labelled frame and attach the 'Group' column of `data`
TSNE_data_3d = pd.DataFrame(TSNE_data_3d, columns=['x', 'y', 'z']).assign(Group=data['Group'])

In [72]:
# Pull the 3D coordinates back out as a plain array for plotting
embeddings_3d = TSNE_data_3d.loc[:, ['x', 'y', 'z']].to_numpy()

# Turn the group names into integer codes; `le` is kept so the codes can be
# mapped back to the original names when labelling the plot.
le = LabelEncoder()
group_labels = le.fit_transform(data['Group'].to_numpy())

In [78]:
%matplotlib qt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns

#plt.tight_layout()

# Create a new figure
fig = plt.figure()

# Create a color palette with 4 colors
palette = sns.color_palette("hls", 4)

# Create a 3D axis
ax = fig.add_subplot(111, projection='3d')

ax.set_xlabel(r'$\theta_1$')
ax.set_ylabel(r'$\theta_2$')
ax.set_zlabel(r'$\theta_3$')

# Create the scatter plot
for i in range(4):
    # Get the encoded inputs for this group
    group_inputs = embeddings_3d[group_labels == i]

    original_string = le.inverse_transform([i])[0]

    # Plot the encoded inputs for this group
    ax.scatter(group_inputs[:, 0], group_inputs[:, 1], group_inputs[:, 2], color=palette[i], label=f'Group {original_string}')

# Show the plot
plt.title('3D Latent space using t-SNE')
plt.legend()
plt.show()