In [None]:
from msp_tsne import MultiscaleParametricTSNE, ParametricTSNE
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline

In [None]:
# Read the extracted feature table and keep four numeric columns as the input matrix.
df = pd.read_json('/home/jovyan/work/data/features.json')
X = df[[
    'length_sec',
    'pitch_range_min_freq',
    'pitch_range_min_note',
    'pitch_range_max_freq',
]].values
# Only the first 1000 rows are used by the cells below.
X_test = X[:1000]
X_test.shape

## Test MSP t-SNE

In [None]:
# Scale every feature into [0, 1], then fit a 2-D multiscale parametric t-SNE
# embedding on the scaled data.
# NOTE(review): nl1/nl2/nl3 are presumably the hidden-layer sizes of the
# underlying network - confirm against the msp_tsne documentation.
pipeline = Pipeline([
    ('scaler', MinMaxScaler(feature_range=(0,1))),
    ('msp-tsne', MultiscaleParametricTSNE(n_components=2,
                n_iter=1000,
                batch_size=500,
                early_exaggeration_epochs = 50,
                early_exaggeration_value = 4.,
                early_stopping_epochs = np.inf,
                early_stopping_min_improvement = 1e-2,
                nl1 = 1000,
                nl2 = 500,
                nl3 = 250,
                logdir=None, verbose=0))
])
X_transformed = pipeline.fit_transform(X_test)
plt.scatter(X_transformed[:, 0], X_transformed[:, 1], edgecolor='k')
# `plt.show` without parentheses is a no-op expression; it must be called.
plt.show()

In [None]:
# Transform a new dataset X2 with the pre-trained model.
# Neither `transformer` nor `X2` is defined in the cells above, so the original
# line raised NameError on a fresh kernel. The fitted estimator is `pipeline`;
# X2 is taken here as the rows that follow the 1000 rows used for fitting.
X2 = X[1000:2000, :]
X2_new = pipeline.transform(X2)

## Side-by-side Comparison: PCA, t-SNE, P t-SNE, MSP t-SNE

### Using Feature Vectors from MAGHSOM

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from typing import Optional

class DimensionalityComparator:
    """Embed one dataset with four methods and plot the 2-D results side by side.

    The methods are PCA, t-SNE, parametric t-SNE and multiscale parametric
    t-SNE. Each ``run_*`` method stores its fitted model and its embedding as
    attributes; ``compare`` runs the whole sequence and draws the figure.
    """

    def __init__(self, data: pd.DataFrame, labels: Optional[np.ndarray] = None):
        """Store the inputs and create the scaler used by ``preprocess``.

        Args:
            data: Feature matrix. It is only ever handed to
                ``MinMaxScaler.fit_transform``, so a NumPy array works as well
                as a DataFrame (the notebook passes arrays).
            labels: Optional per-sample values used to colour the points.
                When ``None`` the scatter plots are drawn without colouring.
        """
        self.data = data
        self.labels = labels
        self.scaler = MinMaxScaler(feature_range=(0,1))

    def preprocess(self):
        """Fit the scaler on ``self.data`` and store the result in ``self.data_scaled``."""
        self.data_scaled = self.scaler.fit_transform(self.data)

    def run_pca(self):
        """Fit a 2-component PCA on the scaled data; result goes to ``self.pca_result``."""
        self.pca = PCA(n_components=2)
        self.pca_result = self.pca.fit_transform(self.data_scaled)

    def run_tsne(self, random_state=42):
        """Fit a 2-component t-SNE on the scaled data; result goes to ``self.tsne_result``.

        Args:
            random_state: Seed forwarded to ``TSNE``.
        """
        self.tsne = TSNE(n_components=2, random_state=random_state)
        self.tsne_result = self.tsne.fit_transform(self.data_scaled)

    def run_p_tsne(self):
        """Fit ``ParametricTSNE`` (2 components, 1000 iterations); result goes to ``self.p_tsne_result``."""
        self.p_tsne = ParametricTSNE(n_components=2, n_iter=1000)
        self.p_tsne_result = self.p_tsne.fit_transform(self.data_scaled)

    def run_msp_tsne(self):
        """Fit ``MultiscaleParametricTSNE`` (2 components, 1000 iterations); result goes to ``self.msp_tsne_result``."""
        self.msp_tsne = MultiscaleParametricTSNE(n_components=2, n_iter=1000)
        self.msp_tsne_result = self.msp_tsne.fit_transform(self.data_scaled)

    def _plot_embedding(self, ax, embedding, title, xlabel, ylabel):
        """Scatter one 2-D embedding on ``ax`` and set its title and axis labels.

        Points are coloured by ``self.labels`` and a colorbar is attached when
        labels were supplied; otherwise a plain scatter is drawn.
        """
        if self.labels is not None:
            points = ax.scatter(embedding[:, 0], embedding[:, 1], c=self.labels, cmap='viridis', edgecolor='k')
            plt.colorbar(points, ax=ax)
        else:
            ax.scatter(embedding[:, 0], embedding[:, 1], edgecolor='k')
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

    def visualize(self):
        """Draw the four stored embeddings in one row of subplots and show the figure.

        Raises:
            AttributeError: If any ``run_*`` step has not been executed yet.
        """
        # Resolve every result before creating the figure, so a skipped run_*
        # step fails here instead of leaving a half-drawn figure behind.
        panels = [
            (self.pca_result, 'PCA', 'PC 1', 'PC 2'),
            (self.tsne_result, 't-SNE', 'Dim 1', 'Dim 2'),
            (self.p_tsne_result, 'P-t-SNE', 'Dim 1', 'Dim 2'),
            (self.msp_tsne_result, 'MSP-t-SNE', 'Dim 1', 'Dim 2'),
        ]

        _, axs = plt.subplots(1, len(panels), figsize=(24, 5))
        for ax, (embedding, title, xlabel, ylabel) in zip(axs, panels):
            self._plot_embedding(ax, embedding, title, xlabel, ylabel)

        plt.tight_layout()
        plt.show()

    def compare(self):
        """Scale the data, fit all four methods in order, then plot the results."""
        self.preprocess()
        self.run_pca()
        self.run_tsne()
        self.run_p_tsne()
        self.run_msp_tsne()
        self.visualize()


In [None]:
# Run the four-way comparison on X_test; no labels are given, so the points are not coloured.
dm_comp = DimensionalityComparator(data=X_test, labels=None)
dm_comp.compare()

### Using the scikit-learn Digits Dataset (8x8, MNIST-like)

In [None]:
from sklearn.datasets import load_digits
# Load the digits data as (features, targets) and colour each embedding by the target values.
X_mnist, y_mnist = load_digits(return_X_y=True)
dm_comp = DimensionalityComparator(data=X_mnist, labels=y_mnist)
dm_comp.compare()

## P t-SNE and MSP t-SNE: Fit-Transform then Apply

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

class TSNEComparison:
    """
    Fits Parametric t-SNE and Multi-Scale Parametric t-SNE on a train split of
    the digits data, applies both to the held-out split, and plots the two
    embeddings next to each other.
    """
    def __init__(self, n_components=2, perplexity=30.0, n_iter=250, test_size=0.25, random_state=0):
        """
        Stores the settings and builds one scaler + embedder pipeline per method.

        ``perplexity`` is forwarded to ParametricTSNE only; ``test_size`` and
        ``random_state`` are used for the train/test split in ``run``.
        """
        self.n_components = n_components
        self.perplexity = perplexity
        self.n_iter = n_iter
        self.test_size = test_size
        self.random_state = random_state

        # Parametric t-SNE: scale to [-1, 1], then embed.
        parametric_steps = [
            ('scaler', MinMaxScaler(feature_range=(-1, 1))),
            ('pt', ParametricTSNE(
                n_components=self.n_components,
                perplexity=self.perplexity,
                n_iter=self.n_iter,
                verbose=0,
            )),
        ]
        self.pipe_parametric = Pipeline(parametric_steps)

        # Multi-scale parametric t-SNE: same scaling, no perplexity argument.
        multiscale_steps = [
            ('scaler', MinMaxScaler(feature_range=(-1, 1))),
            ('mst', MultiscaleParametricTSNE(
                n_components=self.n_components,
                n_iter=self.n_iter,
                verbose=0,
            )),
        ]
        self.pipe_multiscale = Pipeline(multiscale_steps)

    def plot_side_by_side(self, X_p_train, X_p_test, X_ms_train, X_ms_test, y_train, y_test):
        """
        Draws both embeddings in a two-panel figure, one colour per class 0-9.

        Train and test points of each method are stacked and plotted together.
        Returns the ``(fig, axes)`` pair created by ``plt.subplots``.
        """
        n_classes = 10
        palette = cm.rainbow(np.linspace(0, 1, n_classes))

        # Stack train on top of test so each panel shows the full dataset.
        parametric_points = np.vstack((X_p_train, X_p_test))
        multiscale_points = np.vstack((X_ms_train, X_ms_test))
        labels = np.concatenate((y_train, y_test))

        panels = (
            ('Parametric t-SNE', parametric_points),
            ('Multi-Scale Parametric t-SNE', multiscale_points),
        )

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        for ax, (title, points) in zip(axes, panels):
            ax.set_title(title)
            for class_id in range(n_classes):
                members = labels == class_id
                ax.scatter(
                    points[members, 0], points[members, 1], s=8, color=palette[class_id], alpha=.6
                )
            ax.set_xlabel('Dimension 1')
            ax.set_ylabel('Dimension 2')

        fig.tight_layout()
        return fig, axes

    def run(self):
        """
        Loads the digits data, splits it, fits each pipeline on the training
        part, transforms both parts, and shows the comparison figure.
        """
        features, targets = load_digits(return_X_y=True)
        X_train, X_test, y_train, y_test = train_test_split(
            features, targets, test_size=self.test_size, random_state=self.random_state
        )

        # Fit on train, then apply the fitted pipeline to the unseen test rows;
        # the parametric pipeline is handled first, the multi-scale one second.
        embedded = []
        for pipe in (self.pipe_parametric, self.pipe_multiscale):
            train_embedding = pipe.fit_transform(X_train)
            test_embedding = pipe.transform(X_test)
            embedded.append((train_embedding, test_embedding))
        (X_p_train, X_p_test), (X_ms_train, X_ms_test) = embedded

        self.plot_side_by_side(X_p_train, X_p_test, X_ms_train, X_ms_test, y_train, y_test)
        plt.show()

def main():
    """Build a TSNEComparison with its default settings and run it."""
    TSNEComparison().run()


In [None]:
# Run the P t-SNE vs. MSP t-SNE fit-then-apply comparison defined in the previous cell.
main()