In [None]:
import pacmap
import numpy as np
import plotly.express as px
import pandas as pd
from typing import List, Dict, Union


def generate_random_samples(
    num_samples: int, embedding_dim: int, seed: Union[int, None] = None
) -> np.ndarray:
    """
    Generate random sample embeddings drawn uniformly from [0, 1).

    Args:
        num_samples (int): Number of random samples to generate.
        embedding_dim (int): Dimension of the embeddings.
        seed (int, optional): Seed for a dedicated random generator. When given,
            the same seed always yields the same array. Defaults to None, which
            keeps the original behaviour of drawing from NumPy's global state.

    Returns:
        np.ndarray: Array of shape (num_samples, embedding_dim).
    """
    if seed is None:
        # Unchanged legacy path: still honours a prior np.random.seed() call.
        return np.random.rand(num_samples, embedding_dim)
    return np.random.default_rng(seed).random((num_samples, embedding_dim))


def generate_3d_plot(embeddings: np.ndarray, user_query: str) -> None:
    """
    Project embeddings to 3D with PaCMAP and show them in a Plotly 3D scatter.

    A random vector of matching width is appended to stand in for the query's
    embedding; it is drawn as a black star and carries ``user_query`` as its
    hover text. Any exception raised along the way is caught and reported with
    ``print``, so the function never propagates an error.

    Args:
        embeddings (np.ndarray): 2-D array of embeddings, one row per sample.
        user_query (str): User query text shown on hover for the query marker.

    Returns:
        None
    """
    try:
        embedding_projector = pacmap.PaCMAP(
            n_components=3, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0, random_state=1
        )

        # The query point is a random placeholder, not an embedding of user_query.
        query_vector = np.random.rand(1, embeddings.shape[1])
        embeddings_with_query = np.vstack((embeddings, query_vector))

        # Row order is preserved, so the last projected row is the query point.
        embeddings_projected = embedding_projector.fit_transform(
            embeddings_with_query, init="pca"
        )

        # Build the frame column-wise: one entry per sample plus the trailing query row.
        num_samples = embeddings.shape[0]
        df = pd.DataFrame(
            {
                "x": embeddings_projected[:, 0],
                "y": embeddings_projected[:, 1],
                "z": embeddings_projected[:, 2],
                "source": [f"Sample {i}" for i in range(num_samples)] + ["User query"],
                "extract": [f"Sample {i} embedding" for i in range(num_samples)]
                + [user_query],
                "symbol": ["circle"] * num_samples + ["star"],
                "size_col": [100] * num_samples + [500],
            }
        )

        fig = px.scatter_3d(
            df,
            x="x",
            y="y",
            z="z",
            color="source",
            # A list is accepted by every Plotly Express release; a bare string is not.
            hover_data=["extract"],
            size="size_col",
            symbol="symbol",
            color_discrete_map={"User query": "black"},
            width=1000,
            height=700,
        )
        fig.update_traces(
            marker=dict(opacity=1, line=dict(width=0, color="DarkSlateGrey")),
            selector=dict(mode="markers"),
        )
        fig.update_layout(
            legend_title_text="<b>Sample source</b>",
            title="<b>3D Projection of Random Embeddings via PaCMAP</b>",
        )
        fig.show()
    except Exception as e:
        # Best-effort demo: report the failure instead of aborting the cell.
        print(f"An error occurred: {str(e)}")


# Example usage: plot 1000 random 50-dimensional vectors together with a query marker.
num_samples = 1000
embedding_dim = 50
user_query = "What is the meaning of life?"

random_embeddings = generate_random_samples(num_samples, embedding_dim)
generate_3d_plot(random_embeddings, user_query)


In [12]:
# %pip installs into the running kernel's environment (unlike `!pip`).
# The recorded output shows the annoy wheel failing to build on Windows without
# the Microsoft C++ Build Tools, so install those first on that platform.
%pip install -q pacmap


  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [14 lines of output]
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-310
      creating build\lib.win-amd64-cpython-310\annoy
      copying annoy\__init__.py -> build\lib.win-amd64-cpython-310\annoy
      copying annoy\__init__.pyi -> build\lib.win-amd64-cpython-310\annoy
      copying annoy\py.typed -> build\lib.win-amd64-cpython-310\annoy
      running build_ext
      building 'annoy.annoylib' extension
      error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
      [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
  ERROR: Failed building wheel for annoy
ERROR: Could not build wheels for annoy, which is required to instal

In [None]:

# TASK: extend the 2D plotting code below to a 3D plot, using randomly generated sample embeddings (e.g. 100 samples) as the input.

import pacmap
import numpy as np
import plotly.express as px
import pandas as pd
# Project chunk embeddings plus the query embedding to 2D with PaCMAP and plot them.
# NOTE(review): KNOWLEDGE_VECTOR_DATABASE, docs_processed, query_vector and user_query
# are not defined in this cell; they must already exist in the kernel — confirm.
embedding_projector = pacmap.PaCMAP(
    n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0, random_state=1
)

# One stored vector per processed document, followed by the query vector as the last
# entry. Despite the name, these are the original (unprojected) embeddings.
embeddings_2d = [
    list(KNOWLEDGE_VECTOR_DATABASE.index.reconstruct_n(idx, 1)[0])
    for idx in range(len(docs_processed))
] + [query_vector]

# fit the data (The index of transformed data corresponds to the index of the original data)
documents_projected = embedding_projector.fit_transform(
    np.array(embeddings_2d), init="pca"
)

# One row per document, then a final row for the query (taken from the last projected row).
# NOTE(review): split("/")[1] assumes every metadata["source"] contains a "/" — confirm.
df = pd.DataFrame.from_dict(
    [
        {
            "x": documents_projected[i, 0],
            "y": documents_projected[i, 1],
            "source": docs_processed[i].metadata["source"].split("/")[1],
            "extract": docs_processed[i].page_content[:100] + "...",
            "symbol": "circle",
            "size_col": 4,
        }
        for i in range(len(docs_processed))
    ]
    + [
        {
            "x": documents_projected[-1, 0],
            "y": documents_projected[-1, 1],
            "source": "User query",
            "extract": user_query,
            "size_col": 100,
            "symbol": "star",
        }
    ]
)

# visualize the embedding: points are coloured by source, the query is a black star
fig = px.scatter(
    df,
    x="x",
    y="y",
    color="source",
    hover_data="extract",
    size="size_col",
    symbol="symbol",
    color_discrete_map={"User query": "black"},
    width=1000,
    height=700,
)
fig.update_traces(
    marker=dict(opacity=1, line=dict(width=0, color="DarkSlateGrey")),
    selector=dict(mode="markers"),
)
fig.update_layout(
    legend_title_text="<b>Chunk source</b>",
    title="<b>2D Projection of Chunk Embeddings via PaCMAP</b>",
)
fig.show()


In [7]:
# %pip targets the environment of the running kernel, which `!pip` does not guarantee.
%pip install -q -U dash plotly



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# The requirement must be quoted: an unquoted `>` is a shell redirect, which would
# install an unconstrained nbformat and write pip's output to a file named "=4.2.0".
%pip install "nbformat>=4.2.0"



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import plotly.express as px
# Tiny scatter of three categorical x values against numeric y values.
fig = px.scatter(x=["a", "b", "c"], y=[1, 3, 2])
# Display the figure in the notebook output.
fig.show()


In [None]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html
from typing import Union


def load_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame, rejecting missing files and empty tables.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The parsed table (guaranteed non-empty).

    Raises:
        FileNotFoundError: If no file exists at ``file_path``; a message is
            printed before the error is re-raised.
        pd.errors.EmptyDataError: If the parsed table is empty.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as missing:
        print(f"Error: File '{file_path}' not found.")
        raise missing
    if frame.empty:
        raise pd.errors.EmptyDataError("Loaded data is empty.")
    return frame


def create_scatter_plot(data: pd.DataFrame, x_column: str, y_column: str, color_column: Union[str, None] = None) -> px.scatter:
    """
    Build a Plotly Express scatter figure from two columns of a DataFrame.

    Args:
        data (pd.DataFrame): Source table for the chart.
        x_column (str): Name of the column plotted on the x-axis.
        y_column (str): Name of the column plotted on the y-axis.
        color_column (str, optional): Name of the column used to colour the
            points. Defaults to None (no colour encoding).

    Returns:
        px.scatter: The figure object produced by ``px.scatter``.
    """
    return px.scatter(data, x=x_column, y=y_column, color=color_column)


def create_dash_app(data: pd.DataFrame, x_column: str, y_column: str, color_column: Union[str, None] = None) -> Dash:
    """
    Assemble a Dash app whose page is a heading, a caption and one scatter plot.

    Args:
        data (pd.DataFrame): Source table for the chart.
        x_column (str): Name of the column plotted on the x-axis.
        y_column (str): Name of the column plotted on the y-axis.
        color_column (str, optional): Name of the column used to colour the
            points. Defaults to None.

    Returns:
        Dash: The configured application; the caller is responsible for running it.
    """
    dash_app = Dash(__name__)
    scatter_figure = create_scatter_plot(data, x_column, y_column, color_column)

    # Page content, top to bottom.
    heading = html.H1(children='Data Visualization')
    caption = html.Div(children='A scatter plot showing the data.')
    graph = dcc.Graph(id='scatter-plot', figure=scatter_figure)
    dash_app.layout = html.Div(children=[heading, caption, graph])

    return dash_app


def main():
    """
    Load the California housing sample CSV and serve it as a Dash scatter plot.

    Longitude is plotted against latitude, coloured by median house value.
    Any exception (missing file, empty data, server failure) is reported with
    ``print`` rather than propagated.
    """
    try:
        file_path = '/content/sample_data/california_housing_test.csv'
        data = load_data(file_path)

        # Specify the column names for the scatter plot
        x_column = 'longitude'
        y_column = 'latitude'
        color_column = 'median_house_value'

        app = create_dash_app(data, x_column, y_column, color_column)
        # Dash 3 dropped `run_server` in favour of `run`; fall back for older releases.
        start_server = getattr(app, "run", None) or app.run_server
        start_server(debug=True)
    except Exception as e:
        print(f"An error occurred: {str(e)}")


# Entry point: starts the Dash app when this code runs as the main module.
if __name__ == '__main__':
    main()


In [None]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html


def load_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame, rejecting missing files and empty tables.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The parsed table (guaranteed non-empty).

    Raises:
        FileNotFoundError: If no file exists at ``file_path``; a message is
            printed before the error is re-raised.
        pd.errors.EmptyDataError: If the parsed table is empty.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as missing:
        print(f"Error: File '{file_path}' not found.")
        raise missing
    if frame.empty:
        raise pd.errors.EmptyDataError("Loaded data is empty.")
    return frame


def create_bar_chart(data: pd.DataFrame) -> px.bar:
    """
    Build a grouped bar chart from automatically chosen columns and display it.

    The first two float64/int64 columns become the x and y axes; the first
    object-typed column, if any, is used for colour. The figure is shown via
    ``fig.show()`` before being returned.

    Args:
        data (pd.DataFrame): Source table for the chart.

    Returns:
        px.bar: The figure object produced by ``px.bar``.

    Raises:
        ValueError: If fewer than two float64/int64 columns are present.
    """
    numeric_names = data.select_dtypes(include=['float64', 'int64']).columns
    if len(numeric_names) < 2:
        raise ValueError("Insufficient numeric columns for creating a bar chart.")
    x_column, y_column = numeric_names[0], numeric_names[1]

    # Colour is optional: only used when the table has a text-like column.
    object_names = data.select_dtypes(include=['object']).columns
    color_column = object_names[0] if len(object_names) > 0 else None

    fig = px.bar(data, x=x_column, y=y_column, color=color_column, barmode="group")
    fig.show()
    return fig

def create_dash_app(data: pd.DataFrame) -> Dash:
    """
    Assemble a Dash app whose page is a heading, a caption and one bar chart.

    Args:
        data (pd.DataFrame): Source table handed to ``create_bar_chart``.

    Returns:
        Dash: The configured application; the caller is responsible for running it.
    """
    dash_app = Dash(__name__)
    bar_figure = create_bar_chart(data)

    # Page content, top to bottom.
    heading = html.H1(children='Data Visualization')
    caption = html.Div(children='A bar chart showing the data.')
    graph = dcc.Graph(id='bar-chart', figure=bar_figure)
    dash_app.layout = html.Div(children=[heading, caption, graph])

    return dash_app

# Load the sample housing data and serve the bar-chart app.
x = load_data('/content/sample_data/california_housing_test.csv')
app = create_dash_app(x)
# Dash 3 dropped `run_server` in favour of `run`; fall back for older releases.
(getattr(app, "run", None) or app.run_server)(debug=True)


In [None]:
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html


def load_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame, rejecting missing files and empty tables.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The parsed table (guaranteed non-empty).

    Raises:
        FileNotFoundError: If no file exists at ``file_path``; a message is
            printed before the error is re-raised.
        pd.errors.EmptyDataError: If the parsed table is empty.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as missing:
        print(f"Error: File '{file_path}' not found.")
        raise missing
    if frame.empty:
        raise pd.errors.EmptyDataError("Loaded data is empty.")
    return frame


def create_scatter_plot(data: pd.DataFrame) -> px.scatter:
    """
    Build a scatter figure from automatically chosen columns.

    The first two float64/int64 columns become the x and y axes; the first
    object-typed column, if any, is used for colour.

    Args:
        data (pd.DataFrame): Source table for the chart.

    Returns:
        px.scatter: The figure object produced by ``px.scatter``.

    Raises:
        ValueError: If fewer than two float64/int64 columns are present.
    """
    numeric_names = data.select_dtypes(include=['float64', 'int64']).columns
    if len(numeric_names) < 2:
        raise ValueError("Insufficient numeric columns for creating a scatter plot.")
    x_column, y_column = numeric_names[0], numeric_names[1]

    # Colour is optional: only used when the table has a text-like column.
    object_names = data.select_dtypes(include=['object']).columns
    color_column = object_names[0] if len(object_names) > 0 else None

    return px.scatter(data, x=x_column, y=y_column, color=color_column)

def create_dash_app(data: pd.DataFrame) -> Dash:
    """
    Assemble a Dash app whose page is a heading, a caption and one scatter plot.

    Args:
        data (pd.DataFrame): Source table handed to ``create_scatter_plot``.

    Returns:
        Dash: The configured application; the caller is responsible for running it.
    """
    dash_app = Dash(__name__)
    scatter_figure = create_scatter_plot(data)

    # Page content, top to bottom.
    heading = html.H1(children='Data Visualization')
    caption = html.Div(children='A scatter plot showing the data.')
    graph = dcc.Graph(id='scatter-plot', figure=scatter_figure)
    dash_app.layout = html.Div(children=[heading, caption, graph])

    return dash_app

# Load the sample housing data and serve the scatter-plot app.
x = load_data('/content/sample_data/california_housing_test.csv')
app = create_dash_app(x)
# Dash 3 dropped `run_server` in favour of `run`; fall back for older releases.
(getattr(app, "run", None) or app.run_server)(debug=True)


In [None]:
import json
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html
from dash.dependencies import Input, Output


def load_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame, rejecting missing files and empty tables.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The parsed table (guaranteed non-empty).

    Raises:
        FileNotFoundError: If no file exists at ``file_path``; a message is
            printed before the error is re-raised.
        pd.errors.EmptyDataError: If the parsed table is empty.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as missing:
        print(f"Error: File '{file_path}' not found.")
        raise missing
    if frame.empty:
        raise pd.errors.EmptyDataError("Loaded data is empty.")
    return frame


def create_scatter_plot(data: pd.DataFrame) -> px.scatter:
    """
    Build a click-selectable scatter figure from automatically chosen columns.

    The first two float64/int64 columns become the x and y axes; the first
    object-typed column, if any, is used for colour. The layout's click mode is
    set to ``'event+select'`` and markers are enlarged to size 20.

    Args:
        data (pd.DataFrame): Source table for the chart.

    Returns:
        px.scatter: The figure object produced by ``px.scatter``.

    Raises:
        ValueError: If fewer than two float64/int64 columns are present.
    """
    numeric_names = data.select_dtypes(include=['float64', 'int64']).columns
    if len(numeric_names) < 2:
        raise ValueError("Insufficient numeric columns for creating a scatter plot.")
    x_column, y_column = numeric_names[0], numeric_names[1]

    # Colour is optional: only used when the table has a text-like column.
    object_names = data.select_dtypes(include=['object']).columns
    color_column = object_names[0] if len(object_names) > 0 else None

    scatter_figure = px.scatter(data, x=x_column, y=y_column, color=color_column)
    scatter_figure.update_layout(clickmode='event+select')
    scatter_figure.update_traces(marker_size=20)
    return scatter_figure

def create_dash_app(data: pd.DataFrame) -> Dash:
    """
    Create a Dash application that echoes graph interaction events as JSON.

    The page holds one scatter plot (id ``basic-interactions``) and four
    ``<pre>`` panels. Each panel is fed by a callback that serialises one graph
    property (``hoverData``, ``clickData``, ``selectedData`` or
    ``relayoutData``) with ``json.dumps``.

    Args:
        data (pd.DataFrame): Data for the chart.

    Returns:
        Dash: Dash application instance with layout and callbacks registered.
    """
    app = Dash(__name__)
    fig = create_scatter_plot(data)

    # Shared style for the JSON output panels: thin border, horizontal scrolling.
    styles = {
        'pre': {
            'border': 'thin lightgrey solid',
            'overflowX': 'scroll'
        }
    }

    # Layout: the graph on top, then a row of four explanation + output panels.
    app.layout = html.Div([
        dcc.Graph(
            id='basic-interactions',
            figure=fig
        ),

        html.Div(className='row', children=[
            html.Div([
                dcc.Markdown("""
                    **Hover Data**

                    Mouse over values in the graph.
                """),
                html.Pre(id='hover-data', style=styles['pre'])
            ], className='three columns'),

            html.Div([
                dcc.Markdown("""
                    **Click Data**

                    Click on points in the graph.
                """),
                html.Pre(id='click-data', style=styles['pre']),
            ], className='three columns'),

            html.Div([
                dcc.Markdown("""
                    **Selection Data**

                    Choose the lasso or rectangle tool in the graph's menu
                    bar and then select points in the graph.

                    Note that if `layout.clickmode = 'event+select'`, selection data also
                    accumulates (or un-accumulates) selected data if you hold down the shift
                    button while clicking.
                """),
                html.Pre(id='selected-data', style=styles['pre']),
            ], className='three columns'),

            html.Div([
                dcc.Markdown("""
                    **Zoom and Relayout Data**

                    Click and drag on the graph to zoom or click on the zoom
                    buttons in the graph's menu bar.
                    Clicking on legend items will also fire
                    this event.
                """),
                html.Pre(id='relayout-data', style=styles['pre']),
            ], className='three columns')
        ])
    ])

    # Each callback below mirrors one graph property into its <pre> panel as
    # indented JSON text.
    @app.callback(
        Output('hover-data', 'children'),
        Input('basic-interactions', 'hoverData'))
    def display_hover_data(hoverData):
        return json.dumps(hoverData, indent=2)

    @app.callback(
        Output('click-data', 'children'),
        Input('basic-interactions', 'clickData'))
    def display_click_data(clickData):
        return json.dumps(clickData, indent=2)

    @app.callback(
        Output('selected-data', 'children'),
        Input('basic-interactions', 'selectedData'))
    def display_selected_data(selectedData):
        return json.dumps(selectedData, indent=2)

    @app.callback(
        Output('relayout-data', 'children'),
        Input('basic-interactions', 'relayoutData'))
    def display_relayout_data(relayoutData):
        return json.dumps(relayoutData, indent=2)

    return app

# Load the sample housing data and serve the interactive scatter-plot app.
x = load_data('/content/sample_data/california_housing_test.csv')
app = create_dash_app(x)
# Dash 3 dropped `run_server` in favour of `run`; fall back for older releases.
(getattr(app, "run", None) or app.run_server)(debug=True)


In [None]:
import argparse
import sys
from typing import List, Tuple

import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html

def load_data(file_path: str) -> pd.DataFrame:
    """
    Read a CSV file into a DataFrame, rejecting missing files and empty tables.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: The parsed table (guaranteed non-empty).

    Raises:
        FileNotFoundError: If no file exists at ``file_path``; a message is
            printed before the error is re-raised.
        pd.errors.EmptyDataError: If the parsed table is empty.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError as missing:
        print(f"Error: File '{file_path}' not found.")
        raise missing
    if frame.empty:
        raise pd.errors.EmptyDataError("Loaded data is empty.")
    return frame

def create_bar_chart(data: pd.DataFrame, x: str, y: str, color: str) -> px.bar:
    """
    Build a grouped Plotly Express bar chart from named columns.

    Args:
        data (pd.DataFrame): Source table for the chart.
        x (str): Name of the column plotted on the x-axis.
        y (str): Name of the column plotted on the y-axis.
        color (str): Name of the column used to colour (and group) the bars.

    Returns:
        px.bar: The figure object produced by ``px.bar``.
    """
    return px.bar(data, x=x, y=y, color=color, barmode="group")

def create_dash_app(data: pd.DataFrame) -> Dash:
    """
    Assemble a Dash app showing fruit amounts per city as a grouped bar chart.

    The chart always uses the columns "Fruit", "Amount" and "City".

    Args:
        data (pd.DataFrame): Source table for the chart.

    Returns:
        Dash: The configured application; the caller is responsible for running it.
    """
    dash_app = Dash(__name__)
    bar_figure = create_bar_chart(data, x="Fruit", y="Amount", color="City")

    # Page content, top to bottom.
    heading = html.H1(children='Fruit Amount by City')
    caption = html.Div(children='A bar chart showing fruit amounts in different cities.')
    graph = dcc.Graph(id='bar-chart', figure=bar_figure)
    dash_app.layout = html.Div(children=[heading, caption, graph])

    return dash_app

def parse_arguments(args: List[str]) -> argparse.Namespace:
    """
    Turn a list of command-line tokens into a parsed namespace.

    Exactly one positional argument is accepted: ``file_path``.

    Args:
        args (List[str]): Command-line tokens, without the program name.

    Returns:
        argparse.Namespace: Namespace exposing ``file_path``.
    """
    cli = argparse.ArgumentParser(description='Visualize data using Dash.')
    cli.add_argument('file_path', type=str, help='Path to the CSV file')
    namespace = cli.parse_args(args)
    return namespace

def main(args: List[str]) -> None:
    """
    Parse the arguments, load the CSV they name and serve it as a Dash app.

    Any exception is printed and the process exits with status 1.

    Args:
        args (List[str]): List of command-line arguments (without the program name).
    """
    try:
        parsed_args = parse_arguments(args)
        data = load_data(parsed_args.file_path)
        app = create_dash_app(data)
        # Dash 3 dropped `run_server` in favour of `run`; fall back for older releases.
        start_server = getattr(app, "run", None) or app.run_server
        start_server(debug=True)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        sys.exit(1)

# Entry point: forwards the command-line arguments (minus the program name) to main().
# NOTE(review): inside a Jupyter kernel sys.argv holds the kernel's own arguments,
# not a CSV path — confirm this cell is meant to be run as a standalone script.
if __name__ == '__main__':
    main(sys.argv[1:])


In [None]:
# Explicit %pip magic: does not rely on IPython automagic and targets the kernel's environment.
%pip install pacmap


In [None]:
# Clone the PaCMAP repository into the current working directory.
!git clone https://github.com/YingfanWang/PaCMAP.git


In [None]:
# Switch into the cloned repository; later cells load files via relative ./data paths.
%cd PaCMAP


In [None]:
# Run the repository's basic demo script.
# NOTE(review): the absolute /content path assumes the repo was cloned under /content — confirm.
!python /content/PaCMAP/demo/basic_demo.py


In [None]:
# (intentionally empty: the original cell held a bare `!`, i.e. an empty shell command)


In [None]:
import pacmap
import numpy as np
import matplotlib.pyplot as plt

# loading preprocessed coil_20 dataset
# you can change it with any dataset that is in the ndarray format, with the shape (N, D)
# where N is the number of samples and D is the dimension of each sample
# NOTE(review): allow_pickle=True lets np.load unpickle objects, which can execute
# arbitrary code — only use it on trusted files.
X = np.load("./data/coil_20.npy", allow_pickle=True)
# flatten every sample to a single feature vector
X = X.reshape(X.shape[0], -1)
y = np.load("./data/coil_20_labels.npy", allow_pickle=True)

# initializing the pacmap instance
# Setting n_neighbors to "None" leads to a default choice shown below in "parameter" section
embedding = pacmap.PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0)

# fit the data (The index of transformed data corresponds to the index of the original data)
X_transformed = embedding.fit_transform(X, init="pca")

# visualize the embedding, coloured by label
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(X_transformed[:, 0], X_transformed[:, 1], cmap="Spectral", c=y, s=0.6)


In [None]:
import pacmap
import numpy as np
import matplotlib.pyplot as plt
from annoy import AnnoyIndex

# loading preprocessed coil_20 dataset
# NOTE(review): allow_pickle=True lets np.load unpickle objects, which can execute
# arbitrary code — only use it on trusted files.
X = np.load("./data/coil_20.npy", allow_pickle=True)
# flatten every sample to a single feature vector
X = X.reshape(X.shape[0], -1)
y = np.load("./data/coil_20_labels.npy", allow_pickle=True)

# create nearest neighbor pairs
# here we use AnnoyIndex as an example, but the process can be done by any
# external NN library that provides neighbors into a matrix of the shape
# (n, n_neighbors_extra), where n_neighbors_extra is greater or equal to
# n_neighbors in the following example.

n, dim = X.shape
n_neighbors = 10
# index every sample, then build the Annoy index (build argument: 20)
tree = AnnoyIndex(dim, metric='euclidean')
for i in range(n):
    tree.add_item(i, X[i, :])
tree.build(20)

# candidate neighbours: 20 per sample (n_neighbors_extra = 20 >= n_neighbors = 10)
nbrs = np.zeros((n, 20), dtype=np.int32)
for i in range(n):
    nbrs_ = tree.get_nns_by_item(i, 20 + 1) # The first nbr is always the point itself
    nbrs[i, :] = nbrs_[1:]

scaled_dist = np.ones((n, n_neighbors)) # No scaling is needed

# Type casting is needed for numba acceleration
X = X.astype(np.float32)
scaled_dist = scaled_dist.astype(np.float32)

# make sure n_neighbors is the same number you want when fitting the data
pair_neighbors = pacmap.sample_neighbors_pair(X, scaled_dist, nbrs, np.int32(n_neighbors))

# initializing the pacmap instance
# feed the pair_neighbors into the instance
embedding = pacmap.PaCMAP(n_components=2, n_neighbors=n_neighbors, MN_ratio=0.5, FP_ratio=2.0, pair_neighbors=pair_neighbors)

# fit the data (The index of transformed data corresponds to the index of the original data)
X_transformed = embedding.fit_transform(X, init="pca")

# visualize the embedding, coloured by label
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(X_transformed[:, 0], X_transformed[:, 1], cmap="Spectral", c=y, s=0.6)


In [None]:
# `import umap` / `umap.UMAP` is provided by the PyPI package "umap-learn";
# the PyPI package named "umap" is a different project.
%pip install -q umap-learn trimap FlowCal PaCMAP


In [None]:
# %pip targets the environment of the running kernel, which `!pip` does not guarantee.
%pip install PaCMAP


In [None]:
import pacmap
import numpy as np
import matplotlib.pyplot as plt

# loading preprocessed coil_20 dataset
# you can change it with any dataset that is in the ndarray format, with the shape (N, D)
# where N is the number of samples and D is the dimension of each sample
# NOTE(review): allow_pickle=True lets np.load unpickle objects, which can execute
# arbitrary code — only use it on trusted files.
X = np.load("./data/coil_20.npy", allow_pickle=True)
# flatten every sample to a single feature vector
X = X.reshape(X.shape[0], -1)
y = np.load("./data/coil_20_labels.npy", allow_pickle=True)

# Initialize the pacmap instance
# By default, the n_neighbors is set to 10.
# Setting n_neighbors to "None" can enable an automatic parameter selection
# choice shown in "parameter" section of the README file.
# Notice that from v0.6.0 on, we rename the n_dims parameter to n_components.
reducer = pacmap.PaCMAP(n_components=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0)

# fit the data (The index of transformed data corresponds to the index of the original data)
X_transformed = reducer.fit_transform(X, init="pca")

# visualize the embedding, coloured by label
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(X_transformed[:, 0], X_transformed[:, 1], cmap="Spectral", c=y, s=0.6)

# saving the reducer
pacmap.save(reducer, "./coil_20_reducer")

# loading the reducer (the return value is not assigned; as the last expression
# of the cell it is only displayed)
pacmap.load("./coil_20_reducer")


In [None]:
import umap
import trimap
import FlowCal
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import manifold, datasets
from time import time
from tqdm import tqdm
from sklearn.decomposition import PCA
import pacmap
from sklearn.datasets import make_swiss_roll, make_s_curve


def data_prep(data_path, dataset='MNIST', size=10000):
    '''
    Load one of the supported datasets as numpy arrays.

    File-backed datasets are read from fixed file names under data_path;
    the synthetic ones (swiss_roll, s_curve, their *_hole variants and
    2D_curve) are generated in place and ignore data_path.

    Input:
        data_path: path of the folder you store all the data needed.
        dataset: the name of the dataset.
        size: the size of the dataset. This is useful when you only
              want to pick a subset of the data. For the generated
              datasets it is also the number of samples drawn.
    Output:
        X: the dataset in numpy array, truncated to the first `size` rows.
        labels: the labels of the dataset, truncated the same way.
                Flow_cytometry and mammoth_50k have no labels and return
                a placeholder array of ten zeros.
    Raises:
        ValueError: if `dataset` is not one of the supported names.
    '''

    # NOTE(review): allow_pickle=True lets np.load unpickle objects, which can
    # execute arbitrary code — only point data_path at trusted files.
    if dataset == 'MNIST':
        X = np.load(data_path + '/mnist_images.npy', allow_pickle=True).reshape(70000, 28*28)
        labels = np.load(data_path + '/mnist_labels.npy', allow_pickle=True)
    elif dataset == 'FMNIST':
        X = np.load(data_path + '/fmnist_images.npy', allow_pickle=True).reshape(70000, 28*28)
        labels = np.load(data_path + '/fmnist_labels.npy', allow_pickle=True)
    elif dataset == 'coil_20':
        X = np.load(data_path + '/coil_20.npy', allow_pickle=True).reshape(1440, 128*128)
        labels = np.load(data_path + '/coil_20_labels.npy', allow_pickle=True)
    elif dataset == 'coil_100':
        X = np.load(data_path + '/coil_100.npy', allow_pickle=True).reshape(7200, -1)
        labels = np.load(data_path + '/coil_100_labels.npy', allow_pickle=True)
    elif dataset == 'Flow_cytometry':
        X = FlowCal.io.FCSData(data_path + '/11-12-15_314.fcs')
        X = np.array(X)
        labels = np.zeros(10)
    elif dataset == 'mammoth':
        with open(data_path + '/mammoth_3d.json', 'r') as f:
            X = json.load(f)
        X = np.array(X)
        with open(data_path + '/mammoth_umap.json', 'r') as f:
            labels = json.load(f)
        labels = labels['labels']
        labels = np.array(labels)
    elif dataset == 'mammoth_50k':
        with open(data_path + '/mammoth_3d_50k.json', 'r') as f:
            X = json.load(f)
        X = np.array(X)
        labels = np.zeros(10)
    elif dataset == 'kddcup99':
        X = np.load(data_path + '/KDDcup99_float.npy', allow_pickle=True)
        labels = np.load(data_path + '/KDDcup99_labels_int.npy', allow_pickle=True)
    elif dataset == '20NG':
        X = np.load(data_path + '/20NG.npy', allow_pickle=True)
        labels = np.load(data_path + '/20NG_labels.npy', allow_pickle=True)
    elif dataset == 'USPS':
        X = np.load(data_path + '/USPS.npy', allow_pickle=True)
        labels = np.load(data_path + '/USPS_labels.npy', allow_pickle=True)
    elif dataset == 'cifar10':
        X = np.load(data_path + '/cifar10_imgs.npy', allow_pickle=True)
        labels = np.load(data_path + '/cifar10_labels.npy', allow_pickle=True)
    elif dataset == 'cifar100':
        X = np.load(data_path + '/cifar100_imgs.npy', allow_pickle=True)
        labels = np.load(data_path + '/cifar100_labels.npy', allow_pickle=True)
    elif dataset == 'Mouse_scRNA':
        data = pd.read_csv(data_path + '/GSE93374_Merged_all_020816_BatchCorrected_LNtransformed_doubletsremoved_Data.txt', sep='\t')
        X = data.to_numpy()
        # labels stay a DataFrame here; the final slice keeps its first `size` rows
        labels = pd.read_csv(data_path + '/GSE93374_cell_metadata.txt', sep='\t')
    elif dataset == 'swiss_roll':
        X, labels = make_swiss_roll(n_samples=size, random_state=20200202)
    elif dataset == 's_curve':
        X, labels = make_s_curve(n_samples=size, random_state=20200202)
    elif dataset == 's_curve_hole':
        X, labels = make_s_curve(n_samples=size, random_state=20200202)
        # cut a hole: drop points whose squared distance to the anchor is <= 0.3
        anchor = np.array([0, 1, 0])
        indices = np.sum(np.square(X-anchor), axis=1) > 0.3
        X, labels = X[indices], labels[indices]
    elif dataset == 'swiss_roll_hole':
        X, labels = make_swiss_roll(n_samples=size, random_state=20200202)
        # cut a hole: drop points whose squared distance to the anchor is <= 20
        anchor = np.array([-10, 10, 0])
        indices = np.sum(np.square(X-anchor), axis=1) > 20
        X, labels = X[indices], labels[indices]
    elif dataset == '2D_curve':
        # noisy quintic polynomial sampled on a regular grid; labelled by x
        x = np.arange(-5.5, 9, 0.01)
        y = 0.01 * (x + 5) * (x + 2) * (x - 2) * (x - 6) * (x - 8)
        noise = np.random.randn(x.shape[0]) * 0.01
        y += noise
        x = np.reshape(x, (-1, 1))
        y = np.reshape(y, (-1, 1))
        X = np.hstack((x, y))
        labels = x
    else:
        # An explicit exception still fires under `python -O`, where asserts are stripped.
        raise ValueError(f'Unsupported dataset: {dataset!r}')
    return X[:size], labels[:size]

def experiment_five(X, method='PaCMAP', **kwargs):
    '''
    Run `experiment` five times on the same data and collect every result.

    Input:
        X: the dataset, passed unchanged to `experiment`.
        method: name of the dimensionality-reduction method.
        **kwargs: keyword arguments forwarded to the method's constructor.
    Output:
        X_lows: numpy array stacking the five low-dimensional embeddings.
        all_times: numpy array with the five fitting times in seconds.
    '''
    X_lows, all_times = [], []
    for _ in range(5):
        X_low, all_time = experiment(X, method, **kwargs)
        X_lows.append(X_low)
        all_times.append(all_time)
    return np.array(X_lows), np.array(all_times)

def experiment(X, method='PaCMAP', **kwargs):
    '''
    Fit one dimensionality-reduction method on X and time it.

    Input:
        X: the dataset to embed.
        method: one of 'PaCMAP', 'UMAP' or 'TriMAP'.
        **kwargs: keyword arguments forwarded to the method's constructor.
    Output:
        X_low: the embedding returned by fit_transform.
        total_time: wall-clock seconds spent in fit_transform.
    Raises:
        ValueError: if `method` is not one of the supported names.
    '''
    if method == 'PaCMAP':
        # Qualified through the module: this cell imports `pacmap`, not the bare class name.
        transformer = pacmap.PaCMAP(**kwargs)
    elif method == 'UMAP':
        transformer = umap.UMAP(**kwargs)
    elif method == 'TriMAP':
        transformer = trimap.TRIMAP(**kwargs)
    else:
        # An explicit exception still fires under `python -O`, where asserts are stripped.
        raise ValueError(f"Incorrect method specified: {method!r}")
    start_time = time()
    X_low = transformer.fit_transform(X)
    total_time = time() - start_time
    return X_low, total_time


def main(data_path, output_path, dataset_name = 'MNIST', size=10000, data_pca=True):
    '''
    Benchmark PaCMAP, UMAP and TriMAP on one dataset and save the embeddings.

    Every parameter set of every method is run five times (via
    experiment_five). The stacked embeddings are written to
    <output_path>/<dataset>_<method>_<param>.npy and the timings are printed.

    Input:
        data_path: folder holding the dataset files.
        output_path: folder the .npy results are written to (created if missing).
        dataset_name: name of the dataset, as understood by data_prep.
        size: maximum number of samples to use.
        data_pca: if True, reduce the data with PCA first (1000 components
                  for Mouse_scRNA, otherwise 100 when there are more than
                  100 features).
    Output:
        0 on completion.
    '''
    import os  # local import: this cell's import block does not bring in os

    X, labels = data_prep(data_path, dataset=dataset_name, size=size)
    if data_pca:
        if dataset_name == 'Mouse_scRNA':
            pca = PCA(n_components=1000)
            X = pca.fit_transform(X)
        elif X.shape[1] > 100:
            pca = PCA(n_components=100)
            X = pca.fit_transform(X)
    print("Data loaded successfully")
    # do experiment
    methods = ['PaCMAP', 'UMAP', 'TriMAP']

    # NOTE(review): the PaCMAP keys below (num_NN / num_FP / num_mid) differ from the
    # n_neighbors / MN_ratio / FP_ratio keywords used elsewhere in this notebook —
    # confirm they match the installed pacmap version.
    args = {'TriMAP':[{'n_inliers':20}],
        'UMAP':[{'n_neighbors':10}, {'n_neighbors':20}, {'n_neighbors':40}],
        'PaCMAP':[{'num_NN':5, 'num_FP': 10, 'num_mid': 2},
        {'num_NN':10, 'num_FP': 20, 'num_mid': 5},
        {'num_NN':20, 'num_FP': 40, 'num_mid': 10}]}

    os.makedirs(output_path, exist_ok=True)

    print("Experiment started")
    for method in methods:
        parameters = args[method]
        for parameter in parameters:
            X_low, total_time = experiment_five(X, method, **parameter)
            # Pick the neighbourhood-size-like parameter to tag the output file with.
            if 'n_neighbors' in parameter:
                n_neighbors = parameter['n_neighbors']
            elif 'n_inliers' in parameter:
                n_neighbors = parameter['n_inliers']
            elif 'num_NN' in parameter:
                n_neighbors = parameter['num_NN']
            elif 'perplexity' in parameter:
                n_neighbors = parameter['perplexity']
            else:
                n_neighbors = 'default' # Default value
            # Write under the caller-supplied output_path instead of a fixed absolute path.
            np.save(os.path.join(output_path, f'{dataset_name}_{method}_{n_neighbors}'), X_low)
            avg_time = np.mean(total_time)
            print(f'Average time for method {method} on {dataset_name} with param={n_neighbors} is {avg_time}')
            print(f'The detailed time is {total_time}')
    return 0

# Run the benchmark over the datasets listed below, one main() call per dataset.
# data_prep slices the data to its first `size` rows, so a size far above the
# dataset length simply means "use everything".
if __name__ == '__main__':
    # Please define the data_path and output_path here
    data_path = "/content/PaCMAP/data/"
    output_path = "/content/PaCMAP/data/output/"
    main(data_path, output_path,'MNIST', 10000000)
    main(data_path, output_path,'FMNIST', 10000000)
    main(data_path, output_path,'coil_20', 10000000)
    main(data_path, output_path,'coil_100', 10000000)

    main(data_path, output_path,'Mouse_scRNA', 10000000)
    main(data_path, output_path,'mammoth', 10000000)
    main(data_path, output_path,'s_curve_hole', 10000)
    main(data_path, output_path,'20NG', 20000)
    main(data_path, output_path,'USPS', 20000)


    main(data_path, output_path,'Flow_cytometry', 10000000)
    main(data_path, output_path,'kddcup99', 10000000)


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import umap
from scipy import integrate
from pacmap import PaCMAP



# Colormaps used by the figure below.
# NOTE: plt.cm.get_cmap() was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap() performs the same lookup and exists in old and new releases.
cmap_fig = plt.get_cmap("Spectral")
cmap = plt.get_cmap("RdYlGn_r")     # loss surfaces
cmap_ = plt.get_cmap("gist_yarg")   # gradient-magnitude maps


# If you would like discrete ladders, use ladder_map
# Otherwise, just leave it, see examples below
def ladder_map(grids, ladder_range):
    """Quantise ``grids`` into discrete, evenly weighted steps.

    Every threshold in ``ladder_range`` that a cell strictly exceeds adds one
    step to that cell; the step count is finally divided by the number of
    thresholds.

    Args:
        grids: NumPy array of values to quantise.
        ladder_range: Sized iterable of thresholds.

    Returns:
        Array with the shape of ``grids`` holding, per cell, the fraction of
        thresholds that the cell exceeds.
    """
    steps = np.zeros(grids.shape)
    for level in ladder_range:
        exceeded = np.greater(grids, level)
        steps += exceeded.astype(np.float32)
    steps /= len(ladder_range)
    return steps

# The parameters "a" and "b" take the default values hard-coded in the two functions below
def attr(x):
    """Return the attractive term ``-x**0.79 / (1 + x**2)`` at distance ``x``.

    Accepts a scalar or a NumPy array (evaluated element-wise).
    """
    numerator = x ** 0.79
    denominator = 1 + x ** 2
    return -numerator / denominator

def repul(x):
    """Return the repulsive term ``0.895 * x / (1 + x**2) / (0.001 + x**2)``.

    Accepts a scalar or a NumPy array (evaluated element-wise).
    """
    scaled = 0.895 * x
    return scaled / (1 + x ** 2) / (0.001 + x ** 2)

def integ_attr(b):
    """Integrate ``attr`` from 0 up to every entry in the first row of ``b``.

    Args:
        b: 2-D array; only row 0 is read.

    Returns:
        Zero-initialised array shaped like ``b`` whose row 0 holds the
        integrals computed by ``scipy.integrate.quad``.
    """
    res = np.zeros(b.shape)
    for col in range(b.shape[1]):
        upper = b[0][col]
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(attr, 0, upper, points=[0])
        res[0][col] = outcome[0]
    return res

def integ_repul(b):
    """Integrate ``repul`` from 0 up to every entry in the first column of ``b``.

    Args:
        b: 2-D array; only column 0 is read.

    Returns:
        Zero-initialised array shaped like ``b`` whose column 0 holds the
        integrals computed by ``scipy.integrate.quad``.
    """
    res = np.zeros(b.shape)
    for row in range(b.shape[0]):
        upper = b[row][0]
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(repul, 0, upper, points=[0])
        res[row][0] = outcome[0]
    return res

# For t-SNE we pick one neighbor and one further point and visualize the forces on them (using the COIL20 dataset, 300 iterations)
def t_attr(x):
    """Return the t-SNE force on the chosen neighbor at distance ``x``.

    ``q_ij`` is ``1 / (1 + x**2) / 11500``, floored at ``1e-12``; the force is
    ``-(8.58e-5 - q_ij) * x / (1 + x**2)``. Works element-wise on arrays.
    """
    denom = x ** 2 + 1.0
    qij = np.maximum(1.0 / denom / 11500, 1e-12)
    return - (8.58 * 1e-5 - qij) * x / denom

def t_repul(x):
    """Return the t-SNE force on the chosen further point at distance ``x``.

    ``q_ij`` is ``1 / (1 + x**2) / 11500``, floored at ``1e-12``; the force is
    ``-10 * (1.19e-8 - q_ij) * x / (1 + x**2)``. Works element-wise on arrays.
    """
    denom = x ** 2 + 1.0
    qij = np.maximum(1.0 / denom / 11500, 1e-12)
    return - 10 * (1.19 * 1e-8 - qij) * x / denom

def t_integ_attr(b):
    """Integrate ``t_attr`` from 0 up to each entry of the 1-D array ``b``.

    Returns:
        1-D array of length ``b.shape[0]`` with one integral per entry.
    """
    count = b.shape[0]
    res = np.zeros(count)
    for idx in range(count):
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(t_attr, 0, b[idx], points=[0])
        res[idx] = outcome[0]
    return res

def t_integ_repul(b):
    """Integrate ``t_repul`` from 0 up to each entry of the 1-D array ``b``.

    Returns:
        1-D array of length ``b.shape[0]`` with one integral per entry.
    """
    count = b.shape[0]
    res = np.zeros(count)
    for idx in range(count):
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(t_repul, 0, b[idx], points=[0])
        res[idx] = outcome[0]
    return res

def t_integ_attr_(b):
    """Integrate ``t_attr`` from 0 up to every entry in the first row of ``b``.

    Args:
        b: 2-D array; only row 0 is read.

    Returns:
        Zero-initialised array shaped like ``b`` whose row 0 holds the integrals.
    """
    res = np.zeros(b.shape)
    for col in range(b.shape[1]):
        upper = b[0][col]
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(t_attr, 0, upper, points=[0])
        res[0][col] = outcome[0]
    return res

def t_integ_repul_(b):
    """Integrate ``t_repul`` from 0 up to every entry in the first column of ``b``.

    Args:
        b: 2-D array; only column 0 is read.

    Returns:
        Zero-initialised array shaped like ``b`` whose column 0 holds the integrals.
    """
    res = np.zeros(b.shape)
    for row in range(b.shape[0]):
        upper = b[row][0]
        # quad returns (value, abs_error); only the value is kept.
        outcome = integrate.quad(t_repul, 0, upper, points=[0])
        res[row][0] = outcome[0]
    return res


# One wide canvas; every panel below is placed with explicit plt.axes([...]) rectangles.
plt.figure(figsize=(28, 15))

# ---- t-SNE, top panel: loss surface over (d_ij, d_ik) with force streamlines ----
plt.axes([0.047, 0.52, 0.2, 0.44])
x = np.linspace(0.0001, 100, num=7000)  # d_ij
y = np.linspace(0.0001, 100, num=7000)  # d_ik
# sparse=True gives xx of shape (1, N) and yy of shape (N, 1); broadcasting builds the grid.
xx, yy = np.meshgrid(x, y, sparse=True)
tsne_loss = -t_integ_attr_(xx) - t_integ_repul_(yy)
# "+ 0 * yy" / "+ 0 * xx" only broadcast each force component to the full (N, N) grid.
tsne_U = t_attr(xx) + 0 * yy
tsne_V = t_repul(yy) + 0 * xx
plt.streamplot(xx, yy, tsne_U, tsne_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
im = plt.imshow(tsne_loss, origin='lower', extent=(0.0001, 100, 0.0001, 100), cmap=cmap)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Loss (t-SNE)', fontsize=38)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)

# ---- t-SNE, bottom panel: gradient magnitude, saturated at 0.00005 ----
plt.axes([0.047, 0.03, 0.2, 0.44])
tsne_grad_inten = np.sqrt(tsne_U ** 2 + tsne_V ** 2)
# Vectorised in-place clip; replaces a Python double loop over all N*N cells.
np.minimum(tsne_grad_inten, 0.00005, out=tsne_grad_inten)
plt.streamplot(xx, yy, tsne_U, tsne_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
im = plt.imshow(tsne_grad_inten, origin='lower', extent=(0.0001, 100, 0.0001, 100), cmap=cmap_)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Gradient magnitude', fontsize=34)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)



# ---- UMAP, top panel: loss surface over (d_ij, d_ik) with force streamlines ----
plt.axes([0.293, 0.52, 0.2, 0.44])
x = np.linspace(0.0001, 25, num=7000)  # d_ij
y = np.linspace(0.0001, 25, num=7000)  # d_ik
# sparse=True gives xx of shape (1, N) and yy of shape (N, 1); broadcasting builds the grid.
xx, yy = np.meshgrid(x, y, sparse=True)
u_loss = -integ_attr(xx) - integ_repul(yy)
# "+ 0*yy" / "+ 0*xx" only broadcast each force component to the full (N, N) grid.
u_U = attr(xx) + 0*yy
u_V = repul(yy) + 0*xx
plt.streamplot(xx, yy, u_U, u_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
im = plt.imshow(u_loss, origin='lower', extent=(0.0001, 25, 0.0001, 25), cmap=cmap)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Loss (UMAP)', fontsize=38)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)

# ---- UMAP, bottom panel: gradient magnitude, saturated at 1 ----
plt.axes([0.293, 0.03, 0.2, 0.44])
u_grad_inten = np.sqrt(u_U ** 2 + u_V ** 2)
# Vectorised in-place clip; replaces a Python double loop over all N*N cells.
np.minimum(u_grad_inten, 1, out=u_grad_inten)
plt.streamplot(xx, yy, u_U, u_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
im = plt.imshow(u_grad_inten, origin='lower', extent=(0.0001, 25, 0.0001, 25), cmap=cmap_)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Gradient magnitude', fontsize=34)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)


# ---- TriMAP, top panel: loss surface over (d_ij, d_ik) with descent streamlines ----
plt.axes([0.543, 0.52, 0.2, 0.44])
x = np.linspace(0.0001, 200, num=7000)  # d_ij
y = np.linspace(0.0001, 200, num=7000)  # d_ik
# sparse=True gives xx of shape (1, N) and yy of shape (N, 1); broadcasting builds the grid.
xx, yy = np.meshgrid(x, y, sparse=True)
t_loss = (1.0 + xx**2)/(2.0 + xx**2 + yy**2)
# t_U and t_V are the partial derivatives of t_loss with respect to d_ij and d_ik.
t_U = (2*xx + 2 * xx * yy**2)/(2 + xx**2 + yy**2)**2
t_V = (-2*yy*(1 + xx**2))/(2 + xx**2 + yy**2)**2
# Streamlines follow the negative gradient, i.e. the direction that lowers the loss.
plt.streamplot(xx, yy, -t_U, -t_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.4, maxlength=1.)
im = plt.imshow(t_loss, origin='lower', extent=(0.0001, 200, 0.0001, 200), cmap=cmap)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Loss (TriMAP)', fontsize=38)
plt.xticks([50, 100, 150, 200], fontsize=23)
plt.yticks([50, 100, 150, 200], fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)

# ---- TriMAP, bottom panel: gradient magnitude, saturated at 0.012 ----
plt.axes([0.543, 0.03, 0.2, 0.44])
t_grad_inten = np.sqrt(t_U ** 2 + t_V ** 2)
# Vectorised in-place clip; replaces a Python double loop over all N*N cells.
np.minimum(t_grad_inten, 0.012, out=t_grad_inten)
plt.streamplot(xx, yy, -t_U, -t_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
im = plt.imshow(t_grad_inten, origin='lower', extent=(0.0001, 200, 0.0001, 200), cmap=cmap_)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Gradient magnitude', fontsize=34)
plt.xticks([50, 100, 150, 200],fontsize=23)
plt.yticks([50, 100, 150, 200], fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)


# ---- PaCMAP, top panel: loss surface over (d_ij, d_ik) with descent streamlines ----
plt.axes([0.795, 0.52, 0.2, 0.44])
x = np.linspace(0.0001, 50, num=7000)  # d_ij
y = np.linspace(0.0001, 50, num=7000)  # d_ik
# sparse=True gives xx of shape (1, N) and yy of shape (N, 1); broadcasting builds the grid.
xx, yy = np.meshgrid(x, y, sparse=True)
p_loss = 1.5 * (xx**2 + 1)/(11.0 + xx**2) + 3.0/(2.0 + yy**2)
# p_U and p_V are the negated partial derivatives of p_loss (descent direction);
# "+ (0 * yy)" / "+ (0 * xx)" only broadcast them to the full (N, N) grid.
p_U = -1.5 * (20*xx)/(11.0 + xx**2)**2 + (0 * yy)
p_V = 3 * (2*yy)/(2 + yy**2)**2 + (0 * xx)
plt.streamplot(xx, yy, p_U, p_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.4, maxlength=1.)
# The extent matches the linspace range above and the gradient panel below
# (it previously started at 0.01, misaligning the image with the streamlines).
im = plt.imshow(p_loss, origin='lower', extent=(0.0001, 50, 0.0001, 50), cmap=cmap)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Loss (PaCMAP)', fontsize=38)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)

# ---- PaCMAP, bottom panel: gradient magnitude, saturated at 0.5 ----
plt.axes([0.795, 0.03, 0.2, 0.44])
p_grad_inten = np.sqrt(p_U ** 2 + p_V ** 2)
plt.streamplot(xx, yy, p_U, p_V, density=(2.4, 1.0), linewidth=0.8, arrowsize=2.5, maxlength=1.)
# Vectorised in-place clip; replaces a Python double loop over all N*N cells.
np.minimum(p_grad_inten, 0.5, out=p_grad_inten)
im = plt.imshow(p_grad_inten, origin='lower', extent=(0.0001, 50, 0.0001, 50), cmap=cmap_)
cb = plt.colorbar(im)
cb.ax.tick_params(labelsize=23)
plt.title('Gradient magnitude', fontsize=34)
plt.xticks(fontsize=23)
plt.yticks(fontsize=23)
plt.xlabel(r'$d_{ij}$', fontsize=38)
plt.ylabel(r'$d_{ik}$', fontsize=38)


# Write the finished figure to disk (Matplotlib appends its default file extension).
plt.savefig('rainbow_good_loss')


In [None]:
import argparse
from typing import Optional

import numpy as np
import matplotlib.pyplot as plt
import pacmap

def load_data(data_path: str, labels_path: str) -> tuple[np.ndarray, np.ndarray]:
    """
    Load preprocessed data and labels from the specified file paths.

    Data with more than two dimensions (e.g. a stack of images) is accepted and
    flattened to one feature vector per sample.

    Args:
        data_path (str): Path to the data file in .npy format.
        labels_path (str): Path to the labels file in .npy format.

    Returns:
        tuple[np.ndarray, np.ndarray]: The data reshaped to (n_samples, n_features)
        and the 1D labels array.

    Raises:
        FileNotFoundError: If the specified data or labels file is not found.
        ValueError: If the data has fewer than 2 dimensions, the labels are not 1D,
            or the number of labels differs from the number of samples.
    """
    try:
        # NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which
        # can execute arbitrary code. Only point this at files you trust.
        data = np.load(data_path, allow_pickle=True)
        labels = np.load(labels_path, allow_pickle=True)
    except FileNotFoundError as e:
        # Chain explicitly so the original traceback (with the offending path) is kept.
        raise FileNotFoundError(f"Data or labels file not found: {e}") from e

    # Anything with a leading sample axis can be flattened below; only 0D/1D input
    # has no per-sample feature axis to work with.
    if data.ndim < 2:
        raise ValueError(f"Invalid data shape. Expected at least a 2D array, got {data.ndim}D.")
    if labels.ndim != 1:
        raise ValueError(f"Invalid labels shape. Expected 1D array, got {labels.ndim}D.")
    if labels.shape[0] != data.shape[0]:
        raise ValueError(
            f"Invalid labels shape. Expected {data.shape[0]} labels (one per sample), "
            f"got {labels.shape[0]}."
        )

    # Collapse every non-sample axis into a single feature axis.
    return data.reshape(data.shape[0], -1), labels

def visualize_embedding(embedding: np.ndarray, labels: np.ndarray, figsize: tuple[int, int] = (6, 6)) -> None:
    """
    Draw a scatter plot of the first two embedding dimensions and show it.

    Args:
        embedding (np.ndarray): The transformed embedding data; columns 0 and 1 are plotted.
        labels (np.ndarray): Per-point values used to colour the markers ("Spectral" colormap).
        figsize (tuple[int, int], optional): The figure size. Defaults to (6, 6).
    """
    _, axis = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    first_dim = embedding[:, 0]
    second_dim = embedding[:, 1]
    axis.scatter(first_dim, second_dim, c=labels, s=0.6, cmap="Spectral")
    plt.show()

def main(data_path: str, labels_path: str, n_components: int, n_neighbors: Optional[int],
         mn_ratio: float, fp_ratio: float) -> None:
    """
    Load a dataset, embed it with PaCMAP (PCA initialisation) and plot the result.

    A loading failure is reported on stdout and the function returns without plotting.

    Args:
        data_path (str): Path to the data file in .npy format.
        labels_path (str): Path to the labels file in .npy format.
        n_components (int): Number of components for the embedding.
        n_neighbors (Optional[int]): Number of nearest neighbors to consider. None for default.
        mn_ratio (float): Ratio of mid-near pairs to sample.
        fp_ratio (float): Ratio of further pairs to sample.
    """
    try:
        data, labels = load_data(data_path, labels_path)
    except (FileNotFoundError, ValueError) as load_error:
        print(f"Error loading data: {load_error}")
        return

    reducer = pacmap.PaCMAP(
        n_components=n_components,
        n_neighbors=n_neighbors,
        MN_ratio=mn_ratio,
        FP_ratio=fp_ratio,
    )
    projected = reducer.fit_transform(data, init="pca")
    visualize_embedding(projected, labels)

if __name__ == "__main__":
    # Command-line entry point: every option has a default, so it also runs with no arguments.
    parser = argparse.ArgumentParser(description="PaCMAP embedding and visualization")
    parser.add_argument("--data_path", type=str, default="./data/coil_20.npy", help="Path to the data file")
    parser.add_argument("--labels_path", type=str, default="./data/coil_20_labels.npy", help="Path to the labels file")
    parser.add_argument("--n_components", type=int, default=2, help="Number of components for the embedding")
    parser.add_argument("--n_neighbors", type=int, default=None, help="Number of nearest neighbors to consider")
    parser.add_argument("--mn_ratio", type=float, default=0.5, help="Ratio of mid-near pairs to sample")
    parser.add_argument("--fp_ratio", type=float, default=2.0, help="Ratio of further pairs to sample")

    # Inside a Jupyter/IPython kernel sys.argv carries the kernel's own arguments
    # (e.g. "-f <connection file>"), which makes parse_args() exit with an error.
    # parse_known_args() sets those aside instead of aborting the cell.
    args, ignored_args = parser.parse_known_args()
    if ignored_args:
        print(f"Ignoring unrecognized arguments: {ignored_args}")

    main(args.data_path, args.labels_path, args.n_components, args.n_neighbors, args.mn_ratio, args.fp_ratio)


In [None]:
import pacmap
import numpy as np
import matplotlib.pyplot as plt

# Load the preprocessed coil_20 dataset and its labels.
# Any dataset stored as an ndarray of shape (N, D) can be used instead,
# where N is the number of samples and D is the dimension of each sample.
X = np.load("./data/coil_20.npy", allow_pickle=True)
y = np.load("./data/coil_20_labels.npy", allow_pickle=True)
# Flatten every sample into a single feature vector.
X = X.reshape(len(X), -1)

# Create the PaCMAP instance.
# n_neighbors=None lets PaCMAP pick its default choice (see the "parameter" section).
embedding = pacmap.PaCMAP(
    n_components=2,
    n_neighbors=None,
    MN_ratio=0.5,
    FP_ratio=2.0,
)

# Fit and embed the data (row i of the result corresponds to row i of X).
X_transformed = embedding.fit_transform(X, init="pca")

# Scatter-plot the two embedding dimensions, coloured by label.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
ax.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y, s=0.6, cmap="Spectral")


In [None]:
import pandas as pd
import pacmap
import numpy as np
import plotly.express as px

pd.set_option("display.max_colwidth", None)

# Generate random test data.
# A seeded generator keeps the data (and therefore the figure) identical across
# "Restart Kernel -> Run All" runs; PaCMAP itself is already seeded below.
num_samples = 100000
num_features = 2
num_classes = 5

rng = np.random.default_rng(1)
data = rng.random((num_samples, num_features))
labels = rng.integers(0, num_classes, size=num_samples)

# Create a PaCMAP instance
embedding_projector = pacmap.PaCMAP(
    n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0, random_state=1
)

# Fit the model and keep the embedding it learned for the training data.
# (Re-projecting the training set through transform() would treat every point as a
# new, unseen sample rather than returning the fitted embedding.)
documents_projected = embedding_projector.fit_transform(data, init="pca")

# Project the query point into the existing embedding space.
# The instance was created without save_tree=True, so the original training data
# is passed as `basis`, following PaCMAP's documented transform() usage.
query_point = rng.random((1, num_features))
query_projected = embedding_projector.transform(query_point, basis=data)

# Create a DataFrame for visualization: one row per document ...
df = pd.DataFrame({
    "x": documents_projected[:, 0],
    "y": documents_projected[:, 1],
    "class": labels,
    "size_col": 4,
    "symbol": "circle"
})

# ... plus a single, larger, star-shaped row for the query.
query_df = pd.DataFrame({
    "x": query_projected[0, 0],
    "y": query_projected[0, 1],
    "class": "Query",
    "size_col": 100,
    "symbol": "star"
}, index=[0])

df = pd.concat([df, query_df], ignore_index=True)

# Visualize the embedding
fig = px.scatter(
    df,
    x="x",
    y="y",
    color="class",
    size="size_col",
    symbol="symbol",
    color_discrete_map={"Query": "black"},
    width=1000,
    height=700,
)
fig.update_traces(
    marker=dict(opacity=1, line=dict(width=0, color="DarkSlateGrey")), selector=dict(mode="markers")
)
fig.update_layout(
    legend_title_text="<b>Class</b>",
    title="<b>2D Projection of Test Data via PaCMAP</b>",
)
fig.show()
