In [1]:
import pandas as pd
import numpy as np

from nomic import atlas
from scipy import stats

In [2]:
# Notebook configuration: both values are substituted into the input file
# paths and into the Atlas project name further down.
data_name = 'ibd'        # dataset identifier used in the data/ paths
embedding_size = 128     # embedding size encoded in the embeddings file name

In [3]:
# The embeddings file is keyed by both the dataset name and the embedding size.
embeddings_path_template = '../data/processed/mixture_embeddings/{}/cnn_hyperbolic_{}_mixture_embeddings.csv'
embeddings_path = embeddings_path_template.format(data_name, embedding_size)

# The metadata file is keyed by the dataset name only.
metadata_path_template = '../data/interim/ihmp/{}_metadata.csv'
metadata_path = metadata_path_template.format(data_name)

In [4]:
# Read the embeddings table, using the 'Sample' column as the row index so
# that only the embedding values remain as data columns.
embeddings_df = pd.read_csv(embeddings_path, index_col='Sample')

# Plain NumPy matrix (one row per sample) handed to Atlas later on.
embeddings = embeddings_df.to_numpy()

In [5]:
def replace_percentiles(dataframe, columns, lower=0.05, upper=0.95):
    """Clamp extreme values in ``columns`` to per-column quantile bounds.

    Values strictly below the ``lower`` quantile of a column are replaced by
    that quantile, and values strictly above the ``upper`` quantile are
    replaced by that quantile. Missing values never compare below/above a
    bound, so they are left untouched.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to process. It is modified IN PLACE (the selected columns are
        overwritten) and the same object is returned.
    columns : str or list of str
        Column label(s) to clamp.
    lower : float, default 0.05
        Lower quantile in [0, 1].
    upper : float, default 0.95
        Upper quantile in [0, 1]; must be >= ``lower``.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, with ``columns`` clamped.

    Raises
    ------
    ValueError
        If the quantiles are not ordered as ``0 <= lower <= upper <= 1``.
    """
    if not 0 <= lower <= upper <= 1:
        raise ValueError(
            'quantiles must satisfy 0 <= lower <= upper <= 1, got '
            'lower={!r}, upper={!r}'.format(lower, upper)
        )

    # Slice once; both bounds and both masks are derived from the original,
    # unmodified values.
    selected = dataframe[columns]
    lower_bounds = selected.quantile(lower)
    upper_bounds = selected.quantile(upper)

    # np.where broadcasts the per-column bounds across all rows.
    clamped = np.where(selected.lt(lower_bounds), lower_bounds, selected)
    clamped = np.where(selected.gt(upper_bounds), upper_bounds, clamped)

    # Write back into the caller's frame (in-place, as callers may rely on it).
    dataframe[columns] = clamped

    return dataframe

In [6]:
# Load the per-sample metadata and fill missing numeric values with the mean
# of their column. numeric_only=True is required here: the frame also holds
# non-numeric columns, for which mean() emits a deprecation warning on older
# pandas and raises TypeError on pandas >= 2.0.
metadata_df = pd.read_csv(metadata_path)
metadata_df = metadata_df.fillna(metadata_df.mean(numeric_only=True))

# Clamp outliers in the listed columns to their 5th/95th percentiles.
cols = ['fb ratio']
metadata_df = replace_percentiles(metadata_df, cols)

# One dict per metadata row. orient='records' avoids transposing a
# mixed-dtype frame and does not collapse rows that share an index label.
# NOTE(review): these records are presumably paired with the embedding rows
# by position — confirm the metadata rows are in the same 'Sample' order as
# the embeddings file.
data = metadata_df.to_dict(orient='records')

  metadata_df = metadata_df.fillna(metadata_df.mean())


In [7]:
# Every metadata column is passed to Atlas as a colorable field.
colorable_fields = list(metadata_df.columns)

# Field in each data record that identifies the sample.
id_field = 'Sample'

# Project name, e.g. "ibd (128)".
name = '{} ({})'.format(data_name, embedding_size)

# reset_project_if_exists=True clears an existing project of the same name
# before uploading, so re-running this cell replaces the previous map.
project = atlas.map_embeddings(
    embeddings=embeddings,
    data=data,
    name=name,
    id_field=id_field,
    colorable_fields=colorable_fields,
    reset_project_if_exists=True,
)

[32m2023-06-06 03:58:04.797[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m__init__[0m:[36m871[0m - [1mFound existing project `ibd (128)` in organization `eitan.turok`. Clearing it of data by request.[0m
[32m2023-06-06 03:58:05.625[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m_create_project[0m:[36m965[0m - [1mCreating project `ibd (128)` in organization `eitan.turok`[0m
[32m2023-06-06 03:58:06.998[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_embeddings[0m:[36m100[0m - [1mUploading embeddings to Atlas.[0m
1it [00:01,  1.06s/it]
[32m2023-06-06 03:58:08.076[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m_add_data[0m:[36m1577[0m - [1mUpload succeeded.[0m
[32m2023-06-06 03:58:08.077[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_embeddings[0m:[36m119[0m - [1mEmbedding upload succeeded.[0m
[32m2023-06-06 03:58:08.900[0m | [1mINFO    [0m | [36mnomic.project[0m:[36mcreate_index[0m:[36m1282[0m - [1mCreated map `ibd 

In [8]:
# Show the project object returned by atlas.map_embeddings as the cell output.
project