# Plot tSNE, UMAP, and PCA for data visualization

Run tSNE and UMAP multiple times and get the mean embedding and the standard deviation.



In [None]:
import pandas as pd
import easygui as eg
import sys

# Add the local GitHub checkout folder to the import path; presumably this is
# what makes the `scripts_notebooks_fossa` package imported below resolvable.
# NOTE(review): machine-specific absolute path - adjust it on other machines.
sys.path.append(r"C:\Users\Fer\Documents\GitHub")

from scripts_notebooks_fossa.pycombat_umap import combat_util

%load_ext autoreload
%autoreload 2

import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides warnings raised by the libraries used below;
# comment it out when debugging unexpected results.
warnings.filterwarnings('ignore')

## 1. Inputs

In [None]:
# Ask the user for the input CSV through a file-picker dialog.
myfile = eg.fileopenbox(msg="Choose a file", default=r"F:")
if myfile is None:
    # fileopenbox returns None when the dialog is cancelled; stop here with a
    # clear message instead of letting pd.read_csv fail on a None path.
    raise ValueError("No input file was selected.")
print('Filename', myfile)

# Load the selected file and preview the first rows.
df = pd.read_csv(myfile)
df.head()

In [None]:
# Distance metric shared by the tSNE and UMAP runs below.
metric = 'cosine'

# Number of repeated tSNE / UMAP runs.
run_iterations = 100

# UMAP settings forwarded as n_neighbors / min_dist.
n_neighbors_input = 15
min_dist_input = 0.5

# Metadata columns handed to the plotting helpers as hover_cols.
hover_list = [
    'Metadata_Plate',
    'Metadata_Well',
    'Metadata_compound',
    'Metadata_concentration_uM',
]

In [None]:
# Columns that are combined into a single label column.
cols_to_join = ["Metadata_compound", "Metadata_concentration_uM"]

# col_generator hands back the data frame and the name of the new column
# (both are used below).
df, new_col = combat_util.col_generator(df, cols_to_join=cols_to_join)

# Relabel the non-treated wells: a trailing concentration of exactly zero
# (" 0", " 0.0", ...) becomes " 20". The pattern is anchored to the end of the
# label so that fractional concentrations such as " 0.5" are left untouched
# (an unanchored ' 0' would turn them into " 20.5").
# Assumes labels look like "<compound> <concentration>" - TODO confirm against
# combat_util.col_generator.
# NOTE(review): the original comment said the 0 is "removed", but the code
# substitutes 20 - confirm that 20 is the intended label.
df[new_col] = df[new_col].str.replace(r' 0(?=(?:\.0*)?$)', ' 20', regex=True)
df[new_col].unique()

## 2. Filter samples if needed

- Profile-evaluation metrics such as mean average precision can show that the profile of a specific sample is not reproducible; such samples are filtered out here.

In [None]:
# Labels (values of the combined compound/concentration column) to exclude.
list_filter_OUT = ['Orphenadrine 1']

In [None]:
# Keep only the rows whose combined label is NOT listed in list_filter_OUT and
# renumber the index. Boolean indexing with isin() replaces the f-string-built
# query expression, which breaks when the column name is not a valid Python
# identifier (spaces, dots, ...); the selected rows are the same.
df_filtered = df[~df[new_col].isin(list_filter_OUT)].reset_index(drop=True)

## 3. Run tSNE divergence and visualization

In [None]:
# Run tSNE on the filtered data with iteration enabled (run_iterations runs);
# the call returns two objects, both consumed by the next cells.
X, plot_tsne = combat_util.tsne_generator(
    df_filtered,
    perplexity=30,
    n_components=2,
    metric=metric,
    iterate=True,
    number_runs=run_iterations,
)

In [None]:
# Divergence check on X, the first object returned by tsne_generator.
# NOTE(review): the meaning of `range=65` is defined in combat_util and is not
# visible here - presumably the upper bound of the values scanned; confirm.
combat_util.tsne_divergence(X, range=65)

In [None]:
# Scatter plot of the tSNE result using columns "0" and "1" as axes, with
# "x_err" / "y_err" passed as the error-bar columns (presumably the spread
# across the repeated runs - confirm in combat_util).
combat_util.plot_tsne(
    plot_tsne,
    color_col='Metadata_compound',
    hover_cols=hover_list,
    size=True,
    size_col="Metadata_concentration_uM",
    dili_color=True,
    x="0",
    y="1",
    error_x="x_err",
    error_y="y_err",
)

## 4. Run UMAP multiple times and plot

In [None]:
# Run UMAP on the filtered data with iteration enabled (run_iterations runs),
# using the neighbourhood, distance and metric settings chosen in the inputs.
plot_umap = combat_util.generate_x_y_umap(
    df_filtered,
    n_neighbors=n_neighbors_input,
    min_dist=min_dist_input,
    metric=metric,
    iterate=True,
    number_runs=run_iterations,
)

In [None]:
# Scatter plot of the UMAP result using columns "0" and "1" as axes, with
# "x_err" / "y_err" passed as the error-bar columns (presumably the spread
# across the repeated runs - confirm in combat_util).
combat_util.plot_umap(
    plot_umap,
    color_col='Metadata_compound',
    hover_cols=hover_list,
    size=True,
    size_col="Metadata_concentration_uM",
    dili_color=True,
    x="0",
    y="1",
    error_x="x_err",
    error_y="y_err",
)

## 5. Plot PCA

In [None]:
# PCA of the filtered data, requesting five components.
plot_pca = combat_util.generate_pca(
    df_filtered,
    n_components=5,
)

In [None]:
# PCA scatter of column "0" against column "1"; the UMAP plotting helper is
# reused here, without error-bar columns.
combat_util.plot_umap(
    plot_pca,
    color_col='Metadata_compound',
    hover_cols=hover_list,
    size=True,
    size_col="Metadata_concentration_uM",
    dili_color=True,
    x="0",
    y="1",
)

In [None]:
# PCA scatter of column "1" against column "2"; the UMAP plotting helper is
# reused here, without error-bar columns.
combat_util.plot_umap(
    plot_pca,
    color_col='Metadata_compound',
    hover_cols=hover_list,
    size=True,
    size_col="Metadata_concentration_uM",
    dili_color=True,
    x="1",
    y="2",
)

In [None]:
# PCA scatter of column "0" against column "2"; the UMAP plotting helper is
# reused here, without error-bar columns.
combat_util.plot_umap(
    plot_pca,
    color_col='Metadata_compound',
    hover_cols=hover_list,
    size=True,
    size_col="Metadata_concentration_uM",
    dili_color=True,
    x="0",
    y="2",
)