In [1]:
import os
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import glob
%matplotlib inline

In [2]:
# Output/input locations for UMAP run "4": figures are written to `fig_dir`,
# embedding pickles are read from `savedir`.
fig_dir = "../figures/umap_out/4"
# makedirs also creates missing parent directories and is a no-op when the
# directory already exists, so the cell is safe on a fresh checkout and on re-run.
os.makedirs(fig_dir, exist_ok=True)
savedir = "../results/umap_out/4"

In [3]:
# Metadata table read from a tab-separated file; the file's first column
# is used as the row index.
meta_df = pd.read_csv(
    "../data/meta.tsv",
    sep="\t",
    index_col=0,
)
meta_df

Unnamed: 0,Timepoint,Stimuli,Sample,Genotype,Stimuli Names
0_2G3,3,G,2,WT,Nec1s
1_2G3,3,G,2,WT,Nec1s
2_2G3,3,G,2,WT,Nec1s
3_2G3,3,G,2,WT,Nec1s
4_2G3,3,G,2,WT,Nec1s
...,...,...,...,...,...
4592_4B46,46,B,4,WT,zVD
4593_4B46,46,B,4,WT,zVD
4594_4B46,46,B,4,WT,zVD
4595_4B46,46,B,4,WT,zVD


In [4]:
# Load the pickled feature table (file name suggests log10 + z-scored values).
# NOTE: pickle can execute arbitrary code on load; only read files produced by
# this project.
# The context manager guarantees the file handle is closed after loading.
with open("../data/processed/data_df_log10_z.p", "rb") as raw_fh:
    raw = pickle.load(raw_fh)
raw

Unnamed: 0,Cell Size,Cell Circularity,Cell Aspect Ratio,Cell Tracker Intensity,PI Intensity,AnexinV Intensity
0_2G3,4.514737,-35.280948,4.073811,-0.549886,-1.508243,-1.286249
1_2G3,-0.206776,-23.196179,18.581889,-1.036545,-1.525713,-1.696325
2_2G3,-1.666082,-10.317551,10.892828,-0.955933,-1.518481,-1.653310
3_2G3,-1.082179,-15.445497,13.969401,-0.843089,-1.504416,-1.656892
4_2G3,-3.274187,0.564852,2.419749,-0.857998,-1.514578,-1.643740
...,...,...,...,...,...,...
4592_4B46,0.928028,0.081115,-0.633219,-1.657626,-1.026704,0.483296
4593_4B46,1.411085,-5.132832,4.370616,-2.036756,-0.607149,0.626727
4594_4B46,0.641696,-0.609064,1.004787,-1.695808,0.370317,0.438746
4595_4B46,-1.253652,0.564852,0.284477,-0.612722,1.667999,0.419938


In [5]:
# Render UMAP figures for every embedding pickle found in `savedir`.
# Each pickle is expected to hold a 2-item sequence: (embedding, sample index
# labels into `meta_df`). File names follow
# `embedding_<iter>_<neighbors>_<min_dist>.p` (the pattern written out by the
# parameter-grid cell below).
for curr_param in glob.glob(f"{savedir}/*.p"):
    print(curr_param)

    # Parse the run parameters from the base name. The extension is removed with
    # splitext rather than str.strip(".p"), which strips a *character set* from
    # both ends and only worked by accident on these paths.
    stem = os.path.splitext(os.path.basename(curr_param))[0]
    name_parts = stem.split("_")
    if len(name_parts) < 3:
        print("unexpected file name, skipping")
        continue
    i_ter, neigh, dist = name_parts[-3:]

    # NOTE: pickle can execute arbitrary code on load; only read trusted files.
    with open(curr_param, "rb") as fh:
        inp = pickle.load(fh)
    if len(inp) != 2:
        # Not an (embedding, sample index) pair.
        continue
    embedding, samples_inds = inp[0], inp[1]

    # .assign builds a new frame, so nothing is written onto a slice of meta_df
    # (avoids SettingWithCopyWarning).
    curr_df = meta_df.loc[samples_inds].assign(
        embedding_1=embedding[:, 0],
        embedding_2=embedding[:, 1],
    )
    curr_df = pd.merge(curr_df, raw, how='inner', left_index=True, right_index=True)

    # One colour per distinct stimulus.
    color_labels = curr_df["Stimuli Names"].unique()
    pallete = sns.color_palette("bright", len(color_labels))

    # --- Figure 1: embedding coloured by stimulus ---
    fig, ax = plt.subplots(figsize=(15, 15))
    # NOTE(review): `size=0.5` is seaborn's semantic size mapping, not the marker
    # size (`s=0.5`); kept as-is to preserve the figures — confirm the intent.
    sns.scatterplot(data=curr_df, x="embedding_1", y="embedding_2", palette=pallete,
                    hue='Stimuli Names', style="Genotype", size=0.5, ax=ax)
    ax.legend(loc='upper right')
    title = f"Neighbors={neigh}\nMinimum Distance={dist}\nN Samples = {len(embedding)}"
    ax.set_title(title)
    # The iteration index is part of the file name; without it, runs that share
    # (neighbors, min_dist) but differ in iteration overwrite each other.
    fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_umap_stimuli.png"))
    fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_umap_stimuli.pdf"))
    plt.close(fig)

    # --- Figures 2..n: embedding coloured by each column of `raw` ---
    for feature in raw.columns.values:
        fig, ax = plt.subplots(figsize=(15, 15))
        sns.scatterplot(data=curr_df, x="embedding_1", y="embedding_2", hue=feature,
                        style="Genotype", size=0.5, ax=ax)
        ax.legend(loc='upper right')
        feature_tag = feature.replace(' ', '')
        fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_{feature_tag}.png"))
        fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_{feature_tag}.pdf"))
        plt.close(fig)

../results/umap_out/4/embedding_0_10_0.p
../results/umap_out/4/embedding_0_5_0.1.p
../results/umap_out/4/embedding_0_15_0.1.p
../results/umap_out/4/embedding_0_3_0.1.p
../results/umap_out/4/embedding_0_100_0.p
../results/umap_out/4/embedding_1_5_0.1.p
../results/umap_out/4/embedding_0_100_0.1.p
../results/umap_out/4/embedding_0_200_0.1.p
../results/umap_out/4/embedding_1_3_0.p
../results/umap_out/4/embedding_0_15_0.p
../results/umap_out/4/embedding_1_10_0.1.p
../results/umap_out/4/embedding_0_3_0.p
../results/umap_out/4/embedding_1_5_0.p
../results/umap_out/4/embedding_1_100_0.p
../results/umap_out/4/embedding_0_5_0.p
../results/umap_out/4/embedding_0_200_0.p
../results/umap_out/4/embedding_1_3_0.1.p
../results/umap_out/4/embedding_1_15_0.1.p
../results/umap_out/4/embedding_1_100_0.1.p
../results/umap_out/4/embedding_1_10_0.p
../results/umap_out/4/embedding_0_10_0.1.p
../results/umap_out/4/embedding_1_15_0.p


In [6]:
# Parameter grid of UMAP runs to plot. Each combination maps to one pickle named
# `embedding_<iter>_<neighbors>_<min_dist>.p` inside `savedir`.
n_neighbors_l = [500,1000]
min_distance_l = [0.1, 0]
n_subsample = 100  # not used in this cell — presumably mirrors the embedding run; verify
n_iter = 3

for i_ter in range(n_iter):
    print('i_ter',i_ter)
    for neigh in n_neighbors_l:
        print('number of neighbors', neigh)
        for dist in min_distance_l:
            print('minimum distance', dist)
            # File name
            curr_param = f"{savedir}/embedding_{i_ter}_{neigh}_{dist}.p" #umap_results
            print(curr_param)
            if not os.path.exists(curr_param):
                print("not here")
                continue
            # NOTE: pickle can execute arbitrary code on load; only read trusted files.
            with open(curr_param, "rb") as fh:
                inp = pickle.load(fh)
            if len(inp) != 2:
                # Not an (embedding, sample index) pair.
                continue
            embedding, samples_inds = inp[0], inp[1]

            # .assign builds a new frame, so nothing is written onto a slice of
            # meta_df (avoids SettingWithCopyWarning).
            curr_df = meta_df.loc[samples_inds].assign(
                embedding_1=embedding[:, 0],
                embedding_2=embedding[:, 1],
            )
            curr_df = pd.merge(curr_df, raw, how='inner', left_index=True, right_index=True)

            # One colour per distinct stimulus.
            color_labels = curr_df["Stimuli Names"].unique()
            pallete = sns.color_palette("bright", len(color_labels))

            # --- Figure 1: embedding coloured by stimulus ---
            fig, ax = plt.subplots(figsize=(15, 15))
            # NOTE(review): `size=0.5` is seaborn's semantic size mapping, not the
            # marker size (`s=0.5`); kept as-is to preserve the figures — confirm.
            sns.scatterplot(data=curr_df, x="embedding_1", y="embedding_2", palette=pallete,
                            hue='Stimuli Names', style="Genotype", size=0.5, ax=ax)
            ax.legend(loc='upper right')
            title = f"Neighbors={neigh}\nMinimum Distance={dist}\nN Samples = {len(embedding)}"
            ax.set_title(title)
            # The loop variables are used directly in the file names. The original
            # re-parsed them from the path with str.strip(".p"), which strips a
            # character set rather than a suffix, and it overwrote the loop
            # variables `neigh`/`dist` in the process.
            fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_umap_stimuli.png"))
            fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_umap_stimuli.pdf"))
            plt.close(fig)

            # --- Figures 2..n: embedding coloured by each column of `raw` ---
            for feature in raw.columns.values:
                fig, ax = plt.subplots(figsize=(15, 15))
                sns.scatterplot(data=curr_df, x="embedding_1", y="embedding_2", hue=feature,
                                style="Genotype", size=0.5, ax=ax)
                ax.legend(loc='upper right')
                feature_tag = feature.replace(' ', '')
                # Include i_ter (as the stimuli figure does) so iterations do not
                # overwrite each other's per-feature figures.
                fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_{feature_tag}.png"))
                fig.savefig(os.path.join(fig_dir, f"{neigh}_{dist}_{i_ter}_{feature_tag}.pdf"))
                plt.close(fig)

i_ter 0
number of neighbors 500
minimum distance 0.1
../results/umap_out/4/embedding_0_500_0.1.p
minimum distance 0
../results/umap_out/4/embedding_0_500_0.p
number of neighbors 1000
minimum distance 0.1
../results/umap_out/4/embedding_0_1000_0.1.p
minimum distance 0
../results/umap_out/4/embedding_0_1000_0.p
i_ter 1
number of neighbors 500
minimum distance 0.1
../results/umap_out/4/embedding_1_500_0.1.p
minimum distance 0
../results/umap_out/4/embedding_1_500_0.p
number of neighbors 1000
minimum distance 0.1
../results/umap_out/4/embedding_1_1000_0.1.p
not here
minimum distance 0
../results/umap_out/4/embedding_1_1000_0.p
not here
i_ter 2
number of neighbors 500
minimum distance 0.1
../results/umap_out/4/embedding_2_500_0.1.p
not here
minimum distance 0
../results/umap_out/4/embedding_2_500_0.p
not here
number of neighbors 1000
minimum distance 0.1
../results/umap_out/4/embedding_2_1000_0.1.p
not here
minimum distance 0
../results/umap_out/4/embedding_2_1000_0.p
not here
