# 1000 Genomes Project figure

In [1]:
#! python3
# -*-coding:Utf-8 -*

########################################################################################################
########################################################################################################

# Create figure depicting 2x3 embeddings of 1000 Genomes Project data.

# This file contains elements of code written by Alex Diaz-Papkovich.

########################################################################################################
########################################################################################################

import numpy as np, utils.plot_fcts as plot_fcts, paths, params, pandas as pd

# Name of this file
#module_name = "genomes-figure.py"

##############################
############################## 
####################

# Load the tab-separated sample panel and keep its "pop" column
# (one population code per row of the panel).
panel_file = "{v}affy_samples.20141118.panel".format(v=paths.genomes_data)
panel_df = pd.read_csv(panel_file, delimiter="\t")
labels_vec = panel_df["pop"].values

# Hex colour assigned to each population code (insertion order preserved).
color_dict = dict([
    ("ACB", "#bd9e39"),
    ("ASW", "#8c6d31"),
    ("BEB", "#637939"),
    ("CDX", "#393b79"),
    ("CEU", "#d6604d"),
    ("CHB", "#5254a3"),
    ("CHS", "#9e9ac8"),
    ("CLM", "#7b4173"),
    ("ESN", "#e7ba52"),
    ("FIN", "#ad494a"),
    ("GBR", "#843c39"),
    ("GIH", "#8ca252"),
    ("GWD", "#e7cb94"),
    ("IBS", "#d6616b"),
    ("ITU", "#b5cf6b"),
    ("JPT", "#6b6ecf"),
    ("KHV", "#9c9ede"),
    ("LWK", "#7f3b08"),
    ("MSL", "#b35806"),
    ("MXL", "#a55194"),
    ("PEL", "#ce6dbd"),
    ("PJL", "#cedb9c"),
    ("PUR", "#de9ed6"),
    ("STU", "#c7e9c0"),
    ("TSI", "#e7969c"),
    ("YRI", "#e08214"),
])

# One hex colour per entry of labels_vec, looked up in color_dict.
# The "U7" dtype stores strings of at most 7 characters ("#rrggbb").
labels = np.array([color_dict[pop_code] for pop_code in labels_vec], dtype="U7")

# Tab-separated table describing the populations; the columns read further
# down are "Population Code", "Population Description" and "Super Population".
populations_file = "{v}20131219.populations.tsv".format(v=paths.genomes_data)
label_descr = pd.read_csv(populations_file, sep='\t')

# Accumulators per super-population: a human-readable description, the list of
# sample indices ('idx') and the list of per-sample colours ('L_colors').
# Every entry gets its own fresh pair of lists.
D_superpop = {
    superpop_code: {'superpop_descr': superpop_name, 'idx': [], 'L_colors': []}
    for superpop_code, superpop_name in (
        ("AFR", "African"),
        ("EUR", "European"),
        ("AMR", "Central/South American"),
        ("SAS", "South Asian"),
        ("EAS", "East Asian"),
    )
}

# For every row of label_descr, record the positions in labels_vec whose
# population code equals that row's code. The same positions, together with
# the population colour repeated once per matching sample, are accumulated
# under the row's super-population.
D_pop = {}
for _, pop_row in label_descr.iterrows():
    pop_code = pop_row["Population Code"]
    sample_idx = [pos for pos, sample_pop in enumerate(labels_vec) if sample_pop == pop_code]
    D_pop[pop_code] = {'pop_descr': pop_row["Population Description"], 'idx': sample_idx}
    if not sample_idx:
        # No sample carries this code: the super-population is left untouched
        # (and is not even looked up).
        continue
    superpop_entry = D_superpop[pop_row["Super Population"]]
    superpop_entry['idx'].extend(sample_idx)
    superpop_entry['L_colors'].extend([color_dict[pop_code]] * len(sample_idx))

# Keep only the populations that have at least one sample; their index lists
# become int64 arrays and the population colour is attached.
D_pop_arr = {
    pop_code: {
        'pop_descr': pop_entry['pop_descr'],
        'color': color_dict[pop_code],
        'idx': np.asarray(pop_entry['idx'], dtype=np.int64),
    }
    for pop_code, pop_entry in D_pop.items()
    if len(pop_entry['idx']) > 0
}

# Same filtering for the super-populations. Each kept super-population gets a
# single colour: the mean along axis 0 of its per-sample colours (so it is
# weighted by sample count), truncated to int32 and converted back to hex.
# NOTE(review): presumably arr_hex_to_rgb returns one RGB row per colour —
# confirm against utils.plot_fcts.
D_superpop_arr = {}
for superpop_code, superpop_entry in D_superpop.items():
    if len(superpop_entry['idx']) == 0:
        continue
    member_rgb = plot_fcts.arr_hex_to_rgb(np.asarray(superpop_entry['L_colors']))
    mean_rgb = member_rgb.mean(axis=0).astype(np.int32)
    superpop_color = plot_fcts.rgb_to_hex(mean_rgb)
    D_superpop_arr[superpop_code] = {
        'superpop_descr': superpop_entry['superpop_descr'],
        'idx': np.asarray(superpop_entry['idx'], dtype=np.int64),
        'color': superpop_color,
    }

# Annotation entries passed to the plotting routine together with the
# per-sample colours: per-population and per-super-population summaries.
D_viz_emb = {'D_pop': D_pop_arr, 'D_superpop': D_superpop_arr}

plot_fcts.create_2x3_figure(
    data_name=paths.genomes_name,
    emb_path=paths.genomes_emb,
    fig_path=paths.genomes_fig,
    arr_colors=labels,
    f_format=params.f_format,
    D_viz_emb=D_viz_emb,
)

# Completion banner (three lines on stdout).
print('*********************', '***** Done! :-) *****', '*********************', sep='\n')


===
=== Creating the 2x3 figure for 1000 Genomes Project data
===
*********************
***** Done! :-) *****
*********************


# Kanton et al. figure

In [5]:
#! python3
# -*-coding:Utf-8 -*

########################################################################################################
########################################################################################################

# Create figure depicting 2x3 embeddings of Kanton et al. data.

# This file contains elements of code written by Sebastian Damrich and made publicly available following this link: https://github.com/berenslab/ne_spectrum_scRNAseq/blob/main/utils/utils.py#L99

########################################################################################################
########################################################################################################

import numpy as np, utils.plot_fcts as plot_fcts, paths, params
import colormaps
# # Name of this file
# module_name = "kanton-figure.py"

##############################
##############################
####################

# Announce the data set, then load its data matrix and its labels.
print('Loading {v} data'.format(v=paths.kanton_name))

# Only the first two columns of the stored matrix are kept.
pca_file = '{p}pca_True/human-409b2.data.npy'.format(p=paths.kanton_data)
X_PCs = np.load(pca_file)[:, :2]

# Metadata: one label per sample (later used as a key of d['label_colors']).
labels_file = '{v}human-409b2.labels.npy'.format(v=paths.kanton_data)
labels = np.load(labels_file)

import matplotlib.colors as mcolors

cmap = colormaps.thermal

# Sample the colormap at n_colors evenly spaced positions in [0, 1] and turn
# each returned colour into a hex string in a single pass.
n_colors = 8
colors_hex = [
    mcolors.to_hex(cmap(step / (n_colors - 1)))
    for step in range(n_colors)
]

# The three lookup tables below share one ordering: position i in each list
# is tied to colors_hex[i].
stage_names = [
    "iPSCs", "EB", "Neuroectoderm", "Neuroepithelium",
    "Organoid-1M", "Organoid-2M", "Organoid-3M", "Organoid-4M",
]
# Keys of "time_colors" keep their leading spaces exactly as written here.
# NOTE(review): presumably padding for aligned legend text; this table is not
# read anywhere in this cell — confirm whether it is still needed.
time_names_padded = [
    "  0 days", "  4 days", "10 days", "15 days",
    "  1 month", "  2 months", "  3 months", "  4 months",
]
time_names = [
    "0 days", "4 days", "10 days", "15 days",
    "1 month", "2 months", "3 months", "4 months",
]

d = {
    # stage label -> hex colour
    "label_colors": {name: colors_hex[pos] for pos, name in enumerate(stage_names)},
    # padded time label -> hex colour
    "time_colors": {name: colors_hex[pos] for pos, name in enumerate(time_names_padded)},
    # hex colour -> time label
    "colors_time": {colors_hex[pos]: name for pos, name in enumerate(time_names)},
}

# Replace every stage label by its hex colour (d['label_colors'] maps stage
# label -> colour; an unknown label raises KeyError, as before).
#
# A new string array is built instead of writing the colours back into the
# array returned by np.load: assigning into a fixed-width NumPy string array
# silently truncates values longer than the array's item size, which would
# corrupt the "#rrggbb" codes if the stored labels were shorter than 7
# characters. dtype=str lets NumPy size the items from the data and keeps the
# result a string array even when there are no samples.
labels = np.array([d['label_colors'][stage] for stage in labels], dtype=str)

# Group sample positions by time label. At this point `labels` holds hex
# colours, and d["colors_time"] maps a colour back to its time label.
D_samp_by_time = {time_name: [] for time_name in d["colors_time"].values()}
for sample_pos, sample_color in enumerate(labels):
    D_samp_by_time[d["colors_time"][sample_color]].append(sample_pos)

# Drop time labels without samples and store the rest as int64 index arrays.
D_samp_by_time_arr = {
    time_name: np.asarray(positions, dtype=np.int64)
    for time_name, positions in D_samp_by_time.items()
    if len(positions) > 0
}

# Build the figure: per-sample colours, the two retained data columns and the
# per-time-label sample indices used for annotation.
plot_fcts.create_2x3_figure(
    data_name=paths.kanton_name,
    emb_path=paths.kanton_emb,
    fig_path=paths.kanton_fig,
    arr_colors=labels,
    f_format=params.f_format,
    X_PCs=X_PCs,
    D_viz_emb={'D_samp_by_time': D_samp_by_time_arr},
)

# Completion banner (three lines on stdout).
print('*********************', '***** Done! :-) *****', '*********************', sep='\n')


Loading Kanton et al data
===
=== Creating the 2x3 figure for Kanton et al data
===
*********************
***** Done! :-) *****
*********************


# Tasic et al. figure


In [5]:
#! python3
# -*-coding:Utf-8 -*

########################################################################################################
########################################################################################################

# Create figure depicting 2x3 embeddings of Tasic et al. data.

########################################################################################################
########################################################################################################

import numpy as np, utils.plot_fcts as plot_fcts, paths, params, pandas as pd

# Name of this file
# NOTE(review): not referenced anywhere else in this cell, and the equivalent
# line is commented out in the other two cells — confirm whether it is needed.
module_name = "tasic-figure.py"

##############################
############################## 
####################

# Announce the data set, then load its data matrix and cluster metadata.
print('Loading {v} data'.format(v=paths.tasic_name))

# Only the first two columns of the preprocessed matrix are kept.
data_file = '{p}preprocessed-data.npy'.format(p=paths.tasic_data)
X_PCs = np.load(data_file)[:, :2]

# Per-row cluster metadata: numeric id, label and colour.
cluster_file = '{v}tasic-sample_heatmap_plot_data.csv'.format(v=paths.tasic_data)
clusterInfo = pd.read_csv(cluster_file)
ids = clusterInfo['cluster_id'].values
labels_orig = clusterInfo['cluster_label'].values
colors = clusterInfo['cluster_color'].values

# For each cluster id 1..max(ids), take the label and the colour of the first
# row carrying that id. An id in that range with no row raises IndexError.
cluster_masks = [ids == cluster_id for cluster_id in range(1, np.max(ids) + 1)]
clusterNames = np.array([labels_orig[mask][0] for mask in cluster_masks])
clusterColors = np.array([colors[mask][0] for mask in cluster_masks])

# Zero-based cluster index per row (a new array; `ids` is left unchanged).
clusters = np.array(ids) - 1

# Annotation entries passed to the plotting routine: the zero-based cluster
# index of every row, plus one colour and one name per cluster.
D_viz_emb = {
    'clusters': clusters,
    'clusterColors': clusterColors,
    'clusterNames': clusterNames,
}

plot_fcts.create_2x3_figure(
    data_name=paths.tasic_name,
    emb_path=paths.tasic_emb,
    fig_path=paths.tasic_fig,
    arr_colors=colors,
    f_format=params.f_format,
    X_PCs=X_PCs,
    D_viz_emb=D_viz_emb,
)

# Completion banner (three lines on stdout).
print('*********************', '***** Done! :-) *****', '*********************', sep='\n')


Loading Tasic et al data
===
=== Creating the 2x3 figure for Tasic et al data
===
*********************
***** Done! :-) *****
*********************
