# Create all possible tSNE

This is a quick-and-dirty script that creates a tSNE plot for every gene in the normalized read count table and saves each one as a PNG.

In [1]:
# %load ../start.py
# Imports
import os
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
import string

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Project level imports
sys.path.insert(0, '../../lib')
from larval_gonad.notebook import Nb
from larval_gonad.plotting import TSNEPlot

# Setup notebook
nbconfig = Nb.setup_notebook()

# Turn on cache
from joblib import Memory
# The cache directory is passed positionally on purpose: joblib 0.12 renamed
# the keyword from ``cachedir`` to ``location``, but the first positional
# parameter has kept the same meaning, so this form works on both.
memory = Memory(nbconfig.cache, verbose=0)

last updated: 2018-01-08 
Git hash: 864e97e2be40d58ee219e1fa644c0339d05d6a4d


In [2]:
# Root of the reference data; raises KeyError if REFERENCES_DIR is not set.
REF = os.environ['REFERENCES_DIR']

# parents=True so the intermediate folders are created as well; without it
# mkdir fails when e.g. ``output/figures`` does not exist yet.
OUTPUT = '../../output/testes_scRNAseq_pilot'
Path(OUTPUT).mkdir(parents=True, exist_ok=True)

FIGS = '../../output/figures/testis_tsne'
Path(FIGS).mkdir(parents=True, exist_ok=True)

# Import gene annotations
# The table is parsed once and both lookup directions are derived from it.
# keep_default_na=False keeps symbols that look like missing-value markers
# (e.g. 'nan', 'NA') as plain strings instead of float NaN.
_annotation = pd.read_csv(
    str(Path(REF, 'dmel/r6-16/fb_annotation/dmel_r6-16.fb_annotation')),
    sep='\t',
    usecols=['gene_symbol', 'primary_FBgn'],
    keep_default_na=False,
)

# FBgn -> gene symbol
fbgn2symbol = _annotation.set_index('primary_FBgn')['gene_symbol'].to_dict()

# gene symbol -> FBgn
symbol2fbgn = _annotation.set_index('gene_symbol')['primary_FBgn'].to_dict()

In [3]:
# Load the tSNE table and the normalized read counts written to OUTPUT.
output_dir = Path(OUTPUT)
tsne = pd.read_csv(output_dir / 'tsne.tsv', sep='\t')
norm = pd.read_csv(output_dir / 'normalized_read_counts.tsv', sep='\t')

# Transpose the counts so their rows become columns, then attach them to the
# tSNE table. The join aligns on the index.
# NOTE(review): presumably both tables end up indexed by cell ID - confirm.
data = tsne.join(norm.transpose())

In [4]:
def sanitize_fname(fname):
    """Return *fname* with every character dropped that is not an ASCII
    letter, a digit, or one of ``-``, ``_``, ``.``."""
    allowed = "-_." + string.ascii_letters + string.digits
    return ''.join(char for char in fname if char in allowed)
    
def plot_gene(data, fbgn, symbol, **kwargs):
    """Save a two-panel tSNE figure for one gene, unless the file already exists.

    The left panel is colored by the values in column ``fbgn``; the right
    panel is colored by whether those values are greater than zero. The
    figure is written to ``FIGS/<fbgn>_<sanitized symbol>.png``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``tSNE_1``, ``tSNE_2`` and ``fbgn``.
    fbgn : str
        Column of ``data`` to plot; also used in the file name and the title.
    symbol : str
        Gene symbol shown in the title. A sanitized copy is used in the file
        name.
    **kwargs
        Forwarded unchanged to both ``TSNEPlot`` calls.

    Returns
    -------
    None
    """
    # Only the file name needs sanitizing; the title keeps the real symbol so
    # characters such as ':' are not silently dropped from the figure.
    fname = Path(FIGS, f'{fbgn}_{sanitize_fname(symbol)}.png')
    if fname.exists():
        # An existing file is treated as already done, so reruns are cheap.
        return

    df = data[['tSNE_1', 'tSNE_2', fbgn]]

    with plt.style.context(['paper-wide', 'default']):
        fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [1.3, 1]})
        try:
            TSNEPlot('tSNE_2', 'tSNE_1', data=df, hue=fbgn, s=10,
                     ax=ax1, title='Normalized Expression\n(Continuous)', **kwargs)

            TSNEPlot('tSNE_2', 'tSNE_1', data=df, hue=df[fbgn] > 0,
                     cmap={
                         '0': 'w',
                         '1': 'k',
                     }, s=10, ax=ax2, alpha=.6, edgecolor='k',
                     title='Normalized Expression\n(Binary)', **kwargs)

            fig.suptitle(f'{symbol} ({fbgn})')
            # Leave a margin at the top and right of the figure.
            fig.tight_layout(rect=[0, 0, .9, .9])
            fig.savefig(str(fname))
        finally:
            # Always release the figure, even if plotting or saving failed;
            # this function is called once per gene.
            plt.close(fig)

In [6]:
# 'Reds' palette with its first color replaced by the first 'Greys' color.
colors = sns.color_palette('Reds')
color2 = sns.color_palette('Greys')
colors[0] = color2[0]

# The first two columns of ``data`` are skipped; every remaining column is
# plotted as one gene.
for fbgn in data.columns[2:]:
    # Fall back to the FBgn itself when the annotation has no symbol for it,
    # so one unannotated gene does not abort the whole batch.
    symbol = fbgn2symbol.get(fbgn, fbgn)
    plot_gene(data, fbgn, symbol, palette=colors)