In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from random import random
import seaborn as sns
import numpy as np

# Function definitions:

In [None]:
def discretize_q_values(row, col_q_val):
    """Convert the q-value stored in ``row[col_q_val]`` into a three-level score.

    Returns 3 when the q-value is below 0.01, 2 when it is below 0.05, and
    1 otherwise. A NaN q-value also yields 1, because both comparisons are
    false for NaN.
    """
    q = row[col_q_val]
    if q < 0.01:
        return 3
    return 2 if q < 0.05 else 1

In [None]:
def unknown_essential_xy(TnSeq_screen, df_data, df_uk, rand_param = 0.85, seed = None):
    """Build jittered x/y coordinates and per-gene colors for one TnSeq screen.

    Parameters
    ----------
    TnSeq_screen : str
        Substring identifying the screen; every column of ``df_data`` whose
        name contains it is selected.
    df_data : pandas.DataFrame
        Table with 'Rv_ID' and 'gene_name' columns plus the per-screen columns.
        One of the selected columns must contain 'q_val' in its name; the
        first such column is used.
    df_uk : pandas.DataFrame
        Table with 'Rv_ID', 'gene_name' and 'UK_score_4' columns.
    rand_param : float, optional
        Width of the uniform jitter: each coordinate is shifted by a value
        drawn from [-rand_param/2, rand_param/2).
    seed : optional
        When given, the jitter is drawn from a private ``random.Random(seed)``
        so the output is reproducible. When None (default), the module-level
        ``random()`` is used, exactly as before.

    Returns
    -------
    uk_rd : numpy.ndarray
        Jittered 'UK_score_4' values (x coordinates).
    q_rd : numpy.ndarray
        Jittered discretized q-values (y coordinates).
    color_list : numpy.ndarray
        One RGB row per gene: light gray by default, the first color of the
        current seaborn palette for genes with q_val_D == 3 and
        UK_score_4 == 4.
    rv_ids : numpy.ndarray
        'Rv_ID' values in the same order as the other outputs.

    Raises
    ------
    IndexError
        If no selected column contains 'q_val' (same exception type as
        before, now with an explanatory message).
    """
    if seed is None:
        draw = random
    else:
        # Local import: the file only imports the `random` function, not the class.
        from random import Random
        draw = Random(seed).random

    # Grab data for a single TnSeq screen
    cols = [col for col in df_data.columns if TnSeq_screen in col]
    df_data_test = df_data[['Rv_ID', 'gene_name'] + cols].copy()

    # Discretize q-values:
    q_val_cols = [col for col in df_data_test.columns if 'q_val' in col]
    if not q_val_cols:
        raise IndexError(
            "no column containing 'q_val' found for TnSeq screen {!r}".format(TnSeq_screen)
        )
    col_q_val = q_val_cols[0]
    df_data_test['q_val_D'] = df_data_test.apply(discretize_q_values, axis=1, args=(col_q_val,))

    # Merge with unknowns:
    df_vis = df_data_test.merge(df_uk, on = ['Rv_ID', 'gene_name'], how = 'inner')

    # Get x-y datasets:
    rv_ids = df_vis.Rv_ID.values
    uk_list = np.array(df_vis.UK_score_4)
    q_list = np.array(df_vis.q_val_D)

    # Randomize (all x jitters are drawn first, then all y jitters, which keeps
    # the draw order of the original implementation):
    uk_rd = np.array([uk + rand_param*draw()-rand_param/2 for uk in uk_list])
    q_rd = np.array([q + rand_param*draw()-rand_param/2 for q in q_list])

    # Color the unknown-essentials differently:
    current_palette = sns.color_palette()
    # All genes are gray by default; np.full keeps the (n, 3) shape even when n == 0.
    color_list = np.full((df_vis.shape[0], 3), 0.85)
    # Boolean mask instead of index labels, so the selection does not rely on
    # df_vis having a 0..n-1 index.
    is_unknown_essential = ((df_vis.q_val_D == 3) & (df_vis.UK_score_4 == 4)).to_numpy()
    color_list[is_unknown_essential] = current_palette[0]

    return uk_rd, q_rd, color_list, rv_ids

### Load Tn-seq data:

In [None]:
# Root folder of the project data (relative to this notebook).
path_data = '../../data/'
# Tn-seq table read from the Excel workbook in that folder.
df_data = pd.read_excel(
    os.path.join(path_data, 'Tn_library_DB_qval_log2FC.xlsx')
)

### Load unknown score data:

In [None]:
# Read the unknown-score table and keep only the gene identifiers and the UK_score_4 column.
df_uk = pd.read_csv(
    os.path.join(path_data, 'unknown_essentials/unknown_ALL_levels_essential_scores.csv')
)[['Rv_ID', 'gene_name', 'UK_score_4']]

### Grab essentiality data for single TnSeq screen:

In [None]:
# Screen whose columns are selected from df_data (matched by substring).
TnSeq_screen = '2012_Zhang'
# Jittered coordinates, per-gene colors and gene ids for that screen.
uk_rd, q_rd, color_list, rv_ids = unknown_essential_xy(
    TnSeq_screen = TnSeq_screen,
    df_data = df_data,
    df_uk = df_uk,
)

In [None]:
# One 'rgb(r, g, b)' string per row of color_list, each channel rounded to 2 decimals.
color_list_rgb = [
    'rgb({})'.format(', '.join(str(np.round(channel, 2)) for channel in color))
    for color in color_list
]

In [None]:
# The right-hand side of this assignment was missing, which made the cell a
# SyntaxError. It is reconstructed from the per-color expression used to build
# color_list_rgb in the previous cell.
# NOTE(review): confirm this was the intended computation.
for col in color_list:
    col_rgb = 'rgb(' + ', '.join([str(np.round(rgb, 2)) for rgb in col]) + ')'
# Cell output: the string for the last color in color_list.
col_rgb

### Scatter plot

In [None]:
# Jittered scatter of annotation score (x) against discretized essentiality (y)
# for the screen loaded above.
plt.figure(figsize = (20,14))
plt.scatter(uk_rd, q_rd, s = 400, edgecolors='k', alpha = 0.75, color = color_list, linewidths=3)
plt.xlabel('Annotation', fontsize = 44)
plt.ylabel('Essentiality', fontsize = 44)
plt.xticks([0, 1, 2, 3, 4], ['most well\ncharacterized','' , '', '', 'least\ncharacterized'], fontsize = 30)
plt.yticks([1.25, 2.25, 3.25], ['non-essential' ,'q-val < 0.05', 'q-val < 0.01'], fontsize = 30, rotation = 90)

# Dashed separators halfway between the integer levels. axvline/axhline span
# the whole axes, so no hand-built coordinate arrays are needed.
for x in np.arange(0.5, 4.5, 1):
    plt.axvline(x, linestyle = '--', color = "k")

for y in np.arange(1.5, 3.5, 1):
    plt.axhline(y, linestyle = '--', color = "k")

plt.xlim(-0.5, 4.5)
plt.ylim(0.5, 3.5)

plt.tight_layout()


In [None]:
# Same scatter as the previous cell, but with q_rd_2 on the y axis.
# NOTE(review): `q_rd_2` is not assigned in any visible cell of this notebook,
# so this cell raises NameError on a fresh kernel. Presumably it comes from a
# second unknown_essential_xy(...) call for another screen; define it first.
plt.figure(figsize = (20,14))
plt.scatter(uk_rd, q_rd_2, s = 400, edgecolors='k', alpha = 0.75, color = color_list, linewidths=3)
plt.xlabel('Annotation', fontsize = 44)
plt.ylabel('Essentiality', fontsize = 44)
plt.xticks([0, 1, 2, 3, 4], ['most well\ncharacterized','' , '', '', 'least\ncharacterized'], fontsize = 30)
plt.yticks([1.25, 2.25, 3.25], ['non-essential' ,'q-val < 0.05', 'q-val < 0.01'], fontsize = 30, rotation = 90)

# Dashed separators halfway between the integer levels. axvline/axhline span
# the whole axes, so no hand-built coordinate arrays are needed.
for x in np.arange(0.5, 4.5, 1):
    plt.axvline(x, linestyle = '--', color = "k")

for y in np.arange(1.5, 3.5, 1):
    plt.axhline(y, linestyle = '--', color = "k")

plt.xlim(-0.5, 4.5)
plt.ylim(0.5, 3.5)

plt.tight_layout()

# fig_path = '../../figures/genome_visualizations/q_discretized.png'
# plt.savefig(fig_path, dpi = 150)

In [None]:
def fig_function(uk, q, color_list):
    """Draw the annotation-vs-essentiality scatter on a new 25x14 inch figure.

    Parameters
    ----------
    uk : array-like
        x coordinates of the points (the 'Annotation' axis).
    q : array-like
        y coordinates of the points (the 'Essentiality' axis).
    color_list : array-like
        Per-point colors, forwarded to ``plt.scatter(color=...)``.

    Returns
    -------
    None
        The new figure is left as pyplot's current figure, so callers can
        annotate it via ``plt.gca()`` and write it out with ``plt.savefig``.
    """
    plt.figure(figsize = (25,14))

    plt.scatter(uk, q, s = 900, edgecolors='k', alpha = 0.75, color = color_list, linewidths=3)
    plt.xlabel('Annotation', fontsize = 44)
    plt.ylabel('Essentiality\n', fontsize = 44)
    plt.xticks([0, 1, 2, 3, 4], ['most well\ncharacterized','' , '', '', 'least\ncharacterized'], fontsize = 30)
    plt.yticks([1.25, 2.25, 3.25], ['non-essential' ,'', 'essential'], fontsize = 30, rotation = 90)

    # Dashed separators halfway between the integer levels. axvline/axhline
    # span the whole axes, so no hand-built coordinate arrays are needed.
    for x in np.arange(0.5, 4.5, 1):
        plt.axvline(x, linestyle = '--', color = "k")

    for y in np.arange(1.5, 3.5, 1):
        plt.axhline(y, linestyle = '--', color = "k")

    plt.xlim(-0.5, 4.5)
    plt.ylim(0.5, 3.5)


In [None]:
# Side captions drawn next to the figures, one line of text per list entry.
text_str_1 = '\n'.join([
    'unknown essentials',
    'in condition 1',
    '',
    'carbon source:',
    'glycerol',
])
text_str_2 = '\n'.join([
    'unknown essentials',
    'in condition 2',
    '',
    'isoniazid',
])

In [None]:
# Condition-1 figure: scatter plus the caption placed just right of the axes.
counter = 0
fig_function(uk_rd, q_rd, color_list)
ax = plt.gca()
# x = 1.05 in axes coordinates puts the caption outside the plotting area.
ax.text(1.05, 0.7, text_str_1, fontsize = 40, transform = ax.transAxes)
plt.tight_layout()
fig_path = '../../figures/genome_visualizations/q_discretized_C1.png'
plt.savefig(fig_path, dpi = 100)

In [None]:
# Condition-1 figure with the points selected by ind_temp_2 drawn again on top.
# NOTE(review): `ind_temp_2` is not assigned in any visible cell of this
# notebook, so this cell raises NameError on a fresh kernel; define it first.
counter = 0
fig_function(uk_rd, q_rd, color_list)
plt.scatter(
    uk_rd[ind_temp_2],
    q_rd[ind_temp_2],
    s = 900,
    edgecolors = 'k',
    alpha = 0.75,
    color = color_list[ind_temp_2],
    linewidths = 3,
)
ax = plt.gca()
# x = 1.05 in axes coordinates puts the caption outside the plotting area.
ax.text(1.05, 0.7, text_str_1, fontsize = 40, transform = ax.transAxes)
plt.tight_layout()
fig_path = '../../figures/genome_visualizations/q_discretized_both.png'
plt.savefig(fig_path, dpi = 100)

In [None]:
# Condition-2 figure with its caption in the fourth palette color.
# NOTE(review): `uk_rd_2` and `q_rd_2` are not assigned in any visible cell of
# this notebook. Presumably they come from a second unknown_essential_xy(...)
# call for another screen; define them first.

# `current_palette` only exists as a local inside unknown_essential_xy, so it
# is created here to avoid a NameError.
current_palette = sns.color_palette()

fig_function(uk_rd_2, q_rd_2, color_list)
ax = plt.gca()
plt.text(1.05, 0.7, text_str_2, transform=ax.transAxes, fontsize = 40, color = current_palette[3])
plt.tight_layout()
fig_path = '../../figures/genome_visualizations/q_discretized_C2.png'
plt.savefig(fig_path, dpi = 150)

In [None]:
# Frames 0-14: the same condition-1 figure saved 15 times.

# `current_palette` only exists as a local inside unknown_essential_xy, so it
# is created here to avoid a NameError.
current_palette = sns.color_palette()

# Every frame is identical, so the figure is built once and only the save is
# repeated, instead of opening 15 separate figures.
fig_function(uk_rd, q_rd, color_list)
ax = plt.gca()
plt.text(1.05, 0.7, text_str_1, transform=ax.transAxes, fontsize = 40, color = current_palette[0])
plt.tight_layout()

for counter in range(15):
    fig_path = '../../figures/genome_visualizations/q_discretized_'+str(counter)+'.png'
    plt.savefig(fig_path, dpi = 100)

In [None]:
# Transition frames: linearly interpolate every point from its (uk_rd, q_rd)
# position to its (uk_rd_2, q_rd_2) position in 21 steps, saved as frames 15-35.
# NOTE(review): `uk_rd_2` and `q_rd_2` are not assigned in any visible cell of
# this notebook; define them first.
# NOTE(review): the last frame written here (index 35) is overwritten by the
# following cell, whose loop also starts at 35.

# linspace gives exactly 21 values from 0 to 1 inclusive, without relying on
# how a float-step arange treats its end point.
t_range = np.linspace(0, 1, 21)
for counter, t in enumerate(t_range, start = 15):
    uk_rd_t = t*uk_rd_2 + (1-t)*uk_rd
    q_rd_t = t*q_rd_2 + (1-t)*q_rd

    fig_function(uk_rd_t, q_rd_t, color_list)
    ax = plt.gca()
    if counter <= 20:
        plt.text(1.05, 0.7, text_str_1, transform=ax.transAxes, fontsize = 40)
    elif counter >= 30:
        plt.text(1.05, 0.7, text_str_2, transform=ax.transAxes, fontsize = 40)
    else:
        # Caption drawn in white for the middle frames. Presumably this keeps
        # tight_layout reserving the same margin while hiding the text; confirm.
        plt.text(1.05, 0.7, text_str_1, transform=ax.transAxes, fontsize = 40, color='white')
    plt.tight_layout()

    fig_path = '../../figures/genome_visualizations/q_discretized_'+str(counter)+'.png'
    plt.savefig(fig_path, dpi = 150)
    # Close each frame once it is on disk: 21 simultaneously open figures
    # exceed matplotlib's default open-figure warning threshold and keep every
    # canvas in memory. As a consequence the frames are no longer shown inline.
    plt.close()

In [None]:
# Frames 35-49: the same condition-2 figure saved 15 times.
# NOTE(review): `uk_rd_2` and `q_rd_2` are not assigned in any visible cell of
# this notebook; define them first.

# `current_palette` only exists as a local inside unknown_essential_xy, so it
# is created here to avoid a NameError.
current_palette = sns.color_palette()

# Every frame is identical, so the figure is built once and only the save is
# repeated, instead of opening 15 separate figures.
fig_function(uk_rd_2, q_rd_2, color_list)
ax = plt.gca()
plt.text(1.05, 0.7, text_str_2, transform=ax.transAxes, fontsize = 40, color = current_palette[3])
plt.tight_layout()

for counter in range(35, 50):
    fig_path = '../../figures/genome_visualizations/q_discretized_'+str(counter)+'.png'
    plt.savefig(fig_path, dpi = 150)