In [1]:
import matplotlib.colors
import numpy as np
import pandas as pd

import hits.visualize.interactive
import repair_seq as rs
import templates

In [2]:
# Root directory handed to both get_all_pools (here) and GuideLibrary (next cell).
# NOTE(review): hard-coded absolute path — presumably only resolvable on the
# machine/cluster where this was written; consider making it configurable.
base_dir = '/lab/solexa_weissman/jah/projects/prime_editing_screens/dist'

# Mapping-like collection of pools, indexed by pool name further down (pools[pn]).
# Presumably contains every pool found under base_dir — confirm in repair_seq.
pools = rs.pooled_screen.get_all_pools(base_dir)

In [3]:
# Guide library constructed from base_dir and the library name 'AX227'.
# Used below for gene -> guides lookups (gene_guides), the non-targeting guide
# sets, and the guide -> gene mapping.
gl = rs.guide_library.GuideLibrary(base_dir, 'AX227')

In [4]:
# Pools to compare, listed as replicate pairs. Names read as
# <cell line>_<editing condition>_<replicate>; list order sets the column order
# of the tables built from them.
pool_names = [
    'K562_PE2_rep1', 'K562_PE2_rep2',
    'K562_PE3+50_rep1', 'K562_PE3+50_rep2',
    'HeLa_PE2_rep1', 'HeLa_PE2_rep2',
    'HeLa_PE3+50_rep1', 'HeLa_PE3+50_rep2',
    'HeLa_PE3-50_rep1', 'HeLa_PE3-50_rep2',
]

# Outcome categories to extract from each pool, in display order.
categories = [
    'intended edit',
    "unintended annealing of RT'ed sequence",
    'extension from intended annealing',
    'deletion',
    'duplication',
]

# Reader-facing label for each internal category name.
cat_aliases = {
    'intended edit':
        'Intended edit',
    'deletion':
        'Deletion',
    'duplication':
        'Tandem duplication',
    "unintended annealing of RT'ed sequence":
        "Joining of RT'ed sequence at unintended location",
    'extension from intended annealing':
        'Installation of additional edits from nearly-matched scaffold sequence',
}

# One column per (pool, category) pair holding that category's fraction scaled
# to a percentage. The tuple keys become two-level column labels; rows come from
# whatever .loc[category] returns for each pool (presumably one value per
# CRISPRi guide — confirm against repair_seq).
edit_percentages = pd.DataFrame({
    (pool_name, category): pools[pool_name].category_fractions.loc[category] * 100
    for pool_name in pool_names
    for category in categories
})

# Axis names double as labels for the two column levels used when plotting.
edit_percentages.index.name = 'CRISPRi sgRNA'
edit_percentages.columns.names = ['Screen condition', 'Outcome category']

In [5]:
# Fold change of each row relative to the pooled non-targeting control row,
# computed column by column (the control row is broadcast across all rows).
# The control row is addressed through the shared constant, as the later cells
# do when colouring and dropping it, rather than a repeated string literal.
fcs = edit_percentages / edit_percentages.loc[rs.pooled_screen.ALL_NON_TARGETING]

# log2 of a zero fold change is -inf; infinities are replaced with NaN right
# before the fold-change scatter plot is drawn.
l2fcs = np.log2(fcs)

  


In [6]:
# Per-guide point colour: everything starts grey, then selected genes are
# highlighted.
colors = pd.Series('grey', index=edit_percentages.index)

# Genes that share a colour are listed next to each other.
gene_to_color = {
    'MSH2': 'tab:orange',
    'MSH6': 'tab:orange',
    'PMS2': 'tab:green',
    'MLH1': 'tab:green',
    'FEN1': 'tab:purple',
    'LIG1': 'tab:purple',
    'HLTF': 'tab:red',
    'EXO1': 'tab:blue',
}

# Every non-targeting guide set is drawn in black.
gene_to_color.update(dict.fromkeys(gl.non_targeting_guide_sets, 'black'))

# Paint all guides belonging to each highlighted gene / guide set.
for gene_name, gene_color in gene_to_color.items():
    colors[gl.gene_guides(gene_name)] = gene_color

# The pooled non-targeting row gets the same colour as the individual sets.
colors[rs.pooled_screen.ALL_NON_TARGETING] = 'black'

# Normalise matplotlib colour names to hex strings.
colors = colors.map(matplotlib.colors.to_hex)

In [7]:
# Annotate both tables in place with a per-guide colour and gene, remove the
# pooled non-targeting row, and switch the column labels to display names.
# The frames are mutated (not rebound) because the plotting cells below read
# edit_percentages and l2fcs by name.
for df in [edit_percentages, l2fcs]:
    # Series is aligned on the guide index.
    df['color'] = colors

    # errors='ignore' keeps this cell re-runnable: on a second execution the
    # row is already gone and the default errors='raise' would throw KeyError.
    df.drop(rs.pooled_screen.ALL_NON_TARGETING, inplace=True, errors='ignore')

    df['gene'] = df.index.map(gl.guide_to_gene_with_non_targeting_guide_sets)

    # Level 0: pool names with underscores shown as spaces.
    df.rename(columns=lambda s: s.replace('_', ' '), level=0, inplace=True)
    # Level 1: internal category names -> reader-facing aliases.
    df.rename(columns=cat_aliases, level=1, inplace=True)

In [8]:
# Display order of the screen conditions: the pool names with underscores shown
# as spaces, i.e. the same transformation applied to the level-0 column labels
# in the previous cell.
condition_order = [pool_name.replace('_', ' ') for pool_name in pool_names]

# Display order of the outcome categories, using their reader-facing aliases.
category_order = [cat_aliases.get(category, category) for category in categories]

# Options shared by the frequency plot here and the fold-change plot below.
# NOTE(review): the trailing spaces in 'gene ' and 'color ' look deliberate —
# presumably the two-level labels ('gene', '') / ('color', '') are flattened by
# joining on a space; confirm against hits.visualize.interactive before editing.
common_kwargs = {
    'table_keys': ['gene '],
    'size': 600,
    'identical_bins': True,
    'two_level_index': True,
    'color_by': 'color ',
    'hide_widgets': ['annotation', 'color_by', 'grid_radio_buttons'],
    'initial_alpha': 0.7,
    'menu_width': (200, 300),
    'level_order': {
        'Outcome category': category_order,
        'Screen condition': condition_order,
    },
    'return_layout': True,
}

# Outcome percentages; initial view compares the two K562 PE2 replicates.
layout = hits.visualize.interactive.scatter(
    edit_percentages,
    initial_xy_names=(
        ('K562 PE2 rep1', 'Intended edit'),
        ('K562 PE2 rep2', 'Intended edit'),
    ),
    data_bounds=(0, 100),
    grid='grid',
    **common_kwargs,
)

description_data = dict(
    title='Scatter plots of prime editing outcome frequencies',
    details='Comparisons of the effects of CRISPRi sgRNAs on the frequencies of different prime editing outcomes in different screen conditions.',
)

# Output path is relative to the notebook's working directory.
templates.save_bokeh_html(
    layout,
    '../PE_scatter_frequencies.html',
    description_data=description_data,
    modal_data='scatter_tutorial.yaml',
)

In [9]:
# log2 fold changes; infinite values (from zero frequencies) are turned into NaN
# before being handed to the plot. Initial view compares PE2 against PE3+50 in
# K562, replicate 1.
layout = hits.visualize.interactive.scatter(
    l2fcs.replace([np.inf, -np.inf], np.nan),
    initial_xy_names=(
        ('K562 PE2 rep1', 'Intended edit'),
        ('K562 PE3+50 rep1', 'Intended edit'),
    ),
    initial_data_lims=(-2.25, 3),
    data_bounds=(-5, 5),
    grid='diagonal+axes',
    **common_kwargs,
)

description_data = dict(
    title='Scatter plots of log₂ fold changes in prime editing outcome frequencies',
    details='Comparisons of the effects of CRISPRi sgRNAs on log₂ fold changes in the frequencies of different prime editing outcomes in different screen conditions.',
)

# Output path is relative to the notebook's working directory.
templates.save_bokeh_html(
    layout,
    '../PE_scatter_l2fcs.html',
    description_data=description_data,
    modal_data='scatter_tutorial.yaml',
)