## Shallow mutagenesis of NS5 and effect on IFN signaling
Matt and Blake have identified 66 residues on NS5 that contact its host-cell target STAT2, a component of the JAK-STAT IFN signaling cascade. They want to integrate their data, which quantify the loss of IFN signaling for each single-amino-acid mutant, with the DMS datasets generated by the NS5 DMS analysis.  

In [1]:
# import necessary Python modules and packages
import glob
import os
import subprocess
import shutil

import Bio.SeqIO

import dms_tools2
from dms_tools2.ipython_utils import showPDF
from dms_tools2.plot import COLOR_BLIND_PALETTE_GRAY as CBPALETTE
import dms_tools2.prefs
import dms_tools2.utils
print(f"Using dms_tools2 {dms_tools2.__version__}")

from IPython.display import display, HTML

import pandas as pd

import altair as alt
from plotnine import *

import numpy as np

import dms_variants.plotnine_themes

Using dms_tools2 2.6.10


In [2]:
# Directory that receives the files written by this notebook.
# exist_ok=True makes the cell safe to re-run when the folder already exists.
outdir = "./results/shallow_mut_scanning"
os.makedirs(outdir, exist_ok=True)

In [3]:
# input directory: raw data files read by this notebook
datadir = "./data/"

# root results directory: inputs produced elsewhere are read from here
resultsdir = "./results/"

Using the dms-view input file as a starting point, we have most of the values we hope to compare to IFN signaling, except for site entropy and site neffective. We can calculate those now:

In [4]:
# Build one preferences table covering every tile's 'prefs_Huh-7.5.csv', then
# annotate each (site, mutation) preference with that site's entropy and
# neffective as returned by dms_tools2.prefs.prefsEntropy.
tile_list = ['tile_1', 'tile_2', 'tile_3', 'tile_4',
             'tile_5', 'tile_6', 'tile_7', 'tile_8']

# read each tile's preferences, keyed by tile name
d = {}
for tile in tile_list:
    # pass the path components separately so os.path.join supplies the
    # separators; the path no longer depends on `resultsdir` ending in "/"
    tilepath = os.path.join(resultsdir, tile, "prefs_with_stop", "prefs_Huh-7.5.csv")
    d[tile] = pd.read_csv(tilepath)

# stack the tiles in `tile_list` order (driven by the list itself so the two
# cannot drift apart)
alltiles_prefs = pd.concat([d[tile] for tile in tile_list])

# every column after the first is treated as a character (amino acid / stop);
# assumes the first column is 'site' -- TODO confirm against the prefs files
charlist = alltiles_prefs.columns[1:].tolist()

# now we can use this merged prefs file as input for dms_tools2.prefs.prefsEntropy
alltiles_prefs_Entropy = (alltiles_prefs
                  # long format: one row per (site, mutation) with its preference
                  .melt(id_vars='site',
                        var_name='mutation',
                        value_name = 'prefs')
                  # attach the per-site entropy / neffective to every mutation row;
                  # validate guards against a site appearing more than once
                  .merge(dms_tools2.prefs.prefsEntropy(alltiles_prefs, charlist)
                         [['site', 'entropy', 'neffective']],
                         on='site', validate='many_to_one')
                  # merge key of the form "<site><mutation>", e.g. "45A"
                  .assign(mut_temp=lambda x: x['site'].astype(str) + x['mutation'])
                  [['mut_temp','prefs','entropy', 'neffective']]
                 )

In [5]:
# Load the NS5 DMS data for all tiles and add prefs / entropy / neffective.
# The dms-view input file is the starting point because it already holds most
# of the per-mutation values we need.
# Path components are passed separately so os.path.join supplies the
# separators instead of relying on `resultsdir` ending in "/".
NS5_DMS_allsites_file = os.path.join(resultsdir, 'all_tiles', 'alltiles_host_adaptation.csv')
NS5_DMS_allsites = pd.read_csv(NS5_DMS_allsites_file)

# temporary merge key "<site><mutant>" matching `mut_temp` in alltiles_prefs_Entropy
NS5_DMS_allsites = NS5_DMS_allsites.assign(mut_temp=lambda x: x['site'].astype(str) + x['mutant'])
NS5_DMS_allsites = (NS5_DMS_allsites
                    # default inner join: rows without a matching preference are dropped;
                    # validate ensures each key maps to a single prefs row so no DMS
                    # row can be silently duplicated
                    .merge(alltiles_prefs_Entropy, on='mut_temp', validate='many_to_one')
                    # keep the analysis columns only (this drops the temporary key)
                    [['site', 'wildtype', 'mutant', 'mutation', 'prefs','entropy','neffective','muteffect_C636',
                      'muteffect_Huh75', 'foldchange_C636', 'foldchange_Huh75',
                      'diffsel_Huh75_vs_C636', 'mutation_type']]
                   )

In [6]:
# Load the IFN signaling measurements and summarise the replicates per mutant.
# Directory and file name are passed separately so os.path.join supplies the
# separator instead of relying on `datadir` ending in "/".
IFN_signaling_file = os.path.join(datadir, "NS5_IFN_signaling.csv")
IFN_signaling = pd.read_csv(IFN_signaling_file)

# replace "stop" with "*" and build the mutation name <wt_aa><site><mut_aa>,
# e.g. "K45A"
IFN_signaling = (IFN_signaling
                 .replace(to_replace = "stop", value = "*")
                 .assign(mutation=lambda x: x['wt_aa'] + x['site'].astype(str) + x['mut_aa'])
                )

# Take the replicate columns once, before any summary column is appended, so
# the mean and the median are always computed over exactly the same columns.
# NOTE(review): assumes column positions 4-8 hold the replicate values --
# confirm against the CSV header.
rep_values = IFN_signaling.iloc[:, 4:9]

# add columns with mean and median IFN signaling
IFN_signaling['mean_IFN'] = rep_values.mean(axis=1)
IFN_signaling['median_IFN'] = rep_values.median(axis=1)

# make a neat version of this dataframe without rep values
IFN_signaling_neat = IFN_signaling[['mutation', 'mean_IFN', 'median_IFN']]

  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,


In [7]:
# join the "shallow" IFN mutational scan onto the DMS table by mutation name;
# the default inner join keeps only mutations present in both tables
NS5_DMS_IFNshallow = pd.merge(IFN_signaling_neat, NS5_DMS_allsites, on='mutation')
NS5_DMS_IFNshallow

Unnamed: 0,mutation,mean_IFN,median_IFN,site,wildtype,mutant,prefs,entropy,neffective,muteffect_C636,muteffect_Huh75,foldchange_C636,foldchange_Huh75,diffsel_Huh75_vs_C636,mutation_type
0,K45A,0.790447,0.798957,45,K,A,0.05568,2.885072,17.904862,-0.1734,-0.4975,0.8867,0.7083,-0.169906,all-others
1,K45C,0.796639,0.825626,45,K,C,0.04797,2.885072,17.904862,-1.0441,-0.7126,0.4849,0.6102,0.404604,all-others
2,K45D,0.914163,0.894440,45,K,D,0.03535,2.885072,17.904862,-0.9780,-1.1530,0.5077,0.4497,-0.070365,all-others
3,K45E,1.961723,1.972298,45,K,E,0.07439,2.885072,17.904862,-1.2889,-0.0796,0.4093,0.9463,0.467275,all-others
4,K45F,1.471748,1.243766,45,K,F,0.02444,2.885072,17.904862,-1.9252,-1.6855,0.2633,0.3109,0.084282,all-others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,H855T,0.905544,0.867245,855,H,T,0.00152,1.699918,5.473498,-5.8609,-5.8700,0.0172,0.0171,0.000371,all-others
164,H855V,0.744221,0.736862,855,H,V,0.00357,1.699918,5.473498,-4.6244,-4.6382,0.0405,0.0402,0.001657,all-others
165,H855W,0.788319,0.805128,855,H,W,0.21592,1.699918,5.473498,0.1639,1.2802,1.1203,2.4287,0.865526,all-others
166,H855Y,0.861251,0.921469,855,H,Y,0.18906,1.699918,5.473498,0.9540,1.0886,1.9373,2.1267,0.254659,all-others


Now we can produce interactive Altair plots, color-coded by site entropy. Both site entropy and site neffective are shown in the tooltips, but each plot can only be color-coded by one of them.

In [8]:
# hover selection: the single point nearest the mouse pointer
nearest = alt.selection(type='single', empty='none', nearest=True, on='mouseover')

# plotting table: friendlier column names plus a constant `dummy` column that
# serves as the x position of the strip plot
mean_plot_data = (
    NS5_DMS_IFNshallow
    .rename(columns={'muteffect_Huh75': 'muteffect',
                     'mean_IFN': 'mean IFN signaling',
                     'entropy': 'site entropy',
                     'neffective': 'site neffective'})
    .assign(dummy=0)
)

# encodings shared by both panels: highlight the hovered point, show the
# tooltip, and color by site entropy
basechart = (
    alt.Chart(mean_plot_data)
    .add_selection(nearest)
    .encode(
        fill=alt.condition(nearest, alt.value('blue'), alt.value('lightblue')),
        opacity=alt.condition(nearest, alt.value(1), alt.value(0.8)),
        tooltip=['mutation', 'muteffect', 'site entropy', 'site neffective'],
        color='site entropy:Q',
    )
    .interactive()
)

# left panel: mutational effect vs. mean IFN signaling
mean_scatter = (
    basechart
    .encode(x='muteffect:Q', y='mean IFN signaling:Q')
    .mark_point()
    .properties(width=500, height=500)
)

# right panel: narrow strip of the mean IFN signaling values alone
mean_strip = (
    basechart
    .encode(x=alt.X('dummy:O', title=None), y='mean IFN signaling:Q')
    .properties(width=50, height=500)
    .mark_tick()
)

# side-by-side interactive plots to select mutations
mean_chart = mean_scatter | mean_strip

# write the interactive plot to an HTML file
plotfile = os.path.join(outdir, 'mean_IFN_signaling.html')
print(f"Saving interactive plot to {plotfile}")
mean_chart.save(plotfile)

# display the chart as the cell output
mean_chart

Saving interactive plot to ./results/shallow_mut_scanning/mean_IFN_signaling.html


In [9]:
# hover selection: the single point nearest the mouse pointer
nearest = alt.selection(type='single', empty='none', nearest=True, on='mouseover')

# plotting table: friendlier column names plus a constant `dummy` column that
# serves as the x position of the strip plot
median_plot_data = (
    NS5_DMS_IFNshallow
    .rename(columns={'muteffect_Huh75': 'muteffect',
                     'median_IFN': 'median IFN signaling',
                     'entropy': 'site entropy',
                     'neffective': 'site neffective'})
    .assign(dummy=0)
)

# encodings shared by both panels: highlight the hovered point, show the
# tooltip, and color by site entropy
basechart = (
    alt.Chart(median_plot_data)
    .add_selection(nearest)
    .encode(
        fill=alt.condition(nearest, alt.value('blue'), alt.value('lightblue')),
        opacity=alt.condition(nearest, alt.value(1), alt.value(0.8)),
        tooltip=['mutation', 'muteffect', 'site entropy', 'site neffective'],
        color='site entropy:Q',
    )
    .interactive()
)

# left panel: mutational effect vs. median IFN signaling
median_scatter = (
    basechart
    .encode(x='muteffect:Q', y='median IFN signaling:Q')
    .mark_point()
    .properties(width=500, height=500)
)

# right panel: narrow strip of the median IFN signaling values alone
median_strip = (
    basechart
    .encode(x=alt.X('dummy:O', title=None), y='median IFN signaling:Q')
    .properties(width=50, height=500)
    .mark_tick()
)

# side-by-side interactive plots to select mutations
median_chart = median_scatter | median_strip

# write the interactive plot to an HTML file
plotfile = os.path.join(outdir, 'median_IFN_signaling.html')
print(f"Saving interactive plot to {plotfile}")
median_chart.save(plotfile)

# display the chart as the cell output
median_chart

Saving interactive plot to ./results/shallow_mut_scanning/median_IFN_signaling.html
