In [21]:
import pandas as pd
import numpy as np
import json
from Bio import SeqIO
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [20]:
# Load the two prepared tables from ../dataframes; the first CSV column becomes the index.
df = pd.read_csv(
    '../dataframes/auspice_df.csv',
    index_col=0,
)
egg_df = pd.read_csv(
    '../dataframes/egg_df.csv',
    index_col=0,
)

In [22]:
# Site numbers of interest; several of them reappear below as egg_df column
# names (e.g. '160', 'mut194', 'mut186', 'mut156', 'mut219').
positions = [
    194, 186, 160, 225,
    219, 156, 203, 138,
]

In [25]:
# Tally the 'circulating160' values within each ('160', 'mut160') combination.
(
    egg_df
    .groupby(['160', 'mut160'])['circulating160']
    .value_counts()
)

160  mut160  circulating160
A    1       T                   2
I    1       T                   3
K    0       K                 181
     1       T                  46
R    1       T                   1
T    0       T                  12
Name: circulating160, dtype: int64

In [30]:
# Number of egg_df rows whose '160' value is anything other than 'K'.
print(egg_df.loc[egg_df['160'] != 'K'].shape[0])
# Number of egg_df rows that are 'K' at '160' and have the mut160 flag set to 1.
print(egg_df.loc[(egg_df['160'] == 'K') & (egg_df['mut160'] == 1)].shape[0])
# Same breakdown as the previous cell, shown next to the two totals for comparison.
(
    egg_df
    .groupby(['160', 'mut160'])['circulating160']
    .value_counts()
)

18
46


160  mut160  circulating160
A    1       T                   2
I    1       T                   3
K    0       K                 181
     1       T                  46
R    1       T                   1
T    0       T                  12
Name: circulating160, dtype: int64

In [32]:
# Keep only rows whose '160' value is K or T, then count strains for every
# combination of the 160/194/186/156/219 mutation flags. These counts are the
# numbers transcribed by hand into the Sankey links further down.
(
    egg_df[egg_df['160'].isin(['K', 'T'])]
    .groupby(['160', 'mut160', 'mut194', 'mut186', 'mut156', 'mut219'])['strain']
    .agg('count')
)

160  mut160  mut194  mut186  mut156  mut219
K    0       0       0       0       0         59
                             1       0          6
                                     1          1
                     1       0       0         28
                                     1         22
                             1       0         12
                                     1          4
             1       0       0       0         48
                     1       0       0          1
     1       0       1       0       0          3
                                     1          2
                             1       0          1
             1       0       0       0         39
                                     1          1
T    0       0       0       0       0          7
                             1       1          1
                     1       0       0          1
             1       0       0       0          3
Name: strain, dtype: int64

In [48]:
# Sankey diagram
from ipysankeywidget import SankeyWidget
from ipywidgets import Layout


# Link colour palette: 't*' keys are red shades, 'k*' keys are blue shades.
# The digit runs from the palest tint (0) to the fully saturated colour (3);
# the links below choose a higher digit for flows that have passed through
# more '...mutation' nodes.
cmap = {
    't0': '#ffcccc',
    't1': '#ff9999',
    't2': '#ff6666',
    't3': '#ff0000',
    'k0': '#ccccff',
    'k1': '#9999ff',
    'k2': '#6666ff',
    'k3': '#0000ff',
}

# Size of the widget canvas (values are passed as strings; presumably pixels - confirm
# against the ipywidgets/ipysankeywidget docs).
layout = Layout(
    width="1000",
    height="600",
)
# Flows of the Sankey diagram. Each link is one step along the path
# 160 -> 194 -> 186 -> 156 -> 219; 'value' is the strain count transcribed by hand
# from the groupby output above, and 'color' is taken from cmap.
# Node id convention: the prefix records the path so far (T/K/X at 160, then
# L or X at 194, S or X at 186, K or X at 156), and the suffix names the
# current site and whether it is mutated.
links = [
    # --- 160T: T at 160 with no mutation at that site (12 strains) ---
    {"source": "160T passaged\n in eggs", "target": "160T", "value": 12, "color": cmap["t0"]},
    {"source": "160T", "target": "T-194L", "value": 9, "color": cmap["t0"]},
    {"source": "160T", "target": "T-194mutation", "value": 3, "color": cmap["t1"]},
    {"source": "T-194L", "target": "T-L-186S", "value": 8, "color": cmap["t0"]},
    {"source": "T-194L", "target": "T-L-186mutation", "value": 1, "color": cmap["t1"]},
    {"source": "T-194mutation", "target": "T-X-186S", "value": 3, "color": cmap["t1"]},
    {"source": "T-X-186S", "target": "T-X-S-156K", "value": 3, "color": cmap["t1"]},
    {"source": "T-X-S-156K", "target": "T-X-S-K-219S", "value": 3, "color": cmap["t1"]},
    {"source": "T-L-186S", "target": "T-L-S-156K", "value": 7, "color": cmap["t0"]},
    {"source": "T-L-186S", "target": "T-L-S-156mutation", "value": 1, "color": cmap["t1"]},
    {"source": "T-L-S-156K", "target": "T-L-S-K-219S", "value": 7, "color": cmap["t0"]},
    {"source": "T-L-S-156mutation", "target": "T-L-S-X-219mutation", "value": 1, "color": cmap["t2"]},
    {"source": "T-L-186mutation", "target": "T-L-X-156K", "value": 1, "color": cmap["t1"]},
    {"source": "T-L-X-156K", "target": "T-L-X-K-219S", "value": 1, "color": cmap["t1"]},

    # --- T160X: placeholder, no links. The strains with A/I/R at 160
    #     (2 + 3 + 1 in the value_counts output above) are not drawn. ---

    # --- T160K: circulating T, K at 160 after passaging (46 strains) ---
    {"source": "160T passaged\n in eggs", "target": "T160K", "value": 46, "color": cmap["t1"]},
    {"source": "T160K", "target": "X-194L", "value": 6, "color": cmap["t1"]},
    {"source": "T160K", "target": "X-194mutation", "value": 40, "color": cmap["t2"]},
    {"source": "X-194L", "target": "X-L-186mutation", "value": 6, "color": cmap["t2"]},
    {"source": "X-194mutation", "target": "X-X-186S", "value": 40, "color": cmap["t2"]},
    {"source": "X-L-186mutation", "target": "X-L-X-156K", "value": 5, "color": cmap["t2"]},
    {"source": "X-L-186mutation", "target": "X-L-X-156mutation", "value": 1, "color": cmap["t3"]},
    {"source": "X-X-186S", "target": "X-X-S-156K", "value": 40, "color": cmap["t2"]},
    {"source": "X-L-X-156K", "target": "X-L-X-K-219S", "value": 3, "color": cmap["t2"]},
    {"source": "X-L-X-156K", "target": "X-L-X-K-219mutation", "value": 2, "color": cmap["t3"]},
    {"source": "X-L-X-156mutation", "target": "X-L-X-X-219S", "value": 1, "color": cmap["t3"]},
    {"source": "X-X-S-156K", "target": "X-X-S-K-219S", "value": 39, "color": cmap["t2"]},
    {"source": "X-X-S-156K", "target": "X-X-S-K-219mutation", "value": 1, "color": cmap["t3"]},

    # --- 160K: K at 160 with no mutation at that site (181 strains) ---
    {"source": "160K passaged\n in eggs", "target": "160K", "value": 181, "color": cmap["k0"]},
    {"source": "160K", "target": "K-194L", "value": 132, "color": cmap["k0"]},
    {"source": "160K", "target": "K-194mutation", "value": 49, "color": cmap["k1"]},
    {"source": "K-194L", "target": "K-L-186S", "value": 66, "color": cmap["k0"]},
    {"source": "K-194L", "target": "K-L-186mutation", "value": 66, "color": cmap["k1"]},
    {"source": "K-194mutation", "target": "K-X-186S", "value": 48, "color": cmap["k1"]},
    {"source": "K-194mutation", "target": "K-X-186mutation", "value": 1, "color": cmap["k2"]},
    {"source": "K-L-186S", "target": "K-L-S-156K", "value": 59, "color": cmap["k0"]},
    {"source": "K-L-186S", "target": "K-L-S-156mutation", "value": 7, "color": cmap["k1"]},
    {"source": "K-L-186mutation", "target": "K-L-X-156K", "value": 50, "color": cmap["k1"]},
    {"source": "K-L-186mutation", "target": "K-L-X-156mutation", "value": 16, "color": cmap["k2"]},
    {"source": "K-X-186S", "target": "K-X-S-156K", "value": 48, "color": cmap["k1"]},
    {"source": "K-X-186mutation", "target": "K-X-X-156K", "value": 1, "color": cmap["k2"]},
    {"source": "K-L-S-156K", "target": "K-L-S-K-219S", "value": 59, "color": cmap["k0"]},
    {"source": "K-L-S-156mutation", "target": "K-L-S-X-219S", "value": 6, "color": cmap["k1"]},
    {"source": "K-L-S-156mutation", "target": "K-L-S-X-219mutation", "value": 1, "color": cmap["k2"]},
    {"source": "K-L-X-156K", "target": "K-L-X-K-219S", "value": 28, "color": cmap["k1"]},
    {"source": "K-L-X-156K", "target": "K-L-X-K-219mutation", "value": 22, "color": cmap["k2"]},
    {"source": "K-L-X-156mutation", "target": "K-L-X-X-219S", "value": 12, "color": cmap["k2"]},
    {"source": "K-L-X-156mutation", "target": "K-L-X-X-219mutation", "value": 4, "color": cmap["k3"]},
    {"source": "K-X-S-156K", "target": "K-X-S-K-219S", "value": 48, "color": cmap["k1"]},
    {"source": "K-X-X-156K", "target": "K-X-X-K-219S", "value": 1, "color": cmap["k2"]},
]

# Display settings for the intermediate nodes, listed site by site. Nodes whose id
# ends in 'mutation' (plus 'T160K') get a visible title and the 'process' style;
# all other nodes get a single-space title so that no label is drawn for them.
nodes = [
    # site 160
    {"id": "160K", "title": " "},
    {"id": "160T", "title": " "},
    {"id": "T160K", "title": "T160K", "style": "process"},

    # site 194
    {"id": "K-194L", "title": " "},
    {"id": "T-194L", "title": " "},
    {"id": "X-194L", "title": " "},
    {"id": "K-194mutation", "title": "*194 mut*", "style": "process"},
    {"id": "T-194mutation", "title": "*194 mut*", "style": "process"},
    {"id": "X-194mutation", "title": "*194 mut*", "style": "process"},

    # site 186
    {"id": "T-L-186S", "title": " "},
    {"id": "T-L-186mutation", "title": "*186 mut*", "style": "process"},
    {"id": "T-X-186S", "title": " "},
    {"id": "X-L-186mutation", "title": "*186 mut*", "style": "process"},
    {"id": "X-X-186S", "title": " "},
    {"id": "K-L-186S", "title": " "},
    {"id": "K-L-186mutation", "title": "*186 mut*", "style": "process"},
    {"id": "K-X-186S", "title": " "},
    {"id": "K-X-186mutation", "title": "*186 mut*", "style": "process"},

    # site 156
    {"id": "T-X-S-156K", "title": " "},
    {"id": "T-L-S-156K", "title": " "},
    {"id": "T-L-S-156mutation", "title": "*156 mut*", "style": "process"},
    {"id": "T-L-X-156K", "title": " "},
    {"id": "X-L-X-156K", "title": " "},
    {"id": "X-L-X-156mutation", "title": "*156 mut*", "style": "process"},
    {"id": "X-X-S-156K", "title": " "},
    {"id": "K-L-S-156K", "title": " "},
    {"id": "K-L-S-156mutation", "title": "*156 mut*", "style": "process"},
    {"id": "K-L-X-156K", "title": " "},
    {"id": "K-L-X-156mutation", "title": "*156 mut*", "style": "process"},
    {"id": "K-X-S-156K", "title": " "},
    {"id": "K-X-X-156K", "title": " "},

    # site 219
    {"id": "T-L-S-K-219S", "title": " "},
    {"id": "T-L-X-K-219S", "title": " "},
    {"id": "T-X-S-K-219S", "title": " "},
    {"id": "X-L-X-K-219S", "title": " "},
    {"id": "T-L-S-X-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "X-L-X-K-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "X-L-X-X-219S", "title": " "},
    {"id": "X-X-S-K-219S", "title": " "},
    {"id": "X-X-S-K-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "K-L-S-K-219S", "title": " "},
    {"id": "K-L-S-X-219S", "title": " "},
    {"id": "K-L-S-X-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "K-L-X-K-219S", "title": " "},
    {"id": "K-L-X-K-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "K-L-X-X-219S", "title": " "},
    {"id": "K-L-X-X-219mutation", "title": "*219 mut*", "style": "process"},
    {"id": "K-X-S-K-219S", "title": " "},
    {"id": "K-X-X-K-219S", "title": " "},
]

# Labelled groups for the diagram: one for the starting genotypes and one per site.
# Every id under 'nodes' must match a link source/target exactly; an id that
# matches nothing leaves the intended node outside its group.
groups = [
    {'id': 'unpassaged genotype', 'title': 'unpassaged genotype',
     'nodes': ['160T passaged\n in eggs', '160K passaged\n in eggs']},
    {'id': '160', 'title': '160',
     'nodes': ['160T', '160K', 'T160K']},
    {'id': '194', 'title': '194',
     'nodes': ['K-194L', 'T-194L', 'X-194L',
               'K-194mutation', 'T-194mutation', 'X-194mutation']},
    {'id': '186', 'title': '186',
     'nodes': ['T-L-186S', 'T-L-186mutation', 'T-X-186S',
               'X-L-186mutation', 'X-X-186S',
               'K-L-186S', 'K-L-186mutation', 'K-X-186S', 'K-X-186mutation']},
    {'id': '156', 'title': '156',
     'nodes': ['T-X-S-156K', 'T-L-S-156K', 'T-L-S-156mutation', 'T-L-X-156K',
               'X-L-X-156K', 'X-L-X-156mutation', 'X-X-S-156K',
               'K-L-S-156K', 'K-L-S-156mutation', 'K-L-X-156K', 'K-L-X-156mutation',
               'K-X-S-156K', 'K-X-X-156K']},
    {'id': '219', 'title': '219',
     'nodes': ['X-X-S-K-219mutation',
               # Fixed: this entry used to read 'T-L-X-K-219mutation', an id that no link
               # or node uses; the real 160T-path 219 mutation node is the one below.
               'T-L-S-X-219mutation',
               'T-L-S-K-219S', 'T-L-X-K-219S', 'T-X-S-K-219S',
               'X-L-X-K-219S', 'X-L-X-K-219mutation', 'X-L-X-X-219S', 'X-X-S-K-219S',
               'K-L-S-K-219S', 'K-L-S-X-219S', 'K-L-S-X-219mutation',
               'K-L-X-K-219S', 'K-L-X-K-219mutation',
               'K-L-X-X-219S', 'K-L-X-X-219mutation',
               'K-X-S-K-219S', 'K-X-X-K-219S']},
]


# Build the widget from the links, node settings, groups and layout defined above;
# the left/right margins are wider than top/bottom (presumably to fit the outer labels).
sankey = SankeyWidget(
    links=links,
    nodes=nodes,
    groups=groups,
    layout=layout,
    margins={'top': 10, 'bottom': 0, 'left': 100, 'right': 70},
)
# Last expression of the cell: displays the widget.
sankey

A Jupyter Widget