In [77]:
#Adapted from https://nbviewer.jupyter.org/github/empet/Plotly-plots/blob/master/Chord-diagram.ipynb?flush_cache=true

In [78]:
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
import numpy as np
import pandas as pd
import itertools

In [79]:
df = pd.read_csv('../dataframes/h3n2_ha_12y_hi_egg.csv')

In [80]:
sites = ['186','194','138','156','203','219','225','246']
for site in sites:
    if site not in df.columns:
        sites.remove(site)

matrix = np.zeros((len(sites), len(sites)), dtype=int)
for site_a in sites:
    for site_b in sites:
        #Set diagonal to be number of viruses with ONLY that mutation
        if site_a == site_b:         
            other_sites = sites.copy()
            other_sites.remove(site_a)
            mask = df['mut'+str(site_a)]==True
            for other_site in other_sites:
                res = df['mut'+str(other_site)]==False
                mask &= res
            matrix_entry = len(df[mask])
        else:
            matrix_entry = len(df[(df['mut'+str(site_a)]==True)&(df['mut'+str(site_b)]==True)])
        matrix[sites.index(site_a)][sites.index(site_b)] = matrix_entry
print(matrix)

[[38  1 11 27  0 39 13 18]
 [ 1 86 14  0 22  0 17  0]
 [11 14  9  7  3  2 11  2]
 [27  0  7  6  0  9  6  2]
 [ 0 22  3  0  1  1  1  0]
 [39  0  2  9  1  2  6  0]
 [13 17 11  6  1  6  6  7]
 [18  0  2  2  0  0  7 12]]


In [81]:
labels = ['mut 186','mut 194','mut 138','mut 156','mut 203','mut 219','mut 225','mut246']
ideo_colors = ['rgba(26,152,80, 0.75)',
               'rgba(215,48,39, 0.75)',
             'rgba(244,109,67, 0.75)',
             'rgba(253,174,97, 0.75)',
             'rgba(254,224,139, 0.75)',
             'rgba(217,239,139, 0.75)',
             'rgba(166,217,106, 0.75)',
             'rgba(102,189,99, 0.75)']

In [82]:
def check_data(data_matrix):
    L, M=data_matrix.shape
    if L!=M:
        raise ValueError('Data array must have (n,n) shape')
    return L

L=check_data(matrix)

In [83]:
radii_sribb=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]# these value are set after a few trials 

In [84]:
def moduloAB(x, a, b): #maps a real number onto the unit circle identified with 
                       #the interval [a,b), b-a=2*PI
        if a>= b:
            raise ValueError('Incorrect interval ends')
        y = (x-a) % (b-a)
        return y+b if y < 0 else y+a

In [85]:
def test_2PI(x):
    return True
#      return 0 <= x < 2*pi

In [86]:
pi = np.pi

row_sum = [np.sum(matrix[k,:]) for k in range(L)]

#set the gap between two consecutive ideograms
gap = 2*pi*0.005
ideogram_length = 2*pi * np.asarray(row_sum) / sum(row_sum) - gap*np.ones(L)

In [87]:
def get_ideogram_ends(ideogram_len, gap):
    ideo_ends = []
    left = 0
    for k in range(len(ideogram_len)):
        right = left + ideogram_len[k]
        ideo_ends.append([left, right])   
        left = right + gap
    return ideo_ends 

In [88]:
ideo_ends = get_ideogram_ends(ideogram_length, gap)

In [89]:
def make_ideogram_arc(R, phi, a=100):
    # R is the circle radius
    # phi is the list of  angle coordinates of an arc ends
    # a is a parameter that controls the number of points to be evaluated on an arc
    if not test_2PI(phi[0]) or not test_2PI(phi[1]):
        phi = [moduloAB(t, 0, 2*pi) for t in phi]
    length = (phi[1]-phi[0]) % 2*pi 
    nr = 18 if length <= pi/4 else int(a*length/pi)

    if phi[0] < phi[1]:   
        theta = np.linspace(phi[0], phi[1], nr)
    else:
        phi = [moduloAB(t, -pi, pi) for t in phi]
        theta = np.linspace(phi[0], phi[1], nr)
    return R * np.exp(1j*theta)  

In [90]:
make_ideogram_arc(1.3, [11*pi/6, pi/17])

array([1.12583302-0.65j      , 1.13689513-0.6304518j ,
       1.14761784-0.61071539j, 1.15799797-0.59079667j,
       1.16803241-0.57070158j, 1.17771816-0.55043613j,
       1.18705234-0.53000636j, 1.19603216-0.50941837j,
       1.20465494-0.48867831j, 1.21291809-0.46779236j,
       1.22081917-0.44676677j, 1.22835581-0.42560781j,
       1.23552575-0.4043218j , 1.24232686-0.38291509j,
       1.24875711-0.36139407j, 1.25481458-0.33976517j,
       1.26049746-0.31803483j, 1.26580405-0.29620956j,
       1.27073277-0.27429586j, 1.27528215-0.25230028j,
       1.27945083-0.23022939j, 1.28323757-0.20808976j,
       1.28664123-0.18588801j, 1.2896608 -0.16363078j,
       1.29229537-0.14132469j, 1.29454417-0.11897642j,
       1.29640652-0.09659263j, 1.29788186-0.07418j   ,
       1.29896976-0.05174523j, 1.29966988-0.02929501j,
       1.29998203-0.00683605j, 1.2999061 +0.01562495j,
       1.29944212+0.03808129j, 1.29859022+0.06052626j,
       1.29735067+0.08295316j, 1.29572384+0.1053553j ,
       1.2

In [91]:
def map_data(data_matrix, row_value, ideogram_length):
    mapped = np.zeros(data_matrix.shape)
    for j  in range(L):
        mapped[:, j] = ideogram_length * data_matrix[:,j] / row_value
    return mapped  

In [92]:
mapped_data = map_data(matrix, row_sum, ideogram_length)
mapped_data

array([[0.39114483, 0.01029329, 0.11322614, 0.2779187 , 0.        ,
        0.40143812, 0.13381271, 0.18527913],
       [0.0102826 , 0.88430355, 0.14395639, 0.        , 0.22621719,
        0.        , 0.17480419, 0.        ],
       [0.10971978, 0.13964336, 0.08977073, 0.06982168, 0.02992358,
        0.01994905, 0.10971978, 0.01994905],
       [0.26880774, 0.        , 0.0696909 , 0.05973505, 0.        ,
        0.08960258, 0.05973505, 0.01991168],
       [0.        , 0.20647003, 0.028155  , 0.        , 0.009385  ,
        0.009385  , 0.009385  , 0.        ],
       [0.38900649, 0.        , 0.01994905, 0.08977073, 0.00997453,
        0.01994905, 0.05984715, 0.        ],
       [0.13049536, 0.17064778, 0.11041915, 0.06022863, 0.0100381 ,
        0.06022863, 0.06022863, 0.07026673],
       [0.17533362, 0.        , 0.01948151, 0.01948151, 0.        ,
        0.        , 0.0681853 , 0.11688908]])

In [93]:
idx_sort = np.argsort(mapped_data, axis=1)
idx_sort

array([[4, 1, 2, 6, 7, 3, 0, 5],
       [3, 5, 7, 0, 2, 6, 4, 1],
       [5, 7, 4, 3, 2, 0, 6, 1],
       [1, 4, 7, 3, 6, 2, 5, 0],
       [0, 3, 7, 4, 5, 6, 2, 1],
       [1, 7, 4, 2, 5, 6, 3, 0],
       [4, 3, 5, 6, 7, 2, 0, 1],
       [1, 4, 5, 2, 3, 6, 7, 0]])

In [94]:
def make_ribbon_ends(mapped_data, ideo_ends,  idx_sort):
    L = mapped_data.shape[0]
    ribbon_boundary = np.zeros((L,L+1))
    for k in range(L):
        start = ideo_ends[k][0]
        ribbon_boundary[k][0] = start
        for j in range(1,L+1):
            J = idx_sort[k][j-1]
            ribbon_boundary[k][j] = start + mapped_data[k][J]
            start = ribbon_boundary[k][j]
    return [[(ribbon_boundary[k][j], ribbon_boundary[k][j+1] ) for j in range(L)] for k in range(L)] 

In [95]:
ribbon_ends = make_ribbon_ends(mapped_data, ideo_ends,  idx_sort)
print ('ribbon ends starting from the ideogram[2]\n', ribbon_ends[2])

ribbon ends starting from the ideogram[2]
 [(3.0155086674925435, 3.0354577185131846), (3.0354577185131846, 3.0554067695338256), (3.0554067695338256, 3.0853303460647874), (3.0853303460647874, 3.1551520246370313), (3.1551520246370313, 3.2449227542299157), (3.2449227542299157, 3.3546425348434417), (3.3546425348434417, 3.4643623154569676), (3.4643623154569676, 3.604005672601455)]


In [96]:
def control_pts(angle, radius):
    #angle is a  3-list containing angular coordinates of the control points b0, b1, b2
    #radius is the distance from b1 to the  origin O(0,0) 

    if len(angle) != 3:
        raise InvalidInputError('angle must have len =3')
    b_cplx = np.array([np.exp(1j*angle[k]) for k in range(3)])
    b_cplx[1] = radius * b_cplx[1]
    return list(zip(b_cplx.real, b_cplx.imag))

In [97]:
def ctrl_rib_chords(l, r, radius):
    # this function returns a 2-list containing control poligons of the two quadratic Bezier
    #curves that are opposite sides in a ribbon
    #l (r) the list of angular variables of the ribbon arc ends defining 
    #the ribbon starting (ending) arc 
    # radius is a common parameter for both control polygons
    if len(l) != 2 or len(r) != 2:
        raise ValueError('the arc ends must be elements in a list of len 2')
    return [control_pts([l[j], (l[j]+r[j])/2, r[j]], radius) for j in range(2)]

In [98]:
#Define ribbon color
ribbon_color = [L * ['rgba(175,175,175,0.5)'] for k in range(L)]

In [99]:
#Change 186 and 194 ribbon colors
for k_186 in range(len(ribbon_color[0])):
    ribbon_color[0][k_186] = ideo_colors[0]
for k_194 in range(len(ribbon_color[1])):
    ribbon_color[1][k_194] = ideo_colors[1]


In [100]:
def make_q_bezier(b):# defines the Plotly SVG path for a quadratic Bezier curve defined by the 
                     #list of its control points
    if len(b) != 3:
        raise valueError('control poligon must have 3 points')
    A, B, C = b    
    return f'M {A[0]}, {A[1]} Q {B[0]}, {B[1]} {C[0]}, {C[1]}'

In [101]:
b=[(1,4), (-0.5, 2.35), (3.745, 1.47)]
make_q_bezier(b)

'M 1, 4 Q -0.5, 2.35 3.745, 1.47'

In [102]:
def make_ribbon_arc(theta0, theta1):

    if test_2PI(theta0) and test_2PI(theta1):
        if theta0 < theta1:
            theta0 = moduloAB(theta0, -pi, pi)
            theta1 = moduloAB(theta1, -pi, pi)
            if theta0  *theta1 > 0:
                raise ValueError('incorrect angle coordinates for ribbon')
    
        nr = int(40 * (theta0 - theta1) / pi)
        if nr <= 2: nr = 3
        theta = np.linspace(theta0, theta1, nr)
        pts=np.exp(1j*theta)# points in polar complex form, on the given arc
    
        string_arc = ''
        for k in range(len(theta)):
            string_arc += f'L {pts.real[k]}, {pts.imag[k]} '
        return   string_arc 
    else:
        raise ValueError('the angle coordinates for an arc side of a ribbon must be in [0, 2*pi]')

In [103]:
make_ribbon_arc(np.pi/3, np.pi/6)

'L 0.5000000000000001, 0.8660254037844386 L 0.5877852522924732, 0.8090169943749473 L 0.6691306063588583, 0.7431448254773941 L 0.7431448254773942, 0.6691306063588581 L 0.8090169943749475, 0.5877852522924731 L 0.8660254037844387, 0.49999999999999994 '

In [104]:
def make_layout(title, plot_size):

    return dict(title=title,
                xaxis=dict(visible=False),
                yaxis=dict(visible=False),
                showlegend=False,
                width=plot_size,
                height=plot_size,
                margin=dict(t=25, b=25, l=25, r=25),
                hovermode=False,
                 )  

In [105]:
def make_ideo_shape(path, line_color, fill_color):
    #line_color is the color of the shape boundary
    #fill_color is the color assigned to an ideogram
    
    return  dict(line=dict(color=line_color, 
                           width=0.45),
                 path=path,
                 layer='below',
                 type='path',
                 fillcolor=fill_color)   

In [106]:
def make_ribbon(l, r, line_color, fill_color, radius=0.2):
    #l=[l[0], l[1]], r=[r[0], r[1]]  represent the opposite arcs in the ribbon 
    #line_color is the color of the shape boundary
    #fill_color is the fill color for the ribbon shape
    
    poligon = ctrl_rib_chords(l,r, radius)
    b, c = poligon  
           
    return  dict(line=dict(color=line_color, 
                             width=0.5),
                 path=make_q_bezier(b) + make_ribbon_arc(r[0], r[1])+
                         make_q_bezier(c[::-1]) + make_ribbon_arc(l[1], l[0]),
                 type='path',
                 layer='below',
                 fillcolor = fill_color,    
        )

def make_self_rel(l, line_color, fill_color, radius):
    #radius is the radius of Bezier control point b_1
    
    b = control_pts([l[0], (l[0]+l[1])/2, l[1]], radius) 
    
    return  dict(line = dict(color=line_color, 
                             width=0.5),
                 path =  make_q_bezier(b)+make_ribbon_arc(l[1], l[0]),
                 type = 'path',
                 layer = 'below',
                 fillcolor = fill_color   
                )

In [107]:
def invPerm(perm):
    # function that returns the inverse of a permutation, perm
    inv = [0] * len(perm)
    for i, s in enumerate(perm):
        inv[s] = i
    return inv

In [108]:
layout=make_layout('Pairwise epistasis between HA1 residues commonly mutated during egg-passaging', 800)   

In [109]:
ribbon_info=[]
shapes=[]
annotations = []
for k in range(L):
    
    sigma = idx_sort[k]
    sigma_inv = invPerm(sigma)
    for j in range(k, L):
        if matrix[k][j] == 0 and matrix[j][k]==0: continue
        eta = idx_sort[j]
        eta_inv = invPerm(eta)
        l = ribbon_ends[k][sigma_inv[j]]  
        
        if j == k:
            shapes.append(make_self_rel(l, 'rgba(175,175,175,0.5)',
                                        'rgba(175,175,175,0.5)', radius=radii_sribb[k])) 
#             shapes.append(make_self_rel(l, ideo_colors[k] ,
#                         ideo_colors[k], radius=radii_sribb[k])) 

            z = 0.9*np.exp(1j*(l[0]+l[1])/2)
            

            ribbon_info.append(go.Scatter(x=[z.real],
                                       y=[z.imag],
                                       mode='markers',
                                       marker=dict(size=0.5, color=ideo_colors[k])
                                       )
                              )
        else:
            r = ribbon_ends[j][eta_inv[k]]
            zi = 0.9 * np.exp(1j*(l[0]+l[1])/2)
            zf = 0.9 * np.exp(1j*(r[0]+r[1])/2)
            #texti and textf are the strings that will be displayed when hovering the mouse 
            #over the two ribbon ends
            texti = f'{labels[k]}'
            textf = f'{labels[j]}'
            
            ribbon_info.append(go.Scatter(x=[zi.real],
                                          y=[zi.imag],
                                          mode='markers',
                                          marker=dict(size=0.5, color=ribbon_color[k][j]),
                                          text=texti
                                       )
                              ),
            ribbon_info.append(go.Scatter(x=[zf.real],
                                          y=[zf.imag],
                                          mode='markers',
                                          marker=dict(size=0.5, color=ribbon_color[k][j]),
                                          text=textf
                                       )
                              )
            r = (r[1], r[0]) # IMPORTANT!!!  Reverse these arc ends because otherwise you get
                          # a twisted ribbon
            #append the ribbon shape
            shapes.append(make_ribbon(l, r, ribbon_color[k][j] , ribbon_color[k][j]))
           
                                    

In [110]:
ideograms = []
for k in range(len(ideo_ends)):
    z =  make_ideogram_arc(1.1, ideo_ends[k])
    zi = make_ideogram_arc(1.0, ideo_ends[k])
    m = len(z)
    n = len(zi)
    ideograms.append(go.Scatter(x=z.real,
                                y=z.imag,
                                mode='lines',
                                line=dict(color=ideo_colors[k], shape='spline', width=0.25)
#                                 text=f'{labels[k]} <br>{int(row_sum[k])} viruses'
                             )
                     )
    
       
    path = 'M '
    for s in range(m):
        path += f'{z.real[s]}, {z.imag[s]} L '
        
    Zi = np.array(zi.tolist()[::-1]) 

    for s in range(m):
        path += f'{Zi.real[s]}, {Zi.imag[s]} L '
    path += f'{z.real[0]} ,{z.imag[0]}' 
   
    shapes.append(make_ideo_shape(path,ideo_colors[k] , ideo_colors[k]))
    
    z_text =  make_ideogram_arc(1.2, ideo_ends[k])
    Z_text = np.array(z_text.tolist()[::-1]) 
    annotations.append(dict(x=Z_text.real[int(len(Z_text)/2)], 
                            y=Z_text.imag[int(len(Z_text)/2)], 
                            showarrow=False,
                            align='left',
                            valign='bottom',
                            text=f'{labels[k]}'))


In [111]:
data = ideograms + ribbon_info
layout['shapes'] = shapes
layout['annotations'] = annotations
fig = go.Figure(data=data, layout=layout)
from plotly.offline import download_plotlyjs, init_notebook_mode,  iplot, plot
init_notebook_mode(connected=True)
iplot(fig)