# Compare pairs of lineages w.r.t. mutational profiles and determinants of transmissibility

In [1]:
import pickle
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import torch
from pyrocov import pangolin
import pandas as pd

# Notebook-wide matplotlib defaults, applied in a single update.
matplotlib.rcParams.update({
    "figure.dpi": 200,
    "axes.edgecolor": "gray",
    "savefig.bbox": "tight",
    "font.family": "sans-serif",
    # Candidate sans-serif fonts, listed in the same order as before.
    "font.sans-serif": ["Arial", "Avenir", "DejaVu Sans"],
})

In [2]:
# Load the preprocessed dataset onto the CPU; the printed keys show it is a dict.
# NOTE(review): torch.load unpickles the file, so only load trusted artifacts.
dataset = torch.load(
    "results/mutrans.data.single.None.pt",
    map_location="cpu",
)
print(dataset.keys())
# Expose every dataset entry as a notebook-level variable. Later cells read names
# such as `lineage_id_inv` that exist only because of this injection.
globals().update(dataset)

dict_keys(['location_id', 'mutations', 'weekly_clades', 'features', 'lineage_id', 'lineage_id_inv', 'time_shift'])


In [3]:
# Feature tensor from the dataset; later cells index its first axis by lineage id.
features = dataset['features']

# Per-mutation coefficients from the paper's TSV, indexed by the file's second column
# and reordered to follow dataset['mutations'].
# NOTE(review): presumably this makes row i of `coefficients` line up with feature
# column i of `features` -- confirm against the code that wrote both files.
coefficients = pd.read_csv("paper/mutations.tsv", sep="\t", index_col=1)
coefficients = coefficients.loc[dataset['mutations']].copy()
feature_names = coefficients.index.values.tolist()

# Bind `lineage_id_inv` from the dataset *before* using it. Previously the dict
# comprehension ran one line earlier and only worked because an earlier cell had
# injected the dataset keys into the notebook namespace.
lineage_id_inv = dataset['lineage_id_inv']
# Inverse lookup: lineage name -> position in `lineage_id_inv`.
lineage_id = {name: i for i, name in enumerate(lineage_id_inv)}

# Columns of the coefficients table as plain arrays, in `feature_names` order.
deltaR = coefficients['Δ log R'].values
zscore = coefficients['mean/stddev'].values

In [4]:
# ---------------------------------------------------------------
# Select the pair of lineages to compare (A is compared against B).
# Uncomment exactly one alternative below to switch the pair.
# ---------------------------------------------------------------
A, B = 'B.1.617.2', 'B.1'
#A, B = 'B.1.1.7', 'B.1.1'
#A, B = 'B.1.427', 'B.1'
#A, B = 'B.1.351', 'B.1'
#A, B = 'P.1', 'B.1.1'
#A, B = 'AY.2', 'B.1.617.2'

# Resolve lineage names to their integer ids.
A_id = lineage_id[A]
B_id = lineage_id[B]

# Per-mutation feature vectors of each lineage as numpy arrays.
A_feat = features[A_id].numpy()
B_feat = features[B_id].numpy()

# Signed and absolute per-mutation feature difference (A minus B).
delta_cov = A_feat - B_feat
delta_cov_abs = np.fabs(delta_cov)

In [5]:
# Look at the top 100 mutations w.r.t. effect size |Δ log R|, keeping those that
# also differ strongly between lineage A and lineage B.
abs_deltaR = np.fabs(deltaR)
# Cap at the number of available mutations so a short list cannot raise IndexError.
top_k = min(100, abs_deltaR.size)
# Cutoff is the top_k-th largest |Δ log R|; with no mutations nothing can pass it.
deltaR_cutoff = np.sort(abs_deltaR)[-top_k] if top_k else np.inf
print("deltaR_cutoff", deltaR_cutoff)

selected_features = []

for i, name in enumerate(feature_names):
    dR = deltaR[i]
    dC = delta_cov[i]
    z = zscore[i]
    # `>=` keeps the mutation sitting exactly on the cutoff, so the selection really
    # spans the top 100 (a strict `>` silently dropped the 100th entry).
    # The test is on signed dR, so only positive Δ log R values can be selected.
    if dR >= deltaR_cutoff and np.fabs(dC) > 0.5:
        # Store the unpadded name; padding is only for aligning the printed table.
        selected_features.append(name)
        label = name + "  " if len(name) <= 6 else name
        print("{} \t deltaR: {:.3f}  zscore: {:.2f} \t\t delta_feature: {:.2f}".format(label, dR, z, dC))

print("\n", A, "over" ,B)
# One comma-separated line without padded names or a dangling separator.
print(", ".join(selected_features))

deltaR_cutoff 0.042476400000000004
ORF1a:P2287S 	 deltaR: 0.044  zscore: 253.71 		 delta_feature: 0.80
ORF1a:T3255I 	 deltaR: 0.045  zscore: 240.18 		 delta_feature: 0.80
S:L452R 	 deltaR: 0.048  zscore: 244.88 		 delta_feature: 0.98
S:P681R 	 deltaR: 0.051  zscore: 300.61 		 delta_feature: 0.97

 B.1.617.2 over B.1
ORF1a:P2287S, ORF1a:T3255I, S:L452R, S:P681R, 

In [6]:
selected_features = []

# Look at large z-score mutations (i.e. those that increase growth rate) that also
# differ strongly between lineage A and lineage B.
# NOTE(review): the 200.0 z-score threshold is unexplained in this notebook -- confirm its origin.
for i, name in enumerate(feature_names):
    dR = deltaR[i]
    dC = delta_cov[i]
    z = zscore[i]
    if z > 200.0 and np.fabs(dC) > 0.5:
        # Store the unpadded name; padding is only for aligning the printed table.
        # Previously the padded string was stored, so the summary line printed
        # entries such as "M:I82T  , ".
        selected_features.append(name)
        label = name + "  " if len(name) <= 6 else name
        print("{} \t deltaR: {:.3f}  zscore: {:.2f} \t\t delta_feature: {:.2f}".format(label, dR, z, dC))

print("\n", A, "over" ,B)
# One comma-separated line without padded names or a dangling separator.
print(", ".join(selected_features))

M:I82T   	 deltaR: 0.038  zscore: 230.30 		 delta_feature: 1.00
N:D63G   	 deltaR: 0.028  zscore: 201.31 		 delta_feature: 0.99
ORF1a:P2287S 	 deltaR: 0.044  zscore: 253.71 		 delta_feature: 0.80
ORF1a:T3255I 	 deltaR: 0.045  zscore: 240.18 		 delta_feature: 0.80
ORF1b:G662S 	 deltaR: 0.027  zscore: 228.74 		 delta_feature: 0.98
ORF1b:P1000L 	 deltaR: 0.035  zscore: 211.10 		 delta_feature: 0.98
S:D950N 	 deltaR: 0.036  zscore: 231.28 		 delta_feature: 0.98
S:E156- 	 deltaR: 0.028  zscore: 203.85 		 delta_feature: 0.95
S:L452R 	 deltaR: 0.048  zscore: 244.88 		 delta_feature: 0.98
S:P681R 	 deltaR: 0.051  zscore: 300.61 		 delta_feature: 0.97

 B.1.617.2 over B.1
M:I82T  , N:D63G  , ORF1a:P2287S, ORF1a:T3255I, ORF1b:G662S, ORF1b:P1000L, S:D950N, S:E156-, S:L452R, S:P681R, 

In [7]:
def findfeat(s):
    """Return the position of the first entry of `feature_names` equal to `s`.

    Returns -1 when no entry matches. Callers must check for that value before
    indexing with it, because -1 is itself a valid (last-element) index.
    """
    matches = (pos for pos, candidate in enumerate(feature_names) if candidate == s)
    return next(matches, -1)

In [8]:
# Print the S:H69- feature value for B.1 and then for B.1.1.7.
h69_idx = findfeat('S:H69-')
# findfeat signals "not found" with -1, which would silently select the last
# feature if used as an index; fail loudly instead.
if h69_idx < 0:
    raise KeyError('S:H69-')
for lineage in ('B.1', 'B.1.1.7'):
    print(features[lineage_id[lineage]].numpy()[h69_idx])

0.0054945056
0.96954316


In [9]:
# Effect size and z-score columns for the S:H69- row(s), selected with a boolean
# mask so an absent label yields an empty array rather than an error.
is_h69_deletion = coefficients.index == 'S:H69-'
coefficients.loc[is_h69_deletion, ['Δ log R', 'mean/stddev']].values

array([[1.73514e-02, 2.10334e+02]])