In [1]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

In [2]:
# Path to the FEL results JSON that getFELHeaders/getFELData are called with below.
JSON_FILE = "../results/TP53/TP53_codons.fasta.FEL.json"
# Sites whose p-value is <= this value are kept in df_results and quoted in the summary message.
pvalueThreshold = 0.1

In [3]:
def getFELData(json_file):
    """Read a FEL results JSON file and return its MLE content entry keyed "0".

    Args:
        json_file: Path of the JSON file to open and parse.

    Returns:
        The value stored under ``["MLE"]["content"]["0"]`` in the parsed JSON.

    Raises:
        KeyError: If one of those keys is absent from the parsed document.
    """
    with open(json_file, "r") as handle:
        parsed = json.load(handle)
    mle_section = parsed["MLE"]
    return mle_section["content"]["0"]
#end method

def getFELHeaders(json_file):
    """Read a FEL results JSON file and return its MLE header list.

    Args:
        json_file: Path of the JSON file to open and parse.

    Returns:
        The value stored under ``["MLE"]["headers"]`` in the parsed JSON.

    Raises:
        KeyError: If one of those keys is absent from the parsed document.
    """
    with open(json_file, "r") as handle:
        parsed = json.load(handle)
    mle_section = parsed["MLE"]
    return mle_section["headers"]
#end method

In [4]:
# Each header entry is indexable; its first item is used as the column name.
columns = getFELHeaders(JSON_FILE)
headers = [entry[0] for entry in columns]
headers

['alpha', 'beta', 'alpha=beta', 'LRT', 'p-value', 'Total branch length']

In [5]:
# Content returned by getFELData for the configured file.
# NOTE(review): `data` is not referenced again in the cells visible here; the
# DataFrame cell below calls getFELData(JSON_FILE) itself.
data = getFELData(JSON_FILE)

### Selected Sites

In [6]:
# Build the site table: one row per site, numbered from 1, with
# omega = beta / alpha appended after the FEL columns. Where alpha is 0 the
# ratio comes out as inf (or NaN when beta is 0 as well).
df = pd.DataFrame(
    getFELData(JSON_FILE),
    columns=headers,
    dtype=float,
)
df.index = df.index + 1
df["omega"] = df["beta"] / df["alpha"]
df["Site"] = df.index
df

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,,1
2,0.000000,0.339422,0.249323,1.266684,0.260390,0.0,inf,2
3,0.000000,1.151249,12.645207,1.280825,0.257746,0.0,inf,3
4,0.701608,0.102552,0.177044,1.032707,0.309524,0.0,0.146167,4
5,1.452695,0.983727,1.103728,0.008873,0.924953,0.0,0.677174,5
...,...,...,...,...,...,...,...,...
1163,0.000000,0.000000,0.000000,0.000000,1.000000,0.0,,1163
1164,1.352941,2.750000,10000.000000,0.000303,0.986119,0.0,2.032609,1164
1165,0.706738,0.699515,0.713233,-0.000073,1.000000,0.0,0.989779,1165
1166,1.352941,2.750000,10000.000000,0.000458,0.982920,0.0,2.032609,1166


In [8]:
# Keep only the sites whose p-value is at or below the configured threshold.
df_results = df.loc[df["p-value"] <= pvalueThreshold]
df_results

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
10,206.249310,0.121320,50.369435,4.158660,4.142216e-02,0.0,0.000588,10
29,8.826517,0.187108,0.427561,7.696592,5.532520e-03,0.0,0.021198,29
59,9.313465,0.300997,0.545297,7.066748,7.852767e-03,0.0,0.032319,59
60,8.150542,0.253460,0.754393,9.577822,1.969419e-03,0.0,0.031097,60
66,14.228699,0.295181,0.653467,11.154736,8.381744e-04,0.0,0.020745,66
...,...,...,...,...,...,...,...,...
1156,2.224279,0.141684,0.299829,57.110090,4.118927e-14,0.0,0.063699,1156
1157,1.876779,0.343846,0.622276,46.167193,1.085798e-11,0.0,0.183210,1157
1158,1.332969,0.230111,0.419855,41.835945,9.926171e-11,0.0,0.172630,1158
1159,1.185283,0.086985,0.196890,44.216857,2.939393e-11,0.0,0.073388,1159


In [9]:
# Significant sites with omega above 1, renumbered from 1. The original site
# number stays available in the "Site" column.
positive_sites = df_results.loc[df_results["omega"] > 1.0].reset_index(drop=True)
positive_sites.index = positive_sites.index + 1
positive_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.340926,1.040929,0.715704,3.157398,0.075584,0.0,3.053245,237
2,0.0,1.328591,1.008843,2.92982,0.086957,0.0,inf,402
3,0.0,4.128121,2.812925,4.090232,0.043132,0.0,inf,495
4,0.0,2.950661,190.243593,3.102722,0.078161,0.0,inf,616
5,0.0,0.825339,0.532952,2.938756,0.086477,0.0,inf,628
6,0.0,2.016233,0.778453,5.255167,0.021882,0.0,inf,632
7,0.0,0.726619,0.47677,2.751053,0.09719,0.0,inf,642
8,0.0,0.769336,0.452595,3.094124,0.078575,0.0,inf,1151


In [10]:
# Significant sites with omega below 1, renumbered from 1. The original site
# number stays available in the "Site" column.
negative_sites = df_results.loc[df_results["omega"] < 1.0].reset_index(drop=True)
negative_sites.index = negative_sites.index + 1
negative_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,206.249310,0.121320,50.369435,4.158660,4.142216e-02,0.0,0.000588,10
2,8.826517,0.187108,0.427561,7.696592,5.532520e-03,0.0,0.021198,29
3,9.313465,0.300997,0.545297,7.066748,7.852767e-03,0.0,0.032319,59
4,8.150542,0.253460,0.754393,9.577822,1.969419e-03,0.0,0.031097,60
5,14.228699,0.295181,0.653467,11.154736,8.381744e-04,0.0,0.020745,66
...,...,...,...,...,...,...,...,...
407,2.224279,0.141684,0.299829,57.110090,4.118927e-14,0.0,0.063699,1156
408,1.876779,0.343846,0.622276,46.167193,1.085798e-11,0.0,0.183210,1157
409,1.332969,0.230111,0.419855,41.835945,9.926171e-11,0.0,0.172630,1158
410,1.185283,0.086985,0.196890,44.216857,2.939393e-11,0.0,0.073388,1159


In [54]:
#df = pd.DataFrame(getFELData(JSON_FILE), columns=headers, dtype = float)
#df.index += 1

# Save the DF here.
#OUTPUT = JSON_FILE.split("/")[-1].replace(".FEL.json", ".csv")
#print("# Saving:", OUTPUT)
#df.to_csv(OUTPUT)

#df["Site"] = df.index
#df["omega"] = df["beta"] / df["alpha"]
#df["Site"] = df.index
#df

In [11]:
# Line plot of omega by site, restricted to rows with omega below 10
# (rows whose omega is inf or NaN fail the comparison and are left out too).
source = df.loc[df["omega"] < 10]
#source = df

line = (
    alt.Chart(source)
    .mark_line()
    .encode(x='Site', y='omega')
    .properties(width=800, height=600)
)

line

In [16]:

# Line plot of omega over every site, with the y scale fixed to the domain
# 0-10 and clamping enabled on that scale.
source = df

line = (
    alt.Chart(source)
    .mark_line()
    .encode(
        x='Site',
        y=alt.Y('omega', scale=alt.Scale(domain=(0, 10), clamp=True)),
    )
    .properties(width=800, height=600)
)

line

In [18]:
# Add a log10(omega) column to df and draw one bar per site, coloured by
# p-value with the reversed "reds" scheme. numpy is already imported as np in
# the notebook's first cell, so it is not imported again here.
df["log10(omega)"] = np.log10(df["omega"])

source = df

line = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='Site',
        y='log10(omega)',
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
    )
    .properties(width=800, height=600)
)

line

In [19]:
# Add a log10(omega) column to negative_sites and draw one bar per site,
# coloured by p-value with the reversed "reds" scheme. numpy is already
# imported as np in the notebook's first cell, so it is not imported again here.
negative_sites["log10(omega)"] = np.log10(negative_sites["omega"])

source = negative_sites

line = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='Site',
        y='log10(omega)',
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
    )
    .properties(width=800, height=600)
)

line

In [27]:
# Point plot of omega by site for negative_sites; both colour and size encode
# p-value on reversed scales. The numpy import that used to open this cell was
# unused here and has been dropped.
source = negative_sites

line = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x='Site',
        y='omega',
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
        size=alt.Size('p-value', scale=alt.Scale(reverse=True)),
    )
    .properties(width=800, height=600)
)

line

In [29]:
# Filled-circle plot of omega by site for negative_sites; both colour and size
# encode p-value on reversed scales. The numpy import that used to open this
# cell was unused here and has been dropped.
source = negative_sites

line = (
    alt.Chart(source)
    .mark_circle()
    .encode(
        x='Site',
        y='omega',
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
        size=alt.Size('p-value', scale=alt.Scale(reverse=True)),
    )
    .properties(width=800, height=600)
)

line

In [34]:
# Point plot of omega by site for negative_sites; both colour and size encode
# p-value on reversed scales. The numpy import that used to open this cell was
# unused here and has been dropped.
# NOTE(review): this cell builds the same mark_point chart as an earlier cell
# in this notebook; consider keeping only one of them.
source = negative_sites

line = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x='Site',
        y='omega',
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
        size=alt.Size('p-value', scale=alt.Scale(reverse=True)),
    )
    .properties(width=800, height=600)
)

line

## Figure legend.

In [36]:
## Summary

# a: number of rows (sites) in the full table; b: number of rows in negative_sites.
a = len(df["omega"])
b = len(negative_sites["omega"])

# Same message as before, built with an f-string and with the
# "statistically" spelling corrected.
print(
    f"FEL analysis of your gene of interest found {b} of {a} sites to be "
    f"statistically significant (p-value <= {pvalueThreshold}) "
    "for pervasive negative/purifying selection"
)


FEL analysis of your gene of interest found 411 of 1167 sites to be statisically significant (p-value <= 0.1) for pervasive negative/purifying selection
