In [22]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

In [10]:
# Significance cutoff: sites whose p-value is below this are kept as results.
pvalueThreshold = 0.1
# FEL results file (JSON); the path is relative to the current working directory.
JSON_FILE = "../results/REV3L/REV3L_codons.fasta.FEL.json"

In [17]:
def getFELData(json_file, partition="0"):
    """Return the per-site MLE content of one partition of a FEL JSON file.

    Args:
        json_file: Path to the FEL results file (JSON).
        partition: Key of the entry to read from ``MLE -> content``.
            Defaults to ``"0"``, the key this function previously hard-coded.
            Integers are accepted and converted to their string form, because
            JSON object keys are always strings.

    Returns:
        The object stored under ``["MLE"]["content"][partition]``. In this
        notebook it is handed to ``pd.DataFrame`` as the table rows.

    Raises:
        KeyError: If ``MLE``, ``content`` or the requested partition is missing.
    """
    # JSON is UTF-8 by specification; do not rely on the locale's default encoding.
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["content"][str(partition)]
#end method

def getFELHeaders(json_file):
    """Return the MLE header entries of a FEL JSON file.

    Args:
        json_file: Path to the FEL results file (JSON).

    Returns:
        The object stored under ``["MLE"]["headers"]``. This notebook takes the
        first element of each entry as a column name.

    Raises:
        KeyError: If ``MLE`` or ``headers`` is missing from the file.
    """
    # JSON is UTF-8 by specification; do not rely on the locale's default encoding.
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["headers"]
#end method

In [19]:
# The first element of every header entry is the column name; the rest of the
# entry is not used here.
columns = getFELHeaders(json_file=JSON_FILE)
headers = [column[0] for column in columns]
headers

['alpha', 'beta', 'alpha=beta', 'LRT', 'p-value', 'Total branch length']

In [20]:
# Raw per-site rows from the FEL file.
# NOTE(review): `data` is not referenced again in the cells visible here.
data = getFELData(json_file=JSON_FILE)

### Selected Sites

In [43]:
# One row per site, one float column per FEL header.
df = pd.DataFrame(data=getFELData(JSON_FILE), columns=headers, dtype=float)
# Shift the index by one so that the first row is site 1.
df.index = df.index + 1
# omega = beta / alpha: NaN where both are 0, inf where only alpha is 0.
# Site mirrors the shifted index as an ordinary column.
df = df.assign(omega=df["beta"] / df["alpha"], Site=df.index)
df

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,1
2,1.473573,101.503563,832.416667,0.007064,9.330181e-01,0.0,68.882637,2
3,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,3
4,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,4
5,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,5
...,...,...,...,...,...,...,...,...
4730,0.900913,0.000000,0.346987,42.773194,6.146872e-11,0.0,0.000000,4730
4731,0.725057,0.021799,0.304933,32.282659,1.332976e-08,0.0,0.030065,4731
4732,0.409262,0.062274,0.134502,8.812250,2.992149e-03,0.0,0.152161,4732
4733,0.203270,0.099954,0.128899,1.056338,3.040519e-01,0.0,0.491733,4733


In [46]:
# Keep every column, but only the rows (sites) whose p-value is strictly below
# the significance threshold.
df_results = df.loc[df["p-value"] < pvalueThreshold, :]
df_results

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
8,937.666667,0.000000,7.926809,5.143955,2.332745e-02,0.0,0.000000,8
10,10000.000000,0.079556,0.146784,36.865341,1.265761e-09,0.0,0.000008,10
11,2.775507,0.169674,0.440532,39.189257,3.846509e-10,0.0,0.061133,11
12,1.812152,0.238034,0.637233,27.837767,1.319256e-07,0.0,0.131354,12
16,0.412801,0.144242,0.239245,4.657475,3.091862e-02,0.0,0.349422,16
...,...,...,...,...,...,...,...,...
4729,1.171797,0.040026,0.280195,35.099468,3.132864e-09,0.0,0.034157,4729
4730,0.900913,0.000000,0.346987,42.773194,6.146872e-11,0.0,0.000000,4730
4731,0.725057,0.021799,0.304933,32.282659,1.332976e-08,0.0,0.030065,4731
4732,0.409262,0.062274,0.134502,8.812250,2.992149e-03,0.0,0.152161,4732


In [53]:
# Significant sites with omega > 1. This includes omega == inf (alpha == 0 with
# beta > 0) and excludes NaN, since comparisons with NaN are False.
# reset_index(drop=True) discards the old site-number index directly, instead
# of turning it into an "index" column that then has to be dropped in place.
positive_sites = df_results[df_results["omega"] > 1.0].reset_index(drop=True)
positive_sites.index += 1  # number the rows from 1; the site is kept in "Site"
positive_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.0,1.245272,0.693513,3.03173,0.081651,0.0,inf,1234
2,0.0,1.613266,1.335891,4.967953,0.025821,0.0,inf,1357
3,0.271009,1.925659,1.288612,4.609345,0.031798,0.0,7.105511,1379
4,0.0,0.540583,0.391684,2.72668,0.098684,0.0,inf,3045
5,0.741795,3.159465,2.630625,4.134669,0.042013,0.0,4.259216,3050
6,0.25627,0.995796,0.820092,4.188847,0.040691,0.0,3.885734,3074
7,0.770415,1.544074,1.330016,3.428697,0.064073,0.0,2.00421,3145
8,0.580457,1.346705,1.098046,5.329301,0.02097,0.0,2.320075,3149
9,0.280461,1.065424,0.784501,4.425021,0.035416,0.0,3.798831,3164
10,0.0,1.03752,0.763511,3.013697,0.082564,0.0,inf,3194


In [57]:
# Significant sites with omega < 1. Rows whose omega is NaN are excluded,
# since comparisons with NaN are False.
# reset_index(drop=True) discards the old site-number index directly, instead
# of turning it into an "index" column that then has to be dropped in place.
negative_sites = df_results[df_results["omega"] < 1.0].reset_index(drop=True)
negative_sites.index += 1  # number the rows from 1; the site is kept in "Site"
negative_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,937.666667,0.000000,7.926809,5.143955,2.332745e-02,0.0,0.000000,8
2,10000.000000,0.079556,0.146784,36.865341,1.265761e-09,0.0,0.000008,10
3,2.775507,0.169674,0.440532,39.189257,3.846509e-10,0.0,0.061133,11
4,1.812152,0.238034,0.637233,27.837767,1.319256e-07,0.0,0.131354,12
5,0.412801,0.144242,0.239245,4.657475,3.091862e-02,0.0,0.349422,16
...,...,...,...,...,...,...,...,...
2650,1.171797,0.040026,0.280195,35.099468,3.132864e-09,0.0,0.034157,4729
2651,0.900913,0.000000,0.346987,42.773194,6.146872e-11,0.0,0.000000,4730
2652,0.725057,0.021799,0.304933,32.282659,1.332976e-08,0.0,0.030065,4731
2653,0.409262,0.062274,0.134502,8.812250,2.992149e-03,0.0,0.152161,4732


In [54]:
#df = pd.DataFrame(getFELData(JSON_FILE), columns=headers, dtype = float)
#df.index += 1

# Save the DF here.
#OUTPUT = JSON_FILE.split("/")[-1].replace(".FEL.json", ".csv")
#print("# Saving:", OUTPUT)
#df.to_csv(OUTPUT)

#df["Site"] = df.index
#df["omega"] = df["beta"] / df["alpha"]
#df["Site"] = df.index
#df

In [28]:
# Plot only sites with omega < 10. This also leaves out inf and NaN omegas,
# presumably to keep the y-axis readable.
# To plot every site instead, use: source = df
source = df.loc[df["omega"] < 10]

line = (
    alt.Chart(source)
    .mark_line()
    .encode(x='Site', y='omega')
    .properties(width=800, height=600)
)

line