In [1]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

In [2]:
# Path to the MEME results JSON that the loader functions below read.
JSON_FILE = "../results/REV3L/REV3L_codons.fasta.MEME.json"
# Sites whose "p-value" column is strictly below this cutoff are kept as results.
pvalueThreshold = 0.1

In [3]:
def getMEMEData(json_file):
    """Return the ``["MLE"]["content"]["0"]`` entry of a MEME results JSON file.

    Parameters
    ----------
    json_file : str or path-like
        Path to the JSON file to read.

    Returns
    -------
    object
        Whatever is stored under ``MLE -> content -> "0"``.
        NOTE(review): presumably a list of per-site rows for partition 0,
        one value per entry of ``MLE -> headers`` -- confirm against the
        HyPhy output format.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    KeyError
        If any of the expected keys is missing.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding, which may differ (e.g. cp1252 on Windows).
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["content"]["0"]
#end method

def getMEMEHeaders(json_file):
    """Return the ``["MLE"]["headers"]`` entry of a MEME results JSON file.

    Parameters
    ----------
    json_file : str or path-like
        Path to the JSON file to read.

    Returns
    -------
    object
        Whatever is stored under ``MLE -> headers``.
        NOTE(review): presumably a list of sequences whose first element
        is the column name -- confirm against the HyPhy output format.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    KeyError
        If any of the expected keys is missing.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding, which may differ (e.g. cp1252 on Windows).
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["headers"]
#end method

In [4]:
# Each header entry is indexable; its first element is used as the column name.
columns = getMEMEHeaders(JSON_FILE)
headers = [entry[0] for entry in columns]
headers

['alpha;',
 '&beta;<sup>-</sup>',
 'p<sup>-</sup>',
 '&beta;<sup>+</sup>',
 'p<sup>+</sup>',
 'LRT',
 'p-value',
 '# branches under selection',
 'Total branch length',
 'MEME LogL',
 'FEL LogL']

In [6]:
# Load the ["MLE"]["content"]["0"] entry of the MEME JSON; it becomes the rows of `df` below.
data = getMEMEData(JSON_FILE)

### Selected Sites

In [8]:
# One row per entry of `data`, one float column per MEME header.
df = pd.DataFrame(data, columns=headers, dtype=float)
# Use 1-based row labels so the index doubles as the site number.
df.index = df.index + 1
# omega is the ratio beta+ / alpha (NaN where both are 0); "Site" mirrors the index.
df = df.assign(
    omega=df["&beta;<sup>+</sup>"] / df["alpha;"],
    Site=df.index,
)
df

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
1,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,1
2,72.542993,4.471838,1.000000,108.814490,0.000000,0.000000,0.666667,0.0,0.0,-8.724557,-8.724557,1.500000,2
3,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,3
4,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,4
5,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4730,0.905556,0.000000,1.000000,1.536748,0.000000,0.000000,0.666667,0.0,0.0,-111.404894,-111.404850,1.697022,4730
4731,0.725717,0.021700,1.000000,1.332115,0.000000,0.000000,0.666667,0.0,0.0,-107.643832,-107.643832,1.835585,4731
4732,0.411645,0.062010,1.000000,0.617467,0.000000,0.000000,0.666667,0.0,0.0,-55.531593,-55.531593,1.500000,4732
4733,0.150060,0.000000,0.988023,44.102185,0.011977,9.037367,0.004791,3.0,0.0,-52.990414,-46.247994,293.897923,4733


In [9]:
# Keep only the rows whose "p-value" is strictly below the configured threshold.
df_results = df.loc[df["p-value"] < pvalueThreshold]
df_results

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
11,2.523894,0.044854,0.983777,39.159120,0.016223,8.306036,6.948272e-03,3.0,0.0,-111.318773,-99.390860,15.515357,11
12,1.720859,0.111045,0.983605,61.986685,0.016395,12.878986,6.853336e-04,3.0,0.0,-149.635899,-139.754194,36.020775,12
17,1.040061,0.000000,0.988982,10000.000000,0.011018,29.035237,2.030200e-07,1.0,0.0,-145.570556,-123.077149,9614.822056,17
18,1.269906,0.119594,0.974701,34.841046,0.025299,5.301014,3.228622e-02,1.0,0.0,-143.768562,-138.402900,27.435922,18
19,0.417662,0.000000,0.985635,441.099943,0.014365,30.906501,7.940985e-08,3.0,0.0,-113.266105,-92.124435,1056.118205,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4723,0.296795,0.080054,0.982443,11.956028,0.017557,8.857884,5.248748e-03,2.0,0.0,-82.858079,-76.022025,40.283779,4723
4726,1.025981,0.000000,0.996576,91.133375,0.003424,9.900868,3.091725e-03,1.0,0.0,-75.511364,-66.060823,88.825590,4726
4727,0.977892,0.000000,0.992897,65.801710,0.007103,11.576310,1.323723e-03,2.0,0.0,-130.248215,-119.992235,67.289328,4727
4733,0.150060,0.000000,0.988023,44.102185,0.011977,9.037367,4.791452e-03,3.0,0.0,-52.990414,-46.247994,293.897923,4733


In [10]:
# Among the significant sites, keep those with omega (beta+ / alpha) above 1.
# reset_index(drop=True) discards the old row labels directly instead of
# materialising them as an 'index' column and dropping it in place; the site
# number remains available in the "Site" column.
positive_sites = df_results[df_results["omega"] > 1.0].reset_index(drop=True)
# Renumber the remaining rows starting from 1.
positive_sites.index += 1
positive_sites

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
1,2.523894,0.044854,0.983777,39.159120,0.016223,8.306036,6.948272e-03,3.0,0.0,-111.318773,-99.390860,15.515357,11
2,1.720859,0.111045,0.983605,61.986685,0.016395,12.878986,6.853336e-04,3.0,0.0,-149.635899,-139.754194,36.020775,12
3,1.040061,0.000000,0.988982,10000.000000,0.011018,29.035237,2.030200e-07,1.0,0.0,-145.570556,-123.077149,9614.822056,17
4,1.269906,0.119594,0.974701,34.841046,0.025299,5.301014,3.228622e-02,1.0,0.0,-143.768562,-138.402900,27.435922,18
5,0.417662,0.000000,0.985635,441.099943,0.014365,30.906501,7.940985e-08,3.0,0.0,-113.266105,-92.124435,1056.118205,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...
432,0.296795,0.080054,0.982443,11.956028,0.017557,8.857884,5.248748e-03,2.0,0.0,-82.858079,-76.022025,40.283779,4723
433,1.025981,0.000000,0.996576,91.133375,0.003424,9.900868,3.091725e-03,1.0,0.0,-75.511364,-66.060823,88.825590,4726
434,0.977892,0.000000,0.992897,65.801710,0.007103,11.576310,1.323723e-03,2.0,0.0,-130.248215,-119.992235,67.289328,4727
435,0.150060,0.000000,0.988023,44.102185,0.011977,9.037367,4.791452e-03,3.0,0.0,-52.990414,-46.247994,293.897923,4733


In [54]:
#df = pd.DataFrame(getFELData(JSON_FILE), columns=headers, dtype = float)
#df.index += 1

# Save the DF here.
#OUTPUT = JSON_FILE.split("/")[-1].replace(".FEL.json", ".csv")
#print("# Saving:", OUTPUT)
#df.to_csv(OUTPUT)

#df["Site"] = df.index
#df["omega"] = df["beta"] / df["alpha"]
#df["Site"] = df.index
#df

In [11]:
# Plot only sites with omega below 10 (rows with NaN omega fail the comparison
# and are excluded too). NOTE(review): presumably this keeps extreme omega
# estimates from dominating the y-axis -- confirm the intended cutoff.
source = df.loc[df["omega"] < 10]
#source = df

# Line chart of omega along the site axis.
line = (
    alt.Chart(source)
    .mark_line()
    .encode(x='Site', y='omega')
    .properties(width=800, height=600)
)

line