In [1]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

In [28]:
# Path of the FUBAR JSON results file that the loader cells below read.
JSON_FILE = "../results/TP53/TP53_codons.fasta.FUBAR.json"
# Cut-off compared (with >=) against the Prob[alpha<beta] and Prob[alpha>beta]
# columns when the positive and negative site tables are built.
posteriorThreshold = 0.9

In [4]:
def getFUBARData(json_file):
    """Return the ``["MLE"]["content"]["0"]`` entry of a FUBAR JSON file.

    Parameters
    ----------
    json_file : str or path-like
        Location of the JSON file; it is opened in text mode for reading.

    Returns
    -------
    object
        Whatever is stored under ``MLE -> content -> "0"`` in the parsed JSON.

    Raises
    ------
    KeyError
        If any of the ``MLE``, ``content`` or ``"0"`` keys is absent.
    """
    with open(json_file, "r") as handle:
        mle_section = json.load(handle)["MLE"]
    return mle_section["content"]["0"]
#end method

def getFUBARHeaders(json_file):
    """Return the ``["MLE"]["headers"]`` entry of a FUBAR JSON file.

    Parameters
    ----------
    json_file : str or path-like
        Location of the JSON file; it is opened in text mode for reading.

    Returns
    -------
    object
        Whatever is stored under ``MLE -> headers`` in the parsed JSON.

    Raises
    ------
    KeyError
        If the ``MLE`` or ``headers`` key is absent.
    """
    with open(json_file, "r") as handle:
        mle_section = json.load(handle)["MLE"]
    return mle_section["headers"]
#end method

In [12]:
# Element 0 of every header entry is used as the column name.
columns = getFUBARHeaders(JSON_FILE)
headers = [entry[0] for entry in columns]

# For FUBAR: add two placeholder names, "Z" and "Y", after the named headers.
# NOTE(review): presumably each content row carries two more values than there
# are headers; the DataFrame cell drops both columns right away -- confirm.
headers.extend(("Z", "Y"))
headers

['alpha',
 'beta',
 'beta-alpha',
 'Prob[alpha>beta]',
 'Prob[alpha<beta]',
 'BayesFactor[alpha<beta]',
 'Z',
 'Y']

### Selected Sites

In [35]:
# Build the per-site table: load the rows as floats, discard the two
# placeholder columns, and derive omega as beta / alpha.
df = (
    pd.DataFrame(getFUBARData(JSON_FILE), columns=headers, dtype=float)
    .drop(columns=["Z", "Y"])
    .assign(omega=lambda frame: frame["beta"] / frame["alpha"])
)

# Shift the index so it starts at 1, then expose it as a regular "Site"
# column for use as a plotting field.
df.index = df.index + 1
df["Site"] = df.index
df

Unnamed: 0,alpha,beta,beta-alpha,Prob[alpha>beta],Prob[alpha<beta],BayesFactor[alpha<beta],omega,Site
1,5.065445,0.531438,-4.534007,0.914126,0.062461,0.305058,0.104914,1
2,2.632238,2.977492,0.345255,0.538367,0.304636,2.006010,1.131164,2
3,3.897658,6.624793,2.727134,0.418167,0.344822,2.409904,1.699685,3
4,3.799654,0.891051,-2.908603,0.881943,0.066285,0.325062,0.234509,4
5,4.317571,6.438737,2.121166,0.494943,0.258235,1.594093,1.491287,5
...,...,...,...,...,...,...,...,...
1163,3.696957,3.168089,-0.528868,0.690168,0.179246,1.000000,0.856945,1163
1164,4.174679,5.988063,1.813384,0.513747,0.257324,1.586519,1.434377,1164
1165,3.877447,4.669616,0.792169,0.587454,0.216833,1.267756,1.204302,1165
1166,4.053215,5.715136,1.661921,0.522059,0.253293,1.553233,1.410025,1166


## Positive sites

In [36]:
# Keep the rows whose Prob[alpha<beta] reaches the threshold, then renumber
# them from 1; the original site number stays available in the "Site" column.
passes_positive = df["Prob[alpha<beta]"] >= posteriorThreshold
positive_sites = df.loc[passes_positive].reset_index(drop=True)
positive_sites.index += 1
positive_sites

Unnamed: 0,alpha,beta,beta-alpha,Prob[alpha>beta],Prob[alpha<beta],BayesFactor[alpha<beta],omega,Site
1,2.22912,16.843783,14.614662,0.033152,0.907157,44.740128,7.556246,392
2,1.279637,2.811616,1.531978,0.000344,0.925458,56.848229,2.197197,696


## Negative sites

In [37]:
# Keep the rows whose Prob[alpha>beta] reaches the threshold, then renumber
# them from 1; the original site number stays available in the "Site" column.
passes_negative = df["Prob[alpha>beta]"] >= posteriorThreshold
negative_sites = df.loc[passes_negative].reset_index(drop=True)
negative_sites.index += 1
negative_sites

Unnamed: 0,alpha,beta,beta-alpha,Prob[alpha>beta],Prob[alpha<beta],BayesFactor[alpha<beta],omega,Site
1,5.065445,0.531438,-4.534007,0.914126,6.246079e-02,3.050578e-01,0.104914,1
2,2.982430,0.486267,-2.496163,0.901788,6.709953e-02,3.293429e-01,0.163044,14
3,7.788772,0.674003,-7.114769,0.983687,2.881691e-03,1.323320e-02,0.086535,29
4,12.315493,0.859938,-11.455555,0.971558,6.400461e-03,2.949606e-02,0.069826,59
5,14.353699,0.909346,-13.444353,0.976337,2.264859e-03,1.039418e-02,0.063353,60
...,...,...,...,...,...,...,...,...
414,2.967504,0.347090,-2.620414,1.000000,1.217242e-13,5.573670e-13,0.116964,1156
415,2.820644,0.793928,-2.026716,1.000000,1.419366e-13,6.499180e-13,0.281470,1157
416,2.811576,0.524440,-2.287136,1.000000,4.043943e-10,1.851694e-09,0.186529,1158
417,2.417082,0.225951,-2.191131,1.000000,3.675942e-10,1.683188e-09,0.093481,1159


## Visualizations

In [58]:
# Basic plot: omega drawn as a line over the Site column of the full table.
source = df

line = (
    alt.Chart(source)
    .mark_line()
    .encode(x="Site", y="omega")
    .properties(width=800, height=600)
)

line

In [67]:
source = df

# Omega-by-site line chart, with the x and y channels given as explicit
# channel objects.
line = (
    alt.Chart(source)
    .mark_line()
    .encode(x=alt.X("Site"), y=alt.Y("omega"))
    .properties(width=800, height=600)
)

line

In [71]:
source = df

# Scatter (filled circles) of the alpha column against the beta column:
# beta on the x axis, alpha on the y axis.
line = (
    alt.Chart(source)
    .mark_circle()
    .encode(x=alt.X("beta"), y=alt.Y("alpha"))
    .properties(width=800, height=600)
)

line

In [93]:
source = df

# Point chart of omega per site; omega also drives the colour and beta the
# point size.
line = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x=alt.X("Site"),
        y=alt.Y("omega"),
        color=alt.Color("omega"),
        size=alt.Size("beta"),
    )
    .properties(width=800, height=600)
)

line

In [113]:
# Copy the FUBAR columns into plainly named columns on df before charting.
# NOTE(review): presumably needed because Altair treats ".", "[" and "]" in
# field names specially -- confirm.
for alias, fubar_column in (
    ("BayesFactor", "BayesFactor[alpha<beta]"),
    ("ProbNegative", "Prob[alpha>beta]"),
    ("ProbPositive", "Prob[alpha<beta]"),
):
    df[alias] = df[fubar_column]

source = df

# Circles at (Site, omega), sized by BayesFactor and coloured by ProbPositive.
line = (
    alt.Chart(source)
    .mark_circle()
    .encode(
        x=alt.X("Site"),
        y=alt.Y("omega"),
        size=alt.Size("BayesFactor"),
        color=alt.Color("ProbPositive"),
    )
    .properties(width=800, height=600)
)

line

In [136]:
# Using this one for now.

# Copy the FUBAR columns into plainly named columns on df before charting.
# Only ProbNegative is read by this chart; the other two aliases are still
# written to df.
for alias, fubar_column in (
    ("BayesFactor", "BayesFactor[alpha<beta]"),
    ("ProbNegative", "Prob[alpha>beta]"),
    ("ProbPositive", "Prob[alpha<beta]"),
):
    df[alias] = df[fubar_column]

source = df

# Points at (Site, omega), coloured by ProbNegative on the "reds" scheme.
line = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x=alt.X("Site"),
        y=alt.Y("omega"),
        color=alt.Color(
            "ProbNegative",
            scale=alt.Scale(scheme="reds", reverse=False),
        ),
    )
    .properties(width=800, height=600)
)

line

In [142]:
# Negative sites

# Copy the FUBAR columns into plainly named columns on negative_sites before
# charting. Only ProbNegative is read by this chart; the other two aliases
# are still written to the table.
for alias, fubar_column in (
    ("BayesFactor", "BayesFactor[alpha<beta]"),
    ("ProbNegative", "Prob[alpha>beta]"),
    ("ProbPositive", "Prob[alpha<beta]"),
):
    negative_sites[alias] = negative_sites[fubar_column]

source = negative_sites

# Points at (Site, omega) for the thresholded table only; ProbNegative drives
# both the colour ("reds" scheme) and the point size.
line = (
    alt.Chart(source)
    .mark_point()
    .encode(
        x=alt.X("Site"),
        y=alt.Y("omega"),
        color=alt.Color(
            "ProbNegative",
            scale=alt.Scale(scheme="reds", reverse=False),
        ),
        size=alt.Size("ProbNegative", scale=alt.Scale(reverse=False)),
    )
    .properties(width=800, height=600)
)

line

In [139]:
# Display the negative_sites table.
# NOTE(review): the saved output below lacks the BayesFactor / ProbNegative /
# ProbPositive columns that the preceding cell adds, so it was presumably
# recorded before that cell ran -- re-run the notebook top to bottom.
negative_sites

Unnamed: 0,alpha,beta,beta-alpha,Prob[alpha>beta],Prob[alpha<beta],BayesFactor[alpha<beta],omega,Site
1,5.065445,0.531438,-4.534007,0.914126,6.246079e-02,3.050578e-01,0.104914,1
2,2.982430,0.486267,-2.496163,0.901788,6.709953e-02,3.293429e-01,0.163044,14
3,7.788772,0.674003,-7.114769,0.983687,2.881691e-03,1.323320e-02,0.086535,29
4,12.315493,0.859938,-11.455555,0.971558,6.400461e-03,2.949606e-02,0.069826,59
5,14.353699,0.909346,-13.444353,0.976337,2.264859e-03,1.039418e-02,0.063353,60
...,...,...,...,...,...,...,...,...
414,2.967504,0.347090,-2.620414,1.000000,1.217242e-13,5.573670e-13,0.116964,1156
415,2.820644,0.793928,-2.026716,1.000000,1.419366e-13,6.499180e-13,0.281470,1157
416,2.811576,0.524440,-2.287136,1.000000,4.043943e-10,1.851694e-09,0.186529,1158
417,2.417082,0.225951,-2.191131,1.000000,3.675942e-10,1.683188e-09,0.093481,1159


In [97]:
# Display the full per-site table.
# NOTE(review): the saved output below shows a `BF` column that no cell in
# this notebook creates, so it presumably comes from a since-edited cell --
# re-run the notebook top to bottom to refresh it.
df

Unnamed: 0,alpha,beta,beta-alpha,Prob[alpha>beta],Prob[alpha<beta],BayesFactor[alpha<beta],omega,Site,BF
1,5.065445,0.531438,-4.534007,0.914126,0.062461,0.305058,0.104914,1,0.305058
2,2.632238,2.977492,0.345255,0.538367,0.304636,2.006010,1.131164,2,2.006010
3,3.897658,6.624793,2.727134,0.418167,0.344822,2.409904,1.699685,3,2.409904
4,3.799654,0.891051,-2.908603,0.881943,0.066285,0.325062,0.234509,4,0.325062
5,4.317571,6.438737,2.121166,0.494943,0.258235,1.594093,1.491287,5,1.594093
...,...,...,...,...,...,...,...,...,...
1163,3.696957,3.168089,-0.528868,0.690168,0.179246,1.000000,0.856945,1163,1.000000
1164,4.174679,5.988063,1.813384,0.513747,0.257324,1.586519,1.434377,1164,1.586519
1165,3.877447,4.669616,0.792169,0.587454,0.216833,1.267756,1.204302,1165,1.267756
1166,4.053215,5.715136,1.661921,0.522059,0.253293,1.553233,1.410025,1166,1.553233


## Figure legend.

In [53]:
## Summary

# Row counts: every site in the full table, and the two subsets that passed
# the posterior-probability threshold.
total_sites = len(df)
negative_count = len(negative_sites)
positive_count = len(positive_sites)

# Both sentences share one template so their wording cannot drift apart.
# The previous text misspelled "statistically" as "statisically".
# NOTE(review): FUBAR is usually described as detecting pervasive rather than
# episodic selection -- confirm the wording "episodic" before publishing.
summary_template = (
    "FUBAR analysis of your gene of interest found {count} of {total} sites "
    "to be statistically significant (posterior probability threshold "
    "{threshold}) for episodic {kind} selection."
)

for site_count, selection_kind in (
    (negative_count, "negative/purifying"),
    (positive_count, "positive/diversifying"),
):
    print(
        summary_template.format(
            count=site_count,
            total=total_sites,
            threshold=posteriorThreshold,
            kind=selection_kind,
        )
    )


FUBAR analysis of your gene of interest found 418 of 1167 sites to be statisically significant (posterior probability threshold 0.9) for episodic negative/purifying selection.
FUBAR analysis of your gene of interest found 2 of 1167 sites to be statisically significant (posterior probability threshold 0.9) for episodic positive/diversifying selection.
