In [17]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

In [18]:
# Path of the FEL results JSON passed to getFELHeaders() / getFELData() below.
JSON_FILE = "../results/BDNF/Recombinants/BDNF_codons_RDP_recombinationFree.fas.FEL.json"
# Sites whose p-value is <= this cutoff are kept as significant (see df_results).
pvalueThreshold = 0.1

In [19]:
def getFELData(json_file):
    """Load a FEL JSON file and return the rows stored at MLE -> content -> "0".

    json_file: path of the JSON file to read.
    Raises KeyError if any of the expected keys is missing.
    """
    with open(json_file, "r") as handle:
        parsed = json.load(handle)
    mle_section = parsed["MLE"]
    return mle_section["content"]["0"]
#end method

def getFELHeaders(json_file):
    """Load a FEL JSON file and return the value stored at MLE -> headers.

    json_file: path of the JSON file to read.
    Raises KeyError if "MLE" or "headers" is missing.
    """
    with open(json_file, "r") as handle:
        parsed = json.load(handle)
    mle_section = parsed["MLE"]
    return mle_section["headers"]
#end method

In [20]:
# Read the header entries, then keep only the first element of each entry
# to use as the DataFrame column labels.
columns = getFELHeaders(JSON_FILE)
headers = [entry[0] for entry in columns]
headers

['alpha', 'beta', 'alpha=beta', 'LRT', 'p-value', 'Total branch length']

In [21]:
# Raw per-site rows from the FEL JSON.
# NOTE(review): `data` is not referenced again in the visible notebook; the
# DataFrame cell below calls getFELData(JSON_FILE) a second time instead.
data = getFELData(JSON_FILE)

### Selected Sites -- Tables

In [22]:
# Build the per-site table: one row per entry returned by getFELData,
# labelled with the header names extracted earlier.
fel_rows = getFELData(JSON_FILE)
df = pd.DataFrame(fel_rows, columns=headers, dtype=float)

# Shift the default 0-based index so row labels start at 1.
df.index = df.index + 1

# omega = beta / alpha; rows with alpha == 0 come out as inf or NaN.
df["omega"] = df["beta"].div(df["alpha"])
df["Site"] = df.index
df

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,1
2,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,2
3,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,3
4,0.111591,0.063501,0.081098,0.157296,6.916580e-01,0.0,0.569049,4
5,0.000000,0.056423,0.046807,0.360646,5.481474e-01,0.0,inf,5
...,...,...,...,...,...,...,...,...
257,0.000000,0.000000,0.000000,0.000000,1.000000e+00,0.0,,257
258,1.962963,0.000000,0.416511,30.383984,3.544426e-08,0.0,0.000000,258
259,0.967856,0.000000,0.200388,15.402241,8.688520e-05,0.0,0.000000,259
260,0.292869,0.000000,0.108310,5.940956,1.479299e-02,0.0,0.000000,260


In [23]:
# Keep only the sites whose p-value is at or below the configured cutoff.
df_results = df.loc[df["p-value"] <= pvalueThreshold]
df_results

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
6,0.222061,0.000000,0.080486,4.042946,4.435633e-02,0.0,0.000000,6
10,0.160668,0.000000,0.040904,2.736184,9.809876e-02,0.0,0.000000,10
11,0.172884,0.000000,0.089985,3.118123,7.742590e-02,0.0,0.000000,11
13,1.962963,0.000000,0.284986,22.636501,1.957305e-06,0.0,0.000000,13
14,0.000000,0.217151,0.143806,3.085672,7.898464e-02,0.0,inf,14
...,...,...,...,...,...,...,...,...
253,0.166849,0.000000,0.039447,2.883748,8.947755e-02,0.0,0.000000,253
255,1.209290,0.082259,0.511097,11.517049,6.896078e-04,0.0,0.068023,255
258,1.962963,0.000000,0.416511,30.383984,3.544426e-08,0.0,0.000000,258
259,0.967856,0.000000,0.200388,15.402241,8.688520e-05,0.0,0.000000,259


In [24]:
# Significant sites with omega > 1, renumbered 1..N.
# reset_index(drop=True) discards the old row labels directly; the original
# site number stays available in the "Site" column.
positive_sites = df_results.loc[df_results["omega"] > 1.0].reset_index(drop=True)
positive_sites.index = positive_sites.index + 1
positive_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.0,0.217151,0.143806,3.085672,0.078985,0.0,inf,14
2,0.139608,1.089625,0.78165,6.88625,0.008686,0.0,7.804882,26
3,0.0,0.379651,0.257918,4.627491,0.031464,0.0,inf,30


In [25]:
# Significant sites with omega < 1, renumbered 1..N.
# reset_index(drop=True) discards the old row labels directly; the original
# site number stays available in the "Site" column.
negative_sites = df_results.loc[df_results["omega"] < 1.0].reset_index(drop=True)
negative_sites.index = negative_sites.index + 1
negative_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site
1,0.222061,0.000000,0.080486,4.042946,4.435633e-02,0.0,0.000000,6
2,0.160668,0.000000,0.040904,2.736184,9.809876e-02,0.0,0.000000,10
3,0.172884,0.000000,0.089985,3.118123,7.742590e-02,0.0,0.000000,11
4,1.962963,0.000000,0.284986,22.636501,1.957305e-06,0.0,0.000000,13
5,0.341252,0.000000,0.110162,6.767622,9.282644e-03,0.0,0.000000,18
...,...,...,...,...,...,...,...,...
170,0.166849,0.000000,0.039447,2.883748,8.947755e-02,0.0,0.000000,253
171,1.209290,0.082259,0.511097,11.517049,6.896078e-04,0.0,0.068023,255
172,1.962963,0.000000,0.416511,30.383984,3.544426e-08,0.0,0.000000,258
173,0.967856,0.000000,0.200388,15.402241,8.688520e-05,0.0,0.000000,259


In [26]:
#df = pd.DataFrame(getFELData(JSON_FILE), columns=headers, dtype = float)
#df.index += 1

# Save the DF here.
#OUTPUT = JSON_FILE.split("/")[-1].replace(".FEL.json", ".csv")
#print("# Saving:", OUTPUT)
#df.to_csv(OUTPUT)

#df["Site"] = df.index
#df["omega"] = df["beta"] / df["alpha"]
#df["Site"] = df.index
#df

## Visualizations

In [27]:
# Line chart of omega against site number for the full table.
#source = df[df["omega"] < 10]
source = df

line = (
    alt.Chart(source)
    .mark_line()
    .encode(x=alt.X('Site'), y=alt.Y('omega'))
    .properties(width=800, height=600)
)

line

In [28]:

# Same omega-by-site line chart, with the y scale restricted to the
# domain 0-10 and clamping enabled.
source = df

line = (
    alt.Chart(source)
    .mark_line()
    .encode(
        x=alt.X('Site'),
        y=alt.Y('omega', scale=alt.Scale(domain=(0, 10), clamp=True)),
    )
    .properties(width=800, height=600)
)

line

In [29]:
# Bar chart of log10(omega) per site, coloured by p-value.
# numpy is already imported in the notebook's first cell, so it is not
# re-imported here.
#
# omega is 0 wherever beta is 0, so log10 yields -inf for those rows. That is
# expected, so the divide-by-zero RuntimeWarning is silenced for this call
# only instead of being printed under the cell.
with np.errstate(divide="ignore"):
    df["log10(omega)"] = np.log10(df["omega"])

source = df

line = alt.Chart(source).mark_bar().encode(
    x='Site',
    y='log10(omega)',
    # Reversed "reds" scheme: smaller p-values get the darker colours.
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
).properties(
    width=800,
    height=600)

line

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [30]:
# Bar chart of log10(omega) for the negatively selected sites, coloured by
# p-value. numpy is already imported in the notebook's first cell, so it is
# not re-imported here.
#
# omega is 0 wherever beta is 0, so log10 yields -inf for those rows. That is
# expected, so the divide-by-zero RuntimeWarning is silenced for this call
# only instead of being printed under the cell.
with np.errstate(divide="ignore"):
    negative_sites["log10(omega)"] = np.log10(negative_sites["omega"])

source = negative_sites

line = alt.Chart(source).mark_bar().encode(
    x='Site',
    y='log10(omega)',
    # Reversed "reds" scheme: smaller p-values get the darker colours.
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
).properties(
    width=800,
    height=600)

line

In [31]:
# Point chart of omega per negatively selected site; p-value drives both the
# colour and the marker size (both scales reversed, so smaller p-values are
# darker and larger). The cell does not use numpy, so the redundant
# mid-notebook import was removed.
source = negative_sites

line = alt.Chart(source).mark_point().encode(
    x='Site',
    y='omega',
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
    size=alt.Size('p-value', scale=alt.Scale(reverse=True)),
).properties(
    width=800,
    height=600)

line

## Go with this one for now

In [32]:
# Only the negative sites
# Filled-circle chart of omega per site; p-value sets both colour and size,
# with both scales reversed.
source = negative_sites

pvalue_color = alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True))
pvalue_size = alt.Size('p-value', scale=alt.Scale(reverse=True))

line = (
    alt.Chart(source)
    .mark_circle()
    .encode(x=alt.X('Site'), y=alt.Y('omega'), color=pvalue_color, size=pvalue_size)
    .properties(width=800, height=600)
)

line

In [17]:
# Point chart of omega per negatively selected site; p-value drives both the
# colour and the marker size (both scales reversed, so smaller p-values are
# darker and larger). The cell does not use numpy, so the redundant
# mid-notebook import was removed.
source = negative_sites

line = alt.Chart(source).mark_point().encode(
    x='Site',
    y='omega',
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
    size=alt.Size('p-value', scale=alt.Scale(reverse=True)),
).properties(
    width=800,
    height=600)

line

## Figure legend.

In [15]:
## Summary

# Total number of sites in the table and how many of them were kept as
# significant with omega < 1.
total_sites = len(df["omega"])
negative_count = len(negative_sites["omega"])

# Percentage of sites under negative selection. An empty table would
# otherwise raise ZeroDivisionError, so report NaN in that case.
if total_sites:
    percent_negative = round((negative_count / total_sites) * 100, 3)
else:
    percent_negative = float("nan")

print(
    f"FEL analysis of your gene of interest found {negative_count} of {total_sites} "
    f"sites to be statistically significant (p-value <= {pvalueThreshold}) "
    "for pervasive negative/purifying selection"
)
print(percent_negative)

FEL analysis of your gene of interest found 174 of 261 sites to be statisically significant (p-value <= 0.1) for pervasive negative/purifying selection
66.667


## Table


In [20]:
# Display the negatively selected sites table built earlier in the notebook.
negative_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site,log10(omega)
1,2.200000,0.000000,0.278924,3.556121,5.932622e-02,0.0,0.000000,4,-inf
2,2.809524,0.000000,0.266035,4.446409,3.497473e-02,0.0,0.000000,5,-inf
3,2.809524,0.000000,0.590403,5.470820,1.933655e-02,0.0,0.000000,12,-inf
4,2.446941,0.000000,0.478269,3.514665,6.082795e-02,0.0,0.000000,25,-inf
5,1.111980,0.123758,0.305334,3.267494,7.066548e-02,0.0,0.111296,48,-0.953522
...,...,...,...,...,...,...,...,...,...
181,0.187889,0.000000,0.067505,4.096014,4.298445e-02,0.0,0.000000,442,-inf
182,1.352941,0.000000,0.402404,18.854179,1.411016e-05,0.0,0.000000,443,-inf
183,2.095658,0.000000,0.404642,32.101216,1.463457e-08,0.0,0.000000,446,-inf
184,0.962978,0.000000,0.194446,15.753703,7.214658e-05,0.0,0.000000,447,-inf


In [21]:
# Load the alignment map from the working directory. The mapping cell below
# reads its "AlignmentSite" column to translate site numbers.
df_AlnMap = pd.read_csv("BDNF_AlignmentMap.csv")
df_AlnMap

Unnamed: 0,HumanBDNFSite,AlignmentSite
0,1,190
1,2,191
2,3,192
3,4,193
4,5,194
...,...,...
242,243,445
243,244,446
244,245,447
245,246,448


In [24]:
# Translate each alignment site into its human BDNF site number.
#
# Build the lookup once, as a Series indexed by AlignmentSite whose values are
# HumanBDNFSite. This replaces a nested loop that rebuilt the AlignmentSite
# list for every site and used the row position (n + 1) as the human
# coordinate, which is only correct while HumanBDNFSite happens to equal the
# row number. keep="first" preserves the original first-match behaviour if an
# alignment site is listed more than once.
alignment_to_human = (
    df_AlnMap
    .drop_duplicates(subset="AlignmentSite", keep="first")
    .set_index("AlignmentSite")["HumanBDNFSite"]
)

# Sites absent from the alignment map become NaN, as before.
negative_sites["HumanBDNF"] = negative_sites["Site"].map(alignment_to_human)
negative_sites

Unnamed: 0,alpha,beta,alpha=beta,LRT,p-value,Total branch length,omega,Site,log10(omega),HumanBDNF
1,2.200000,0.000000,0.278924,3.556121,5.932622e-02,0.0,0.000000,4,-inf,
2,2.809524,0.000000,0.266035,4.446409,3.497473e-02,0.0,0.000000,5,-inf,
3,2.809524,0.000000,0.590403,5.470820,1.933655e-02,0.0,0.000000,12,-inf,
4,2.446941,0.000000,0.478269,3.514665,6.082795e-02,0.0,0.000000,25,-inf,
5,1.111980,0.123758,0.305334,3.267494,7.066548e-02,0.0,0.111296,48,-0.953522,
...,...,...,...,...,...,...,...,...,...,...
181,0.187889,0.000000,0.067505,4.096014,4.298445e-02,0.0,0.000000,442,-inf,240.0
182,1.352941,0.000000,0.402404,18.854179,1.411016e-05,0.0,0.000000,443,-inf,241.0
183,2.095658,0.000000,0.404642,32.101216,1.463457e-08,0.0,0.000000,446,-inf,244.0
184,0.962978,0.000000,0.194446,15.753703,7.214658e-05,0.0,0.000000,447,-inf,245.0


In [25]:
# Remove the plotting-only helper column before exporting. errors="ignore"
# makes this a no-op when the column is absent, without the bare
# `except: pass` that would also have hidden any unrelated failure.
negative_sites = negative_sites.drop(columns=["log10(omega)"], errors="ignore")

# Write the table to the working directory without the 1-based row labels.
negative_sites.to_csv("BDNF_FEL_Negative_Table.csv", index=False)