### Imports

In [1]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

### Declares

In [2]:
# This can be passed in
JSON_FILE = "../results/BDNF/Recombinants/BDNF_codons_RDP_recombinationFree.fas.MEME.json"

# This can also be passed in
pvalueThreshold = 0.1

### Helper functions

In [3]:
def getMEMEData(json_file):
    # assert that the file exists
    with open(json_file, "r") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["content"]["0"]
#end method

def getMEMEHeaders(json_file):
    # assert that the file exists
    with open(json_file, "r") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["headers"]
#end method

### What are the results?

In [12]:
columns = getMEMEHeaders(JSON_FILE)
headers = [x[0] for x in columns]

df = pd.DataFrame(getMEMEData(JSON_FILE), columns=headers, dtype = float)
df["omega"] = df["&beta;<sup>+</sup>"] / df["alpha;"]
df.index += 1
df["Site"] = df.index
df

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
1,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,1
2,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,2
3,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,3
4,0.110160,0.063991,1.00,0.237329,0.00,0.000000,0.666667,0.0,0.0,-15.830682,-15.830682,2.154406,4
5,0.000000,0.000000,0.01,0.056410,0.99,0.367855,0.468318,1.0,0.0,-11.397964,-11.397954,inf,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,257
258,1.989725,0.000000,1.00,3.975941,0.00,0.000000,0.666667,0.0,0.0,-33.875124,-33.875124,1.998237,258
259,0.976214,0.000000,1.00,1.810637,0.00,0.000000,0.666667,0.0,0.0,-29.303129,-29.303129,1.854754,259
260,0.294442,0.000000,1.00,0.553655,0.00,0.000000,0.666667,0.0,0.0,-18.959692,-18.959692,1.880355,260


### Save results to csv file

In [13]:
df.to_csv("../tables/BDNF/BDNF_MEME_Table.csv", index=False)

### Significant results

In [5]:
df_results = df[df["p-value"] <= pvalueThreshold]
df_results # Meaning: Significant sites

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
26,0.142545,0.002318,0.01879044,1.12228,0.98121,6.892642,0.01428,9.0,0.0,-86.534893,-86.531919,7.873178,26
27,0.245863,0.076539,0.9902139,28.909267,0.009786,4.303857,0.054036,1.0,0.0,-39.247742,-37.012035,117.582648,27
30,0.0,0.0,1e-08,0.379836,1.0,4.629585,0.045649,6.0,0.0,-35.880179,-35.87515,inf,30
38,0.71573,0.0,0.9312423,8.069385,0.068758,3.798252,0.070282,5.0,0.0,-69.215742,-66.279311,11.274348,38
249,0.916453,0.0,0.9945051,203.106343,0.005495,8.25266,0.00714,1.0,0.0,-50.738018,-44.130251,221.622332,249
254,0.23197,0.0,0.9945051,1357.118243,0.005495,21.806784,8e-06,1.0,0.0,-44.498031,-31.946922,5850.39999,254


### Visual and Tables

In [15]:
import numpy as np
#df["omega"] = np.log10(df["omega"])

source = df

line = alt.Chart(source).mark_bar().encode(
    x='Site',
    y='omega',
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True))
).properties(
    width=800,
    height=600)

line

#line.save('Figure2_MEME.png')

### Going with this one for now, log10 transformed omega values, colored by p-value

In [10]:
## Going with this one for now, log10 transformed omega values, colored by p-value

In [28]:
source = df
points = alt.Chart(source).mark_bar(clip=True).encode(
    x=alt.X('Site'),
    y=alt.Y('log10(omega)'), 
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True))
).properties(
    width=800,
    height=600)

line = alt.Chart(source).mark_line(
    color='black',
    size=2
).transform_window(
    rolling_mean='mean(log10(omega))',
    frame=[-10, 10]
).encode(
    x='Site:Q',
    y='rolling_mean:Q'
)


points + line

#points


In [31]:

source = df_results


points = alt.Chart(source).mark_bar().encode(
    x='Site',
    y=alt.Y('log10(omega)',
        scale=alt.Scale(domain=(0, 5), clamp=True)), 
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True))
).properties(
    width=800,
    height=600)

line = alt.Chart(source).mark_line(
    color='black',
    size=2
).transform_window(
    rolling_mean='mean(log10(omega))',
    frame=[-20, 20]
).encode(
    x='Site:Q',
    y='rolling_mean:Q'
).properties(
    width=800,
    height=600).interactive()

points + line

## Figure legend.

In [33]:
## Summary

a = len(df["omega"])
b = len(df_results["omega"])

print("MEME analysis of your gene of interest found " + str(b) + " of " + str(a) + " sites to be statisically significant (p-value <= " + str(pvalueThreshold) + ")" )


MEME analysis of your gene of interest found 6 of 261 sites to be statisically significant (p-value <= 0.1)


## Tables

In [34]:
df_AlnMap = pd.read_csv("../tables/BDNF/BDNF_AlignmentMap.csv")
df_AlnMap

Unnamed: 0,HumanBDNFSite,AlignmentSite
0,1,1
1,2,2
2,3,3
3,4,4
4,5,5
...,...,...
242,243,257
243,244,258
244,245,259
245,246,260


In [36]:
mapping = []

for site in df["Site"].to_list():
    #print(site)
    #map to df_AlnMap
    if site in df_AlnMap["AlignmentSite"].to_list():
        pass
        #mapping.append("0")
        #mapping.append(int(round(int(site) - 190 + 1, 0)))
        
        for n, item in enumerate(df_AlnMap["AlignmentSite"].to_list()):
            if item == site:
                pass
                mapping.append(n+1)
                break
            #end if
        #end for
        #print(n+1, site)
        
    else:
        mapping.append(np.nan)
    #end if
#end for

df["HumanBDNF"] = mapping
df


Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site,log10(omega),HumanBDNF
1,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,1,,1.0
2,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,2,,2.0
3,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,3,,3.0
4,0.110160,0.063991,1.00,0.237329,0.00,0.000000,0.666667,0.0,0.0,-15.830682,-15.830682,2.154406,4,0.333328,4.0
5,0.000000,0.000000,0.01,0.056410,0.99,0.367855,0.468318,1.0,0.0,-11.397964,-11.397954,inf,5,inf,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,257,,243.0
258,1.989725,0.000000,1.00,3.975941,0.00,0.000000,0.666667,0.0,0.0,-33.875124,-33.875124,1.998237,258,0.300647,244.0
259,0.976214,0.000000,1.00,1.810637,0.00,0.000000,0.666667,0.0,0.0,-29.303129,-29.303129,1.854754,259,0.268286,245.0
260,0.294442,0.000000,1.00,0.553655,0.00,0.000000,0.666667,0.0,0.0,-18.959692,-18.959692,1.880355,260,0.274240,246.0


In [35]:
mapping = []

for site in df_results["Site"].to_list():
    #print(site)
    #map to df_AlnMap
    if site in df_AlnMap["AlignmentSite"].to_list():
        pass
        #mapping.append("0")
        #mapping.append(int(round(int(site) - 190 + 1, 0)))
        
        for n, item in enumerate(df_AlnMap["AlignmentSite"].to_list()):
            if item == site:
                pass
                mapping.append(n+1)
                break
            #end if
        #end for
        #print(n+1, site)
        
    else:
        mapping.append(np.nan)
    #end if
#end for

df_results["HumanBDNF"] = mapping
df_results

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_results["HumanBDNF"] = mapping


Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site,log10(omega),HumanBDNF
26,0.142545,0.002318,0.01879044,1.12228,0.98121,6.892642,0.01428,9.0,0.0,-86.534893,-86.531919,7.873178,26,0.89615,26
27,0.245863,0.076539,0.9902139,28.909267,0.009786,4.303857,0.054036,1.0,0.0,-39.247742,-37.012035,117.582648,27,2.070343,27
30,0.0,0.0,1e-08,0.379836,1.0,4.629585,0.045649,6.0,0.0,-35.880179,-35.87515,inf,30,inf,29
38,0.71573,0.0,0.9312423,8.069385,0.068758,3.798252,0.070282,5.0,0.0,-69.215742,-66.279311,11.274348,38,1.052091,36
249,0.916453,0.0,0.9945051,203.106343,0.005495,8.25266,0.00714,1.0,0.0,-50.738018,-44.130251,221.622332,249,2.345614,238
254,0.23197,0.0,0.9945051,1357.118243,0.005495,21.806784,8e-06,1.0,0.0,-44.498031,-31.946922,5850.39999,254,3.767186,240


In [48]:
# Lookup test.

for site in df_results["Site"].to_list():
    if site in df_AlnMap["AlignmentSite"].to_list():
        pass
        #print(site)
        #mapping.append("0")
        #mapping.append(int(round(int(site) - 190 + 1, 0)))
        
        # lookup
        for n, item in enumerate(df_AlnMap["AlignmentSite"].to_list()):
            if item == site:
                pass
                break
            #end if
        #end for
        print(n+1, site)
    else:
        #mapping.append(np.nan)
        pass
    #end if
    
    
    
#end for


14 203
26 215
29 219


In [37]:
try:
    df_results = df_results.drop(['log10(omega)'], axis=1)
except:
    pass

In [11]:
df_results

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site
26,0.142545,0.002318,0.01879044,1.12228,0.98121,6.892642,0.01428,9.0,0.0,-86.534893,-86.531919,7.873178,26
27,0.245863,0.076539,0.9902139,28.909267,0.009786,4.303857,0.054036,1.0,0.0,-39.247742,-37.012035,117.582648,27
30,0.0,0.0,1e-08,0.379836,1.0,4.629585,0.045649,6.0,0.0,-35.880179,-35.87515,inf,30
38,0.71573,0.0,0.9312423,8.069385,0.068758,3.798252,0.070282,5.0,0.0,-69.215742,-66.279311,11.274348,38
249,0.916453,0.0,0.9945051,203.106343,0.005495,8.25266,0.00714,1.0,0.0,-50.738018,-44.130251,221.622332,249
254,0.23197,0.0,0.9945051,1357.118243,0.005495,21.806784,8e-06,1.0,0.0,-44.498031,-31.946922,5850.39999,254


In [54]:
df_results.to_csv("BDNF_MEME_Table.csv", index=False)

In [38]:
try:
    df = df.drop(['log10(omega)'], axis=1)
except:
    pass

In [39]:
df

Unnamed: 0,alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,omega,Site,HumanBDNF
1,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,1,1.0
2,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,2,2.0
3,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,3,3.0
4,0.110160,0.063991,1.00,0.237329,0.00,0.000000,0.666667,0.0,0.0,-15.830682,-15.830682,2.154406,4,4.0
5,0.000000,0.000000,0.01,0.056410,0.99,0.367855,0.468318,1.0,0.0,-11.397964,-11.397954,inf,5,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,0.000000,0.000000,1.00,0.000000,0.00,0.000000,1.000000,0.0,0.0,0.000000,0.000000,,257,243.0
258,1.989725,0.000000,1.00,3.975941,0.00,0.000000,0.666667,0.0,0.0,-33.875124,-33.875124,1.998237,258,244.0
259,0.976214,0.000000,1.00,1.810637,0.00,0.000000,0.666667,0.0,0.0,-29.303129,-29.303129,1.854754,259,245.0
260,0.294442,0.000000,1.00,0.553655,0.00,0.000000,0.666667,0.0,0.0,-18.959692,-18.959692,1.880355,260,246.0


In [40]:
df.to_csv("../tables/BDNF/BDNF_MEME_Table_HumanMapped.csv", index=False)