### Imports

In [16]:
import pandas as pd
import numpy as np
import os
import json
import altair as alt

### Declares

In [17]:
# Directory holding the pipeline outputs for this gene, relative to the
# notebook's working directory. This is the same relative location the
# alignment-map and results-export cells of this notebook use.
RESULTS_DIR = os.path.join("..", "results", "mammalian_REM2")

# MEME JSON output to analyse. This can be passed in.
# NOTE: this used to be a machine-specific absolute path on drive H:, which
# only resolves on one workstation; the relative form keeps the notebook
# runnable from any checkout with the same directory layout.
JSON_FILE = os.path.join(RESULTS_DIR, "mammalian_REM2_codons.SA.fasta.MEME.json")

# Cut-off compared against the per-site "p-value" column. This can also be passed in.
pvalueThreshold = 0.1

### Helper functions

In [18]:
def getMEMEData(json_file):
    """Return the per-site result rows stored in a MEME JSON file.

    Parameters:
        json_file: path (str or os.PathLike) to the MEME JSON output.

    Returns:
        The object stored under ["MLE"]["content"]["0"] of the parsed JSON.

    Raises:
        FileNotFoundError: if json_file does not point to an existing file.
        KeyError: if the parsed JSON lacks the "MLE"/"content"/"0" keys.
    """
    # Fail early with the offending path in the message instead of relying
    # on whatever open() happens to report.
    if not os.path.isfile(json_file):
        raise FileNotFoundError("MEME JSON file not found: " + str(json_file))
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["content"]["0"]
#end method

def getMEMEHeaders(json_file):
    """Return the column header entries stored in a MEME JSON file.

    Parameters:
        json_file: path (str or os.PathLike) to the MEME JSON output.

    Returns:
        The object stored under ["MLE"]["headers"] of the parsed JSON.

    Raises:
        FileNotFoundError: if json_file does not point to an existing file.
        KeyError: if the parsed JSON lacks the "MLE"/"headers" keys.
    """
    # Fail early with the offending path in the message instead of relying
    # on whatever open() happens to report.
    if not os.path.isfile(json_file):
        raise FileNotFoundError("MEME JSON file not found: " + str(json_file))
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(json_file, "r", encoding="utf-8") as in_d:
        json_data = json.load(in_d)
    return json_data["MLE"]["headers"]
#end method

### What are the results?

In [19]:
# The first element of every header entry is used as the column label.
columns = getMEMEHeaders(JSON_FILE)
headers = [entry[0] for entry in columns]

# One row per site, every value coerced to float.
df = pd.DataFrame(
    getMEMEData(JSON_FILE),
    columns=headers,
    dtype=float,
)
#df["omega"] = df["&beta;<sup>+</sup>"] / df["&alpha;"]

# Shift the default 0-based index to 1-based and expose it as a "Site" column.
df.index = df.index + 1
df["Site"] = df.index
df

Unnamed: 0,&alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,Variation p,Site
1,0.000000,0.000000,0.992864,11.797603,0.007136,9.909829,0.003078,1.0,0.0,-18.208283,-13.261810,0.007108,1
2,1.688158,0.163417,0.983715,10.082298,0.016285,2.096259,0.172849,1.0,0.0,-80.726016,-78.041588,0.068260,2
3,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,3
4,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,4
5,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
652,0.000000,0.000000,0.860111,0.592448,0.139889,1.955131,0.186560,1.0,0.0,-16.112026,-15.581849,0.588501,652
653,0.622511,0.000000,0.893664,0.892982,0.106336,0.057907,0.599153,1.0,0.0,-48.212882,-47.515201,0.497738,653
654,4.037315,0.000000,0.942888,4.107561,0.057112,-0.001848,0.666667,3.0,0.0,-134.372661,-130.610607,0.023236,654
655,0.693206,0.045217,0.995256,94.891420,0.004744,7.527205,0.010330,1.0,0.0,-66.117162,-60.559231,0.003857,655


### Save results to csv file

In [20]:
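# Disabled: export of the full per-site table.
# NOTE(review): the path below points at a BDNF directory, which looks left over
# from a different analysis -- confirm and update it before re-enabling.
#df.to_csv("../tables/BDNF/BDNF_MEME_Table.csv", index=False)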

### Significant results

In [21]:
# Keep only the sites whose p-value is at or below the threshold.
# The explicit .copy() makes df_results an independent frame, so the column
# added to it further down does not write into a slice of df (which is what
# triggers pandas' SettingWithCopyWarning).
df_results = df[df["p-value"] <= pvalueThreshold].copy()
df_results # Meaning: Significant sites

Unnamed: 0,&alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,Variation p,Site
1,0.000000,0.000000,0.992864,11.797603,0.007136,9.909829,0.003078,1.0,0.0,-18.208283,-13.261810,0.007108,1
8,0.473769,0.037227,0.963178,11.683360,0.036822,7.655931,0.009674,3.0,0.0,-60.847628,-52.528314,0.000244,8
18,0.458564,0.000000,0.958366,4.298746,0.041634,3.750858,0.072040,3.0,0.0,-43.538971,-38.919892,0.009862,18
24,0.427999,0.000000,0.916438,8.612928,0.083562,7.194835,0.012238,7.0,0.0,-93.208799,-88.865292,0.012991,24
25,0.489138,0.000000,0.995193,59.323509,0.004807,4.653984,0.045077,1.0,0.0,-51.598460,-47.069853,0.010796,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...
634,0.543100,0.000000,0.995242,68.886654,0.004758,9.057291,0.004743,1.0,0.0,-42.332130,-35.298708,0.000882,634
641,1.382479,0.000000,0.994176,22.513908,0.005824,3.811222,0.069808,1.0,0.0,-45.348966,-39.322010,0.002413,641
643,0.515140,0.000000,0.974216,7.722528,0.025784,6.982722,0.013637,2.0,0.0,-51.968012,-43.525012,0.000215,643
644,0.697437,0.000000,0.982666,9.071284,0.017334,6.010121,0.022430,1.0,0.0,-44.395143,-36.613815,0.000417,644


### Visual and Tables

In [22]:
# Scatter of the per-site p-value against site number, coloured by p-value
# with the reversed 'reds' scheme.
# (numpy is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.)
#df["omega"] = np.log10(df["omega"])

source = df

line = alt.Chart(source).mark_point().encode(
    x='Site',
    y='p-value',
    color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True))
).properties(
    width=800,
    height=600)

line

#line.save('Figure2_MEME.png')

In [31]:
source = df

# Bar layer: one bar per site, height and colour both driven by the p-value.
points = (
    alt.Chart(source)
    .mark_bar(clip=True)
    .encode(
        x=alt.X('Site'),
        y=alt.Y('p-value'),
        color=alt.Color('p-value', scale=alt.Scale(scheme='reds', reverse=True)),
    )
    .properties(width=800, height=600)
)

# Line layer: mean p-value over a window reaching 10 rows before and 10 rows
# after the current one.
line = (
    alt.Chart(source)
    .mark_line(color='black', size=2)
    .transform_window(rolling_mean='mean(p-value)', frame=[-10, 10])
    .encode(x='Site:Q', y='rolling_mean:Q')
)

# Draw the rolling mean on top of the bars.
points + line


## Figure legend.

In [32]:
## Summary

a = len(df["Site"])          # number of sites in the full table
b = len(df_results["Site"])  # number of sites that passed the p-value filter

# Fixed the spelling of "statistically" in the printed sentence.
print(
    "MEME analysis of your gene of interest found {} of {} sites to be "
    "statistically significant (p-value <= {})".format(b, a, pvalueThreshold)
)


MEME analysis of your gene of interest found 61 of 656 sites to be statisically significant (p-value <= 0.1)


## Tables

In [34]:
# Load the alignment-map CSV that sits with the other results for this gene.
aln_map_path = os.path.join(
    "..",
    "results",
    "mammalian_REM2",
    "mammalian_REM2_codons.SA.fasta_AlignmentMap.csv",
)
df_AlnMap = pd.read_csv(aln_map_path)
df_AlnMap

Unnamed: 0,HumanSite,AlignmentSite
0,1,1
1,2,2
2,3,7
3,4,8
4,5,9
...,...,...
335,336,652
336,337,653
337,338,654
338,339,655


In [35]:
# Lookup from alignment site -> human site, read from the HumanSite column of
# the alignment map. The previous loop used the row position (n + 1) instead,
# which only matches HumanSite while the map is sorted and gap-free, and it
# rebuilt the AlignmentSite list inside nested loops (quadratic work).
# drop_duplicates(keep="first") preserves the old "first match wins" rule.
site_to_human = (
    df_AlnMap
    .drop_duplicates(subset="AlignmentSite", keep="first")
    .set_index("AlignmentSite")["HumanSite"]
)

# Sites with no entry in the alignment map become NaN.
mapping = df["Site"].map(site_to_human).to_list()

df["HumanREM2"] = mapping
df


Unnamed: 0,&alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,Variation p,Site,HumanREM2
1,0.000000,0.000000,0.992864,11.797603,0.007136,9.909829,0.003078,1.0,0.0,-18.208283,-13.261810,0.007108,1,1.0
2,1.688158,0.163417,0.983715,10.082298,0.016285,2.096259,0.172849,1.0,0.0,-80.726016,-78.041588,0.068260,2,2.0
3,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,3,
4,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,4,
5,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,1.000000,0.0,0.0,0.000000,0.000000,1.000000,5,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
652,0.000000,0.000000,0.860111,0.592448,0.139889,1.955131,0.186560,1.0,0.0,-16.112026,-15.581849,0.588501,652,336.0
653,0.622511,0.000000,0.893664,0.892982,0.106336,0.057907,0.599153,1.0,0.0,-48.212882,-47.515201,0.497738,653,337.0
654,4.037315,0.000000,0.942888,4.107561,0.057112,-0.001848,0.666667,3.0,0.0,-134.372661,-130.610607,0.023236,654,338.0
655,0.693206,0.045217,0.995256,94.891420,0.004744,7.527205,0.010330,1.0,0.0,-66.117162,-60.559231,0.003857,655,339.0


In [40]:
# Lookup from alignment site -> human site, read from the HumanSite column of
# the alignment map. The previous loop used the row position (n + 1) instead,
# which only matches HumanSite while the map is sorted and gap-free, and it
# rebuilt the AlignmentSite list inside nested loops (quadratic work).
# drop_duplicates(keep="first") preserves the old "first match wins" rule.
site_to_human = (
    df_AlnMap
    .drop_duplicates(subset="AlignmentSite", keep="first")
    .set_index("AlignmentSite")["HumanSite"]
)

# Sites with no entry in the alignment map become NaN.
mapping = df_results["Site"].map(site_to_human).to_list()

# assign() returns a new frame, so the column is never written into a slice
# of df (the in-place assignment raised SettingWithCopyWarning). The list is
# assigned positionally, before the index is renumbered.
df_results = df_results.assign(HumanREM2=mapping).reset_index(drop=True)
df_results.index += 1  # 1-based row numbers for display

# Save csv
df_results.to_csv( os.path.join("..", "results", "mammalian_REM2", "mammalian_REM2_MEME_Results.csv"), index=False)

df_results

Unnamed: 0,&alpha;,&beta;<sup>-</sup>,p<sup>-</sup>,&beta;<sup>+</sup>,p<sup>+</sup>,LRT,p-value,# branches under selection,Total branch length,MEME LogL,FEL LogL,Variation p,Site,HumanREM2
1,0.000000,0.000000,0.992864,11.797603,0.007136,9.909829,0.003078,1.0,0.0,-18.208283,-13.261810,0.007108,1,1.0
2,0.473769,0.037227,0.963178,11.683360,0.036822,7.655931,0.009674,3.0,0.0,-60.847628,-52.528314,0.000244,8,4.0
3,0.458564,0.000000,0.958366,4.298746,0.041634,3.750858,0.072040,3.0,0.0,-43.538971,-38.919892,0.009862,18,10.0
4,0.427999,0.000000,0.916438,8.612928,0.083562,7.194835,0.012238,7.0,0.0,-93.208799,-88.865292,0.012991,24,16.0
5,0.489138,0.000000,0.995193,59.323509,0.004807,4.653984,0.045077,1.0,0.0,-51.598460,-47.069853,0.010796,25,17.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,0.543100,0.000000,0.995242,68.886654,0.004758,9.057291,0.004743,1.0,0.0,-42.332130,-35.298708,0.000882,634,322.0
58,1.382479,0.000000,0.994176,22.513908,0.005824,3.811222,0.069808,1.0,0.0,-45.348966,-39.322010,0.002413,641,325.0
59,0.515140,0.000000,0.974216,7.722528,0.025784,6.982722,0.013637,2.0,0.0,-51.968012,-43.525012,0.000215,643,327.0
60,0.697437,0.000000,0.982666,9.071284,0.017334,6.010121,0.022430,1.0,0.0,-44.395143,-36.613815,0.000417,644,328.0
