In [55]:
# Imports
import pandas as pd
import plotly.express as px
from prettytable import PrettyTable
import plotly.graph_objects as go
import numpy as np
import random
import csv
import os
import json
from tqdm import tqdm
import math

In [56]:
# Local on Windows 10 box
#WD = r"D:\BUSTEDS-MH"
WD = os.path.join("E:\\", "BUSTEDS-MH")

# MAC OSX
#WD = "/Users/user/Documents/BUSTEDS-MH"

# Additional declares
BUSTEDS_DIR = os.path.join(WD, "analysis", "13-datasets", "BUSTEDS")
BUSTEDS_MH_DIR = os.path.join(WD, "analysis", "13-datasets", "BUSTEDS-MH")

BUSTEDS_OUTPUT_CSV = os.path.join(WD, "tables", "Table_13Datasets_BUSTEDS.csv")
BUSTEDS_MH_OUTPUT_CSV = os.path.join(WD, "tables", "Table_13Datasets_BUSTEDS-MH.csv")
OUTPUT_CSV = os.path.join(WD, "tables", "Table_13Datasets_BUSTEDS_and_BUSTEDS-MH.csv")

ER_Threshold = 5

In [57]:
def read_json(filename):
    print("# Reading:", filename)
    if os.stat(filename).st_size == 0: 
        print("# -- Error -- file is empty")
        return []
    #end if
    with open(filename, "r") as fh:
        json_data = json.load(fh)
    fh.close()
    return json_data
#end method

#define function to calculate cv
#cv = lambda x: np.std(x, ddof=1) / np.mean(x) * 100 
cv = lambda x: np.std(x) / np.mean(x)

pctchg = lambda a, b: (a / b) * 100

In [58]:
BUSTEDS_DIR_FILES = [os.path.join(BUSTEDS_DIR, file.name) for file in os.scandir(BUSTEDS_DIR) if file.name.endswith(".json")]
BUSTEDS_MH_DIR_FILES = [os.path.join(BUSTEDS_MH_DIR, file.name) for file in os.scandir(BUSTEDS_MH_DIR) if file.name.endswith(".json")]
print("# Number of BUSTEDS results:", len(BUSTEDS_DIR_FILES))
print("# Number of BUSTEDS-MH results:", len(BUSTEDS_MH_DIR_FILES))


# Number of BUSTEDS results: 13
# Number of BUSTEDS-MH results: 13


## Look over BUSTEDS-MH Files

In [59]:
df_dict = {}

for item in tqdm(BUSTEDS_MH_DIR_FILES):
    basename = os.path.basename(item).replace(".nex.BUSTEDS-MH.json", "")
    
    # Find BUSTEDS File
    #BUSTEDS_File = os.path.join(BUSTEDS_DIR, basename + ".nex.BUSTEDS.json")
    
    # Read 
    #print()
    json_data_BUSTEDS_MH = read_json(item)
    #json_data_BUSTEDS = read_json(BUSTEDS_File)
    
    #print("# Data loaded")
    df_dict[basename] = {"Method": "BUSTEDS-MH"}
    df_dict[basename].update({"Sequences": json_data_BUSTEDS_MH["input"]["number of sequences"]})
    df_dict[basename].update({"Codons": json_data_BUSTEDS_MH["input"]["number of sites"]})
    df_dict[basename].update({"LRT p-value": json_data_BUSTEDS_MH["test results"]["p-value"]})

    # cAIC
    df_dict[basename].update({"cAIC": json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["AIC-c"]})
    
    # CV of omega
    A = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["0"]["omega"] 
    B = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["1"]["omega"] 
    C = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["2"]["omega"] 
    #df_dict[basename].update({"CV(omega)": cv([A, B, C])})
    
    # CV of alpha
    D = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["0"]["rate"] 
    E = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["1"]["rate"] 
    F = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["2"]["rate"] 
    #df_dict[basename].update({"CV(alpha)": cv([D, E, F])})
    
    # omega 3
    #df_dict[basename].update({"omega_3": json_data_BUSTEDS_MH["fits"]["Unconstrained model"]
    #                          ["Rate Distributions"]["Test"]["2"]["omega"]})
    #df_dict[basename].update({"proportion_3": json_data_BUSTEDS_MH["fits"]["Unconstrained model"]
    #                          ["Rate Distributions"]["Test"]["2"]["proportion"]})
    
    #Omegas and proportions
    data = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]
    #w1 = round(data["0"]["omega"], 4)
    #p1 = round(data["0"]["proportion"], 4)
    #w2 = round(data["1"]["omega"], 4)
    #p2 = round(data["1"]["proportion"], 4)
    #w3 = round(data["2"]["omega"], 4)
    #p3 = round(data["2"]["proportion"], 4)
    w1 = data["0"]["omega"]
    p1 = data["0"]["proportion"]
    w2 = data["1"]["omega"]
    p2 = data["1"]["proportion"]
    w3 = data["2"]["omega"]
    p3 = data["2"]["proportion"]
    
    df_dict[basename].update({"w1": w1, "p1": p1})
    df_dict[basename].update({"w2": w2, "p2": p2})
    df_dict[basename].update({"w3": w3, "p3": p3})
    
    # SRV rates and proportions
    data = json_data_BUSTEDS_MH["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]
    #s1 = round(data["0"]["rate"], 4)
    #s_p1 = round(data["0"]["proportion"], 4)
    #s2 = round(data["1"]["rate"], 4)
    #s_p2 = round(data["1"]["proportion"], 4)
    #s3 = round(data["2"]["rate"], 4)
    #s_p3 = round(data["2"]["proportion"], 4)
    s1 = data["0"]["rate"]
    s_p1 = data["0"]["proportion"]
    s2 = data["1"]["rate"]
    s_p2 = data["1"]["proportion"]
    s3 = data["2"]["rate"]
    s_p3 = data["2"]["proportion"]
    
    df_dict[basename].update({"SRV1": s1, "SRV_p1": s_p1})
    df_dict[basename].update({"SRV2": s2, "SRV_p2": s_p2})
    df_dict[basename].update({"SRV3": s3, "SRV_p3": s_p3})
    
    # DH rate, TH rate, TH_SI rate
    df_dict[basename].update({"DH_Rate": float(json_data_BUSTEDS_MH["fits"]["Unconstrained model"]
                              ["rate at which 2 nucleotides are changed instantly within a single codon"])})
    df_dict[basename].update({"TH_Rate": float(json_data_BUSTEDS_MH["fits"]["Unconstrained model"]
                              ["rate at which 3 nucleotides are changed instantly within a single codon"])})
    df_dict[basename].update({"TH_Rate_SI": float(json_data_BUSTEDS_MH["fits"]["Unconstrained model"]
                              ["rate at which 3 nucleotides are changed instantly within a single codon between synonymous codon islands"])})

    # ER Sites, thresholded
    ER_SITES = []
    ER_df_dict = {}
    if "constrained" in json_data_BUSTEDS_MH["Evidence Ratios"].keys():
        #print("# ER Constrained Sites:", len(json_data_BUSTEDS_MH["Evidence Ratios"]["constrained"][0]))
        for site, val in enumerate(json_data_BUSTEDS_MH["Evidence Ratios"]["constrained"][0]):
            if val > ER_Threshold:
                ER_SITES.append(str(site + 1))
                ER_df_dict[site + 1] = {"BUSTEDS-MH ER": val}
            #end if
        #end for
        # add assert that there are more than 0 sites here.
        df_dict[basename].update({"BUSTEDS-MH_num_ER_Sites":  len(ER_df_dict.keys())})
        x = ER_df_dict.keys()
        x = [str(x) for x in x]
        df_dict[basename].update({"BUSTEDS-MH_ER_Sites":  "|".join(x)})
        #print(ER_df_dict.keys())
    #end if 
# end for

df_MH = pd.DataFrame.from_dict(df_dict, orient="index")
df_MH = df_MH.reset_index()
df_MH.index += 1
df_MH.rename(columns={'index': 'Gene'}, inplace = True)
#df_MH

100%|█████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 361.33it/s]

# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\adh.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\bglobin.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\camelid.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\COXI.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\ENCenv.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\flavNS5.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\HepatitisD.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\HIVvif.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\HIV_RT.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\InfluenzaA.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\lysin.nex.BUSTEDS-MH.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS-MH\lysozyme.nex.B




## Look over BUSTEDS Files

In [60]:
df_dict = {}

for item in tqdm(BUSTEDS_DIR_FILES):
    basename = os.path.basename(item).replace(".nex.BUSTEDS.json", "")
    # Read json
    #print()
    json_data_BUSTEDS = read_json(item)
    #print("# Data loaded:", item)
    
    df_dict[basename] = {"Method": "BUSTEDS"}
    df_dict[basename].update({"Sequences": json_data_BUSTEDS["input"]["number of sequences"]})
    df_dict[basename].update({"Codons": json_data_BUSTEDS["input"]["number of sites"]})
    df_dict[basename].update({"LRT p-value": json_data_BUSTEDS["test results"]["p-value"]})

    # cAIC
    df_dict[basename].update({"cAIC": json_data_BUSTEDS["fits"]["Unconstrained model"]["AIC-c"]})
    
    A = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["0"]["omega"] 
    B = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["1"]["omega"] 
    C = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]["2"]["omega"] 
    #df_dict[basename].update({"CV(omega)": cv([A, B, C])})
    
    D = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["0"]["rate"] 
    E = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["1"]["rate"] 
    F = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]["2"]["rate"] 
    #df_dict[basename].update({"CV(alpha)": cv([D, E, F])})
    
    #df_dict[basename].update({"omega_3": json_data_BUSTEDS["fits"]["Unconstrained model"]
    #                          ["Rate Distributions"]["Test"]["2"]["omega"]})
    #df_dict[basename].update({"proportion_3": json_data_BUSTEDS["fits"]["Unconstrained model"]
    #                          ["Rate Distributions"]["Test"]["2"]["proportion"]})
    
    #Omegas and proportions
    data = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Test"]
    w1 = data["0"]["omega"]
    p1 = data["0"]["proportion"]
    w2 = data["1"]["omega"]
    p2 = data["1"]["proportion"]
    w3 = data["2"]["omega"]
    p3 = data["2"]["proportion"]
    df_dict[basename].update({"w1": w1, "p1": p1})
    df_dict[basename].update({"w2": w2, "p2": p2})
    df_dict[basename].update({"w3": w3, "p3": p3})
    
    # SRV rates and proportions
    data = json_data_BUSTEDS["fits"]["Unconstrained model"]["Rate Distributions"]["Synonymous site-to-site rates"]
    s1 = data["0"]["rate"]
    s_p1 = data["0"]["proportion"]
    s2 = data["1"]["rate"]
    s_p2 = data["1"]["proportion"]
    s3 = data["2"]["rate"]
    s_p3 = data["2"]["proportion"]
    df_dict[basename].update({"SRV1": s1, "SRV_p1": s_p1})
    df_dict[basename].update({"SRV2": s2, "SRV_p2": s_p2})
    df_dict[basename].update({"SRV3": s3, "SRV_p3": s_p3})
    
    # ER Sites
    ER_SITES = []
    ER_df_dict = {}
    
    if "constrained" in json_data_BUSTEDS["Evidence Ratios"].keys():
        #print("# ER Constrained Sites:", len(json_data_BUSTEDS["Evidence Ratios"]["constrained"][0]))
        for site, val in enumerate(json_data_BUSTEDS["Evidence Ratios"]["constrained"][0]):
            if val > ER_Threshold:
                ER_SITES.append(str(site + 1))
                ER_df_dict[site + 1] = {"BUSTEDS ER": val}
            #end if
        #end for
        #df_dict[basename].update({"num_ER_Sites":  int(len(ER_df_dict.keys()))})
        df_dict[basename].update({"BUSTEDS_num_ER_Sites":  len(ER_df_dict.keys())})
        x = ER_df_dict.keys()
        x = [str(x) for x in x]
        df_dict[basename].update({"BUSTEDS_ER_Sites":  "|".join(x)})
        #print(ER_df_dict.keys())
    #end if   
# end for

df = pd.DataFrame.from_dict(df_dict, orient="index")
df = df.reset_index()
df.index += 1
df.rename(columns={'index': 'Gene'}, inplace = True)
#df

100%|█████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 333.53it/s]

# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\adh.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\bglobin.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\camelid.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\COXI.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\ENCenv.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\flavNS5.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\HepatitisD.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\HIVvif.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\HIV_RT.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\InfluenzaA.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\lysin.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\lysozyme.nex.BUSTEDS.json
# Reading: E:\BUSTEDS-MH\analysis\13-datasets\BUSTEDS\yok




## Calculate cAIC statistics

In [61]:
df["ΔcAIC"] = ""
df["RelativeSupport"] = ""
df["ER_Sites_Intersection"] = ""

for index, row in tqdm(df_MH.iterrows()):
    #MH_cAIC = df_MH[]
    #df_temp = df[df["Gene"] == gene]
    #print(df_temp)
    #print(row["Gene"], row["cAIC"])    
    print("# Processing", row["Gene"])
    gene = row["Gene"]
    MH_cAIC = float(row["cAIC"])
    
    BUSTEDS_cAIC = df[df["Gene"] == row["Gene"]]
    row_BUSTEDS = df[df["Gene"] == row["Gene"]]
    # df[df['LastName'] == 'Smith'].index
    index_BUSTEDS_cAIC = df[df["Gene"] == row["Gene"]].index
    index_BUSTEDS = df[df["Gene"] == row["Gene"]].index
    
    BUSTEDS_cAIC = float(BUSTEDS_cAIC["cAIC"])
    #print(float(BUSTEDS_cAIC["cAIC"]))
    best_model = min(MH_cAIC, BUSTEDS_cAIC)
    #print()
    #print("# Gene:", row["Gene"])

    if BUSTEDS_cAIC == best_model:
        which_is_best = "BUSTEDS"
        delta_cAIC = MH_cAIC - best_model
        relative_support = math.exp(-delta_cAIC/2)
        # add to table
        #df.at['C', 'x'] = 10
        # 
        df.at[index_BUSTEDS_cAIC, "ΔcAIC"] = delta_cAIC
        df.at[index_BUSTEDS_cAIC, "RelativeSupport"] = relative_support
    elif MH_cAIC == best_model:
        which_is_best = "BUSTEDS-MH"
        delta_cAIC = BUSTEDS_cAIC - best_model
        relative_support = math.exp(-delta_cAIC/2)
        df_MH.at[index, "ΔcAIC"] = delta_cAIC
        df_MH.at[index, "RelativeSupport"] = relative_support
    else:
        pass
    #end if
    #print("# Best model is:", best_model, which_is_best, "by", delta_cAIC)
    #print("# With relative support:", relative_support)
    
    # Intersections of ER Sites.
    print("# Examining ER Sites")
    # BUSTEDS-MH_ER_Sites
    # BUSTEDS_ER_Sites
    try:
        BUSTEDS_MH_ER_Sites = row["BUSTEDS-MH_ER_Sites"].split("|")
        BUSTEDS_df = df[df["Gene"] == row["Gene"]]
        BUSTEDS_ER_Sites    = BUSTEDS_df["BUSTEDS_ER_Sites"].tolist()[0].split("|")
        #print(BUSTEDS_MH_ER_Sites, BUSTEDS_ER_Sites)
        intersection = set(BUSTEDS_MH_ER_Sites).intersection(BUSTEDS_ER_Sites)
        print(intersection)
        df.at[index, "ER_Sites_Intersection"] = "|".join(intersection)
        df.at[index, "num_ER_Sites_Intersection"] = len(intersection)
    except:
        print("ERROR --", row["BUSTEDS-MH_ER_Sites"])
    #end try
    
    
    # Percent change w3
    #BUSTEDS_MH_w3 = row["w3"]
    #BUSTEDS_w3    = row_BUSTEDS["w3"])
    #w3_pctchg = pctchg(BUSTEDS_MH_w3, BUSTEDS_w3)
    #df_MH.at[index, "pctchg_w3"] = w3_pctchg
    df_MH.at[index, "pctchg_w3"] = pctchg(float(row["w3"]), float(row_BUSTEDS["w3"]))
    if float(row_BUSTEDS["p3"]) != 0:
        df_MH.at[index, "pctchg_p3"] = pctchg(float(row["p3"]), float(row_BUSTEDS["p3"]))
    else:
        df_MH.at[index, "pctchg_p3"] = np.nan
    
    df_MH.at[index, "w3_impact"] = float(row["w3"]) * float(row["w3"])
    df.at[index, "w3_impact"] = float(row_BUSTEDS["w3"]) * float(row_BUSTEDS["w3"])
#end for
    

13it [00:00, 216.78it/s]

# Processing adh
# Examining ER Sites
{'163', '197', '253', '165', '166', '6', '227', '216', '170', '133', '35', '69', '134', '49', '39'}
# Processing bglobin
# Examining ER Sites
{'110', '124', '50', '42', '54', '11', '74', '10', '116', '133', '48'}
# Processing camelid
# Examining ER Sites
{'57', '23', '54', '59', '32', '58', '78', '93', '52', '29', '80', '14', '40', '72', '33', '1', '53', '51', '50', '25'}
# Processing COXI
# Examining ER Sites
ERROR -- nan
# Processing ENCenv
# Examining ER Sites
ERROR -- nan
# Processing flavNS5
# Examining ER Sites
ERROR -- nan
# Processing HepatitisD
# Examining ER Sites
set()
# Processing HIVvif
# Examining ER Sites
ERROR -- nan
# Processing HIV_RT
# Examining ER Sites
{'181', '151', '122', '188', '245', '215', '64', '75', '69', '228', '48', '162'}
# Processing InfluenzaA
# Examining ER Sites
ERROR -- nan
# Processing lysin
# Examining ER Sites
{'70', '106', '41', '32', '107', '116', '119', '132', '6', '74', '87', '64', '75', '10', '44', '126',




## Concat tables


In [62]:
#df = df.sort_values(by="Sequences", ascending=False)
#df_MH = df_MH.sort_values(by="Sequences", ascending=False)

result = pd.concat([df_MH, df])
result = result.fillna("")
result = result.sort_values(by=["Gene", "Method"], ascending=True)
result = result.reset_index(drop=True)
result.index += 1
result

Unnamed: 0,Gene,Method,Sequences,Codons,LRT p-value,cAIC,w1,p1,w2,p2,...,BUSTEDS-MH_ER_Sites,pctchg_w3,pctchg_p3,w3_impact,ΔcAIC,RelativeSupport,BUSTEDS_num_ER_Sites,BUSTEDS_ER_Sites,ER_Sites_Intersection,num_ER_Sites_Intersection
1,COXI,BUSTEDS,21,510,0.5,24288.711706,0.0,0.530839,0.012662,0.459727,...,,,,1.175475,22.386048,1.4e-05,0.0,,,
2,COXI,BUSTEDS-MH,21,510,0.5,24311.097754,0.0,0.941922,0.273945,0.058078,...,,92.378533,0.0,1.003126,,,,,,
3,ENCenv,BUSTEDS,23,500,0.5,13699.078482,0.050236,1.0,0.115207,0.0,...,,,,1.384767,6.063006,0.048243,,,,
4,ENCenv,BUSTEDS-MH,23,500,0.5,13705.141488,0.038535,0.464794,0.059943,0.535206,...,,87.19683,,1.052878,,,,,,
5,HIV_RT,BUSTEDS,476,335,7.174705e-11,52048.518414,0.0,0.010372,0.152399,0.988619,...,,,,2397.486,,,22.0,36|39|48|64|65|69|72|75|103|104|122|138|151|16...,181|151|122|188|245|215|64|75|69|228|48|162,12.0
6,HIV_RT,BUSTEDS-MH,476,335,0.00131925,52037.174182,0.0,0.006741,0.150902,0.992547,...,48|64|69|75|122|151|162|181|188|215|228|245,88.574157,70.604002,1880.919,11.344231,0.003441,,,,
7,HIVvif,BUSTEDS,29,192,0.02270883,6911.64907,0.0,0.049597,0.759086,0.949923,...,,,,3248050.0,1.442446,0.486157,1.0,6,,
8,HIVvif,BUSTEDS-MH,29,192,0.5,6913.091516,0.59817,4e-06,0.610292,0.71441,...,,0.055487,59449.99258,1.0,,,,,,
9,HepatitisD,BUSTEDS,33,196,1.118888e-08,10424.225296,0.0,0.513135,0.668916,0.467758,...,,,,275.1555,,,23.0,6|9|13|17|24|28|31|35|38|75|85|90|117|122|140|...,,0.0
10,HepatitisD,BUSTEDS-MH,33,196,0.5,10423.732376,0.0,0.150724,0.074813,0.546368,...,,7.485536,1585.36127,1.541786,0.49292,0.781563,,,,


In [63]:
result.columns

Index(['Gene', 'Method', 'Sequences', 'Codons', 'LRT p-value', 'cAIC', 'w1',
       'p1', 'w2', 'p2', 'w3', 'p3', 'SRV1', 'SRV_p1', 'SRV2', 'SRV_p2',
       'SRV3', 'SRV_p3', 'DH_Rate', 'TH_Rate', 'TH_Rate_SI',
       'BUSTEDS-MH_num_ER_Sites', 'BUSTEDS-MH_ER_Sites', 'pctchg_w3',
       'pctchg_p3', 'w3_impact', 'ΔcAIC', 'RelativeSupport',
       'BUSTEDS_num_ER_Sites', 'BUSTEDS_ER_Sites', 'ER_Sites_Intersection',
       'num_ER_Sites_Intersection'],
      dtype='object')

In [64]:
"""dfv = result
dfv = dfv[['Gene', 'Sequences', 'Method', 'Codons', 'LRT p-value', 'cAIC', 'delta cAIC (best model)', 'Relative support',
       'CV(omega)', 'CV(alpha)', 'omega_3', 'proportion_3', 'DH_Rate',
       'TH_Rate', 'TH_Rate_SI', 'num_ER_Sites']]

dfv = dfv.fillna("")
dfv = dfv.sort_values(by=["Gene", "Method"], ascending=True)
dfv = dfv.reset_index(drop=True)
dfv.index += 1
"""

styled_table = result.style.background_gradient()
styled_table

Unnamed: 0,Gene,Method,Sequences,Codons,LRT p-value,cAIC,w1,p1,w2,p2,w3,p3,SRV1,SRV_p1,SRV2,SRV_p2,SRV3,SRV_p3,DH_Rate,TH_Rate,TH_Rate_SI,BUSTEDS-MH_num_ER_Sites,BUSTEDS-MH_ER_Sites,pctchg_w3,pctchg_p3,w3_impact,ΔcAIC,RelativeSupport,BUSTEDS_num_ER_Sites,BUSTEDS_ER_Sites,ER_Sites_Intersection,num_ER_Sites_Intersection
1,COXI,BUSTEDS,21,510,0.5,24288.711706,0.0,0.530839,0.012662,0.459727,1.084193,0.009435,0.033713,0.018909,0.571056,0.94399,12.406379,0.037101,,,,,,,,1.175475,22.386048,1.4e-05,0.0,,,
2,COXI,BUSTEDS-MH,21,510,0.5,24311.097754,0.0,0.941922,0.273945,0.058078,1.001562,0.0,0.0,0.006622,0.581815,0.468977,1.386616,0.524401,0.0,0.0,7.002608,,,92.378533,0.0,1.003126,,,,,,
3,ENCenv,BUSTEDS,23,500,0.5,13699.078482,0.050236,1.0,0.115207,0.0,1.176761,0.0,0.394251,0.294179,1.125621,0.679652,4.54703,0.026169,,,,,,,,1.384767,6.063006,0.048243,,,,
4,ENCenv,BUSTEDS-MH,23,500,0.5,13705.141488,0.038535,0.464794,0.059943,0.535206,1.026099,0.0,0.404335,0.304928,1.133397,0.669035,4.548329,0.026037,0.012054,0.0,0.0,,,87.19683,,1.052878,,,,,,
5,HIV_RT,BUSTEDS,476,335,0.0,52048.518414,0.0,0.010372,0.152399,0.988619,48.964132,0.001009,0.401593,0.514415,1.206216,0.414355,4.122033,0.07123,,,,,,,,2397.486186,,,22.0,36|39|48|64|65|69|72|75|103|104|122|138|151|162|163|181|188|207|215|219|228|245,181|151|122|188|245|215|64|75|69|228|48|162,12.0
6,HIV_RT,BUSTEDS-MH,476,335,0.001319,52037.174182,0.0,0.006741,0.150902,0.992547,43.369567,0.000712,0.399586,0.508307,1.191295,0.415507,3.962614,0.076186,0.040897,0.0,0.0,12.0,48|64|69|75|122|151|162|181|188|215|228|245,88.574157,70.604002,1880.919309,11.344231,0.003441,,,,
7,HIVvif,BUSTEDS,29,192,0.022709,6911.64907,0.0,0.049597,0.759086,0.949923,1802.234727,0.00048,0.29474,0.544405,1.159062,0.326581,3.573352,0.129015,,,,,,,,3248050.00993,1.442446,0.486157,1.0,6,,
8,HIVvif,BUSTEDS-MH,29,192,0.5,6913.091516,0.59817,4e-06,0.610292,0.71441,1.0,0.285586,0.298971,0.542319,1.154134,0.326209,3.509282,0.131472,0.00139,0.162476,0.0,,,0.055487,59449.99258,1.0,,,,,,
9,HepatitisD,BUSTEDS,33,196,0.0,10424.225296,0.0,0.513135,0.668916,0.467758,16.587812,0.019107,0.035107,0.209257,0.758552,0.596181,2.777617,0.194562,,,,,,,,275.155493,,,23.0,6|9|13|17|24|28|31|35|38|75|85|90|117|122|140|142|145|150|159|160|173|181|183,,0.0
10,HepatitisD,BUSTEDS-MH,33,196,0.5,10423.732376,0.0,0.150724,0.074813,0.546368,1.241687,0.302908,0.067812,0.238613,0.816145,0.562925,2.642278,0.198462,0.231317,0.0,0.0,0.0,,7.485536,1585.36127,1.541786,0.49292,0.781563,,,,


## Save table

In [65]:
print("Saving results to:", OUTPUT_CSV)
result.to_csv(OUTPUT_CSV, index=False)

Saving results to: E:\BUSTEDS-MH\tables\Table_13Datasets_BUSTEDS_and_BUSTEDS-MH.csv


## End of file

In [12]:
# Note Negative delta LL are convergence problems

In [13]:
# Lower AIC values indicate a better-fit model, and a model with a delta-AIC (the difference between the two AIC values being compared) of more than -2 is considered significantly better than the model it is being compared to

In [14]:
# Earth Mover's (Kantorovich) distance between two distrbuitions if you want a single number