In [17]:
import pandas
import os
import numpy
from IPython.core.display import display, HTML

In [34]:
def read_and_compare(dataset, channel="full"):
    """Score every distance matrix of ``dataset`` against the control matrix.

    Each ``*.csv`` file found in ``../data/trees/{channel}/{dataset}`` is read
    with its first column as the index and compared with
    ``Control with Clustal Omega.csv`` from the same directory. The score is
    the square root of the summed squared element-wise differences (i.e. the
    Frobenius norm of the difference), rounded to 4 decimals.

    Parameters
    ----------
    dataset : str
        Directory name below ``../data/trees/{channel}``; also used as the
        label of the single result row.
    channel : str, default "full"
        Directory name below ``../data/trees``. The column produced from
        ``MultiScale Structural Similarity Index Measure.csv`` is renamed to
        this value.

    Returns
    -------
    pandas.DataFrame
        A single row (index named ``"dataset"``) holding one column per CSV
        file, ordered alphabetically by file name.

    Raises
    ------
    FileNotFoundError
        If the control CSV (or the directory containing it) does not exist.
    """
    path = f"../data/trees/{channel}/{dataset}"
    # Load the reference once; only its raw values are needed below.
    control_values = pandas.read_csv(
        f"{path}/Control with Clustal Omega.csv", index_col=0
    ).values

    result_dict = {}
    # os.listdir() yields entries in arbitrary, filesystem-dependent order;
    # sorting keeps the column order stable between runs and machines.
    for file in sorted(os.listdir(path)):
        if not file.endswith(".csv"):
            continue
        # Strip only the final extension so dotted names stay intact.
        basename = ".".join(file.split(".")[0:-1])
        values = pandas.read_csv(f"{path}/{file}", index_col=0).values
        # NOTE(review): the comparison is positional (raw .values), so it
        # assumes every CSV lists its rows/columns in the same order as the
        # control file - confirm against whatever writes these files.
        result_dict[basename] = round(
            numpy.sqrt(numpy.sum((control_values - values) ** 2)), 4
        )

    result_df = pandas.DataFrame(result_dict, index=[dataset])
    # Label the MS-SSIM column by channel so per-channel frames can later be
    # placed side by side without clashing column names.
    result_df = result_df.rename(
        columns={"MultiScale Structural Similarity Index Measure": channel}
    )
    result_df.index.name = "dataset"
    return result_df

In [35]:
# Channel names; each one is used as the {channel} directory component in
# ../data/trees/{channel}/{dataset}.
channels = (
    "full",
    "red",
    "green",
    "blue",
)

# Dataset names; each one is used as the {dataset} directory component and as
# a row label in the summary tables.
datasets = [
    "orthologs_hemoglobin_beta",
    "orthologs_myoglobin",
    "orthologs_neuroglobin",
    "orthologs_cytoglobin",
    "orthologs_androglobin",
]

In [36]:
# One frame per channel, each stacking the per-dataset rows returned by
# read_and_compare().
sum_dfs = [
    pandas.concat([read_and_compare(dataset, channel) for dataset in datasets])
    for channel in channels
]

# Place the channel frames side by side, then drop repeated columns.
# NOTE(review): the transpose/drop_duplicates round-trip removes columns whose
# *values* are identical, not columns that share a name - two channels with
# identical scores would collapse into one column. Confirm this is intended.
pandas.concat(sum_dfs, axis=1).T.drop_duplicates().T

Unnamed: 0_level_0,Control with Clustal Omega,Global with Needleman-Wunsch,Local with Smith–Waterman,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
orthologs_hemoglobin_beta,0.0,0.9085,0.8937,0.1542,0.8342,0.7896,0.8
orthologs_myoglobin,0.0,0.9196,0.9044,0.0954,0.8046,0.7975,0.7811
orthologs_neuroglobin,0.0,1.8043,1.1514,1.2176,0.9789,0.9323,0.9554
orthologs_cytoglobin,0.0,6.3417,3.1095,2.7743,1.8149,1.7804,1.7816
orthologs_androglobin,0.0,8.2855,2.9601,4.0221,1.0736,1.0347,1.0623


In [43]:
def read_and_compare_channels(dataset, channel_names=None):
    """Build a channel-by-channel distance table for one dataset.

    For every channel the file
    ``../data/trees/{channel}/{dataset}/MultiScale Structural Similarity Index Measure.csv``
    is read with its first column as the index. Each pair of channels is then
    scored with the square root of the summed squared element-wise differences
    of their matrices (the Frobenius norm of the difference), rounded to 4
    decimals.

    Parameters
    ----------
    dataset : str
        Directory name below ``../data/trees/{channel}``.
    channel_names : iterable of str, optional
        Channels to compare. When omitted, the module-level ``channels``
        sequence is used, which preserves the original call signature.

    Returns
    -------
    pandas.DataFrame
        Square table with one row and one column per channel, in the order
        given. The index is named ``"dataset"``.

    Raises
    ------
    FileNotFoundError
        If the MS-SSIM CSV of any requested channel is missing.
    """
    if channel_names is None:
        # Fall back to the notebook-level default so existing calls still work.
        channel_names = channels

    # Load each channel's matrix once and keep only the raw values.
    matrices = {}
    for channel in channel_names:
        path = f"../data/trees/{channel}/{dataset}"
        matrices[channel] = pandas.read_csv(
            f"{path}/MultiScale Structural Similarity Index Measure.csv", index_col=0
        ).values

    # NOTE(review): matrices are compared positionally, so every channel's CSV
    # is assumed to list rows/columns in the same order - confirm upstream.
    result_dict = {
        row_channel: {
            col_channel: round(numpy.sqrt(numpy.sum((row_values - col_values) ** 2)), 4)
            for col_channel, col_values in matrices.items()
        }
        for row_channel, row_values in matrices.items()
    }

    result_df = pandas.DataFrame.from_dict(result_dict, orient='index')
    # NOTE(review): the rows are channels, yet the index is labelled "dataset";
    # the label is kept unchanged for compatibility with existing output.
    result_df.index.name = "dataset"
    return result_df

In [45]:
# Render one centred heading plus the channel-vs-channel table per dataset.
for dataset in datasets:
    # Show the heading before computing the table, so it is visible even if
    # loading the dataset's files fails.
    heading = HTML(f"<center><h3>{dataset}</h3></center>")
    display(heading)
    channel_table = read_and_compare_channels(dataset)
    display(channel_table)

Unnamed: 0_level_0,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
full,0.0,0.6914,0.6469,0.6569
red,0.6914,0.0,0.0479,0.0384
green,0.6469,0.0479,0.0,0.0188
blue,0.6569,0.0384,0.0188,0.0


Unnamed: 0_level_0,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
full,0.0,0.7273,0.7197,0.7035
red,0.7273,0.0,0.0237,0.0267
green,0.7197,0.0237,0.0,0.0285
blue,0.7035,0.0267,0.0285,0.0


Unnamed: 0_level_0,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
full,0.0,1.7868,1.7258,1.769
red,1.7868,0.0,0.062,0.027
green,1.7258,0.062,0.0,0.0476
blue,1.769,0.027,0.0476,0.0


Unnamed: 0_level_0,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
full,0.0,3.5723,3.5267,3.5294
red,3.5723,0.0,0.0605,0.0616
green,3.5267,0.0605,0.0,0.0217
blue,3.5294,0.0616,0.0217,0.0


Unnamed: 0_level_0,full,red,green,blue
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
full,0.0,3.2925,3.3489,3.3153
red,3.2925,0.0,0.0593,0.0262
green,3.3489,0.0593,0.0,0.0359
blue,3.3153,0.0262,0.0359,0.0
