In [1]:
import altair as alt
import pandas as pd
import numpy as np
# norms
# mean sj: (1,1,0,0,0,0,1,1)
# nice sj: (1,1,0,1,0,1,1,1)
#
# NOTE(review): the two lines above call (1,1,0,0,0,0,1,1) "mean sj", but the
# label list below names it "SJ normal" and gives "SJ Mean" to
# (1,1,1,0,1,0,1,1) -- confirm which naming is intended.
#
# ebnorms_b[i] is the raw encoding as it appears in the results file and
# ebnorms_n[i] is the readable label it is replaced with, so the two lists
# must stay the same length and in the same order. The same holds for
# norms_simple_b / norms_simple_n.

ebnorms_b = ["(1,1,0,0,0,0,1,1)", "(1,1,0,1,0,1,1,1)", "(1,1,1,0,1,0,1,1)", "(0,0,0,0,1,1,1,1)", "(0,1,0,1,1,1,1,1)", "(0,1,1,1,0,1,1,1)",
          "(1,1,0,0,1,1,1,1)", "(1,1,0,1,1,1,1,1)", "(0,0,0,0,0,0,1,1)", "(0,1,0,1,0,1,1,1)", "(0,0,0,1,0,1,1,1)"]

# Labels must be unique, otherwise two different encodings become
# indistinguishable after the replacement. (0,1,0,1,0,1,1,1) relates to
# "SH normal" exactly as the SJ/IS/SS "nice" encodings relate to their
# "normal" ones (normal OR-ed with (0,1,0,1,0,1,0,0)), hence "SH nice".
ebnorms_n = ["SJ normal", "SJ nice", "SJ Mean", "IS normal", "IS nice", "IS weird",
             "SS normal", "SS nice", "SH normal", "SH nice", "IS Empirical"]

norms_simple_b = ["(1,0,0,1)", "(1,0,1,1)", "(0,0,0,1)", "(0,0,1,1)"]

norms_simple_n = ["SJ", "SS", "SH", "IS"]

# Column names applied to the header-less, tab-separated results file.
columns = ["EBNorm", "Norm", "z", "g", "mu", "chi", "eps", "gamma", "pdx", "acr", "B", "G", "AllD", "pDisc", "Disc", "AllC", "mean", "nice"]

def load_and_process(path: str = "outputs/results.txt") -> pd.DataFrame:
    """Read the tab-separated results file and return a cleaned DataFrame.

    Processing steps, in order:
      1. read the header-less file, naming the columns after ``columns``;
      2. divide ``eps`` and ``chi`` by ``z``;
      3. drop every row that contains a missing value;
      4. replace the raw ``EBNorm`` / ``Norm`` encodings with readable labels.

    Args:
        path: location of the results file; defaults to the path this
            notebook has always read from.

    Returns:
        A new DataFrame; nothing is modified in place.
    """
    raw: pd.DataFrame = pd.read_csv(path, sep="\t", header=None, names=columns, index_col=False)
    data: pd.DataFrame = (
        raw
        .assign(
            eps=lambda df: df.eps / df.z,
            chi=lambda df: df.chi / df.z,
        )
        # dropna() returns a new frame; the result has to be kept, otherwise
        # the rows with missing values silently stay in the data.
        .dropna()
        .assign(
            EBNorm=lambda df: df.EBNorm.replace(ebnorms_b, ebnorms_n),
            Norm=lambda df: df.Norm.replace(norms_simple_b, norms_simple_n),
        )
    )
    return data

def filter(frame: pd.DataFrame, feature: str, condition) -> pd.DataFrame:
    """Select the rows of ``frame`` whose ``feature`` column equals ``condition``.

    The comparison is a plain ``==``, so float columns must match exactly.
    Note that this name shadows Python's built-in ``filter`` in this notebook.
    """
    matches = frame[feature] == condition
    return frame.loc[matches]
    
# Load the results once; the cells below read this module-level frame.
data = load_and_process()


In [2]:
# Show the processed frame as the cell output.
data

Unnamed: 0,EBNorm,Norm,z,g,mu,chi,eps,gamma,pdx,acr,B,G,AllD,pDisc,Disc,AllC,mean,nice
0,IS Empirical,IS,50,2500,0.1,0.00002,0.0002,0.0,True,34.215713,0.52,0.48,0.02,0.98,0.0,0.0,0.02,0.98
1,IS Empirical,IS,50,2500,0.1,0.00002,0.0002,0.0,True,28.055998,0.00,1.00,0.00,1.00,0.0,0.0,1.00,0.00
2,IS Empirical,IS,50,2500,0.1,0.00002,0.0002,0.0,True,97.649345,0.00,1.00,0.00,1.00,0.0,0.0,0.06,0.94
3,IS Empirical,IS,50,2500,0.1,0.00002,0.0002,0.0,True,56.141242,0.00,1.00,0.00,1.00,0.0,0.0,0.00,1.00
4,IS Empirical,IS,50,2500,0.1,0.00002,0.0002,0.0,True,3.908475,1.00,0.00,1.00,0.00,0.0,0.0,0.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2745,IS Empirical,IS,50,2500,0.1,0.20000,0.0002,0.8,True,56.466375,0.02,0.98,0.00,1.00,0.0,0.0,0.00,1.00
2746,IS Empirical,IS,50,2500,0.1,0.20000,0.0002,0.9,True,0.462797,1.00,0.00,1.00,0.00,0.0,0.0,1.00,0.00
2747,IS Empirical,IS,50,2500,0.1,0.20000,0.0002,0.9,True,62.507601,0.00,1.00,0.00,1.00,0.0,0.0,0.00,1.00
2748,IS Empirical,IS,50,2500,0.1,0.20000,0.0002,1.0,True,0.299037,0.02,0.98,1.00,0.00,0.0,0.0,0.00,1.00


In [3]:
# Pick the slice of the results to plot: one EBNorm label, Norm "IS",
# z == 50 and eps == 0.0002 (eps as stored after load_and_process).
chosen_norm: str = "IS Empirical"
filtered = data[(data.EBNorm == chosen_norm) & (data.Norm == "IS")]
filtered = filter(filter(filtered, "z", 50), "eps", 0.0002)

# Summary table, filled by the next cell with one row per (gamma, chi) pair.
# To sweep eps instead of chi, use "eps" as the second column here, filter on
# chi above, and iterate over the same five values as `epsilons`.
new_cols = ["gamma", "chi", "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]
new_df = pd.DataFrame(columns=new_cols)

# Grid values matched (by exact equality) against the data in the next cell.
gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
chis = [0.00002, 0.0002, 0.002, 0.02, 0.2]

In [4]:
# Average each metric over the runs that fall into every (gamma, chi) grid
# cell. The rows are collected in a list and `new_df` is rebuilt from it, so
# re-running this cell yields the same table instead of appending a second
# copy of every row to the frame created in the previous cell.
# Source columns, in the order of new_cols[2:] ("G" -> "g_reps", "nice" -> "nice_eps").
metric_sources = ["acr", "G", "nice", "AllD", "pDisc", "Disc", "AllC"]

records = []
for gamma in gammas:
    #for epsilon in epsilons:
    for chi in chis:
        #super_filtered = filtered[(filtered.gamma == gamma) & (filtered.eps == epsilon)]
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered.chi == chi)]
        # An empty selection gives NaN means, which keeps the grid cell in the table.
        records.append([gamma, chi] + [super_filtered[source].mean() for source in metric_sources])

new_df = pd.DataFrame(records, columns=new_cols)

# Quantitative field that drives both the cell colour and the printed value.
feature: str = "acr" + ":Q"

heatmap = alt.Chart(new_df).mark_rect().encode(
    x='chi:O',
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    # For a fixed colour range use:
    #color=alt.Color(feature, scale=alt.Scale(domain=[5,30]))
    color=feature
).properties(
    height=400,
    width=400,
    title = chosen_norm
)

# Same encoding as the heatmap, drawn as black two-decimal labels.
text = heatmap.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

#heatmap+text

In [5]:
# Layer the value labels over the heatmap and show the result.
heatmap + text



In [43]:
# Inspect the individual runs of a single parameter combination.
chosen_norm: str = "IS Empirical"
selection = (
    (data.EBNorm == chosen_norm)
    & (data.Norm == "IS")
    & (data["z"] == 50)
    & (data["chi"] == 0.01)
    & (data["gamma"] == 1)
    & (data["eps"] == 1)
)
filtered = data[selection]

filtered

Unnamed: 0,EBNorm,Norm,z,g,mu,chi,eps,gamma,pdx,acr,B,G,AllD,pDisc,Disc,AllC,mean,nice
87757,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,4.580704,0.34,0.66,0.0,0.0,0.0,1.0,0.0,1.0
87778,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,4.269001,1.0,0.0,0.0,0.0,0.02,0.98,1.0,0.0
87913,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,0.461165,0.44,0.56,0.0,0.0,0.0,1.0,0.0,1.0
87914,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,0.575406,0.48,0.52,0.0,0.02,0.0,0.98,0.02,0.98
87932,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,4.342199,0.66,0.34,0.0,0.0,0.0,1.0,0.0,1.0
87951,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,6.631747,0.06,0.94,0.0,0.0,0.0,1.0,0.0,1.0
87968,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,0.825824,0.22,0.78,0.0,0.0,0.0,1.0,0.0,1.0
87976,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,10.430136,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
87977,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,0.803852,0.3,0.7,0.0,0.0,0.0,1.0,0.0,1.0
87978,IS Empirical,IS,50.0,4000.0,0.1,0.01,1.0,1.0,True,0.586662,0.64,0.36,0.0,0.24,0.0,0.76,0.24,0.76


In [18]:
data = load_and_process()
# Each filter narrows the result of the previous one. Passing `data` to every
# call would discard the earlier filters and keep only the last condition.
filtered = filter(data, "z", 50)
filtered = filter(filtered, "chi", 0.01)
filtered = filter(filtered, "Norm", "SJ")
# NOTE(review): the chart title below says "EB-IS and IS" while the rows are
# restricted to Norm == "SJ" -- confirm that this is the intended slice.

new_cols = ["gamma", "eps", "acr"]

gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
#epsilons = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
epsilons = [0.00002, 0.0002, 0.002, 0.02, 0.2]

# One record per (gamma, eps) grid cell: mean acr of the "IS Empirical" runs
# minus mean acr of the "IS normal" runs. Records are collected in a list and
# turned into a frame once, so re-running the cell never duplicates rows.
records = []
for gamma in gammas:
    for epsilon in epsilons:
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered.eps == epsilon)]

        mean_acr_nice: float = super_filtered[super_filtered.EBNorm=="IS Empirical"].acr.mean()
        mean_acr_normal: float = super_filtered[super_filtered.EBNorm=="IS normal"].acr.mean()
        # NaN when either group has no runs in this grid cell.
        records.append([gamma, epsilon, mean_acr_nice-mean_acr_normal])

new_df = pd.DataFrame(records, columns=new_cols)

# Quantitative field that drives both the cell colour and the printed value.
feature: str = "acr" + ":Q"

heatmap = alt.Chart(new_df).mark_rect().encode(
    x='eps:O',
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    # For a fixed diverging colour range use:
    #color=alt.Color(feature, scale=alt.Scale(domain=[-20,20], scheme='blueorange'), sort="descending")
    color=feature
).properties(
    height=400,
    width=400,
    title = "ACR Difference between EB-IS and IS"
)

# Same encoding as the heatmap, drawn as black two-decimal labels.
text = heatmap.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

heatmap + text


  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)
