In [4]:
!pip install altair pandas numpy
import altair as alt
import pandas as pd
import numpy as np
# Norm encodings.
# Every norm is stored in the results file as a bit-string. The dicts below
# pair each bit-string with the label it is replaced by after loading; the
# ``*_b`` (bit-string) and ``*_n`` (name) lists are derived from them and stay
# aligned position by position.
# Reference points, as labelled by the mapping below:
#   SJ normal: (1,1,0,0,0,0,1,1)
#   SJ nice:   (1,1,0,1,0,1,1,1)
#   SJ Mean:   (1,1,1,0,1,0,1,1)
# NOTE(review): an earlier comment called (1,1,0,0,0,0,1,1) "mean sj", which
# contradicts the label list ("SJ normal") - confirm which one is intended.

ebnorm_labels = {
    "(1,1,0,0,0,0,1,1)": "SJ normal",
    "(1,1,0,1,0,1,1,1)": "SJ nice",
    "(1,1,1,0,1,0,1,1)": "SJ Mean",
    "(0,0,0,0,1,1,1,1)": "IS normal",
    "(0,1,0,1,1,1,1,1)": "IS nice",
    "(0,1,1,1,0,1,1,1)": "IS weird",
    "(1,1,0,0,1,1,1,1)": "SS normal",
    "(1,1,0,1,1,1,1,1)": "SS nice",
    "(0,0,0,0,0,0,1,1)": "SH normal",
    # This entry used to be a second "SH normal", which made two different
    # bit-strings indistinguishable after relabelling. "SH nice" follows the
    # pattern of the other "nice" variants (bits 2, 4 and 6 set).
    # NOTE(review): confirm the intended label.
    "(0,1,0,1,0,1,1,1)": "SH nice",
    "(0,0,0,1,0,1,1,1)": "IS Empirical",
}
ebnorms_b = list(ebnorm_labels)
ebnorms_n = list(ebnorm_labels.values())

norm_simple_labels = {
    "(1,0,0,1)": "SJ",
    "(1,0,1,1)": "SS",
    "(0,0,0,1)": "SH",
    "(0,0,1,1)": "IS",
}
norms_simple_b = list(norm_simple_labels)
norms_simple_n = list(norm_simple_labels.values())

# Column names applied, in order, to the header-less results file.
columns = ["EBNorm", "Norm", "z", "g", "mu", "chi", "eps", "alpha", "gamma", "pdx", "acr", "B", "G", "AllD", "pDisc", "Disc", "AllC", "mean", "nice"]

def load_and_process(path: str = "outputs/results.txt") -> pd.DataFrame:
    """Load the tab-separated results file and prepare it for analysis.

    Steps:
      1. Read the header-less file at ``path`` and name its columns after
         the module-level ``columns`` list.
      2. Divide the ``eps``, ``chi`` and ``alpha`` columns by ``z``.
      3. Replace the bit-string codes in ``EBNorm`` / ``Norm`` with their
         readable labels (``ebnorms_b`` -> ``ebnorms_n`` and
         ``norms_simple_b`` -> ``norms_simple_n``).
      4. Drop every row that contains a missing value.

    Args:
        path: Location of the results file. Defaults to the path the
            notebook has always used.

    Returns:
        The processed frame; the original row index is preserved.
    """
    data: pd.DataFrame = pd.read_csv(path, sep="\t", header=None, names=columns, index_col=False)

    for rate in ("eps", "chi", "alpha"):
        data[rate] = data[rate] / data["z"]

    data["EBNorm"] = data["EBNorm"].replace(ebnorms_b, ebnorms_n)
    data["Norm"] = data["Norm"].replace(norms_simple_b, norms_simple_n)

    # dropna() returns a new frame; the previous bare ``data.dropna()`` call
    # threw that result away, so incomplete rows were never removed.
    return data.dropna()

def filter(frame: pd.DataFrame, feature: str, condition) -> pd.DataFrame:
    """Return the rows of ``frame`` whose ``feature`` column equals ``condition``.

    The comparison is an exact ``==``. Note that this definition shadows
    Python's built-in ``filter`` for the rest of the notebook.
    """
    matches = frame[feature] == condition
    return frame.loc[matches]
    
# Load and pre-process the results; the cells below filter this frame.
data = load_and_process()




In [5]:
# Row count per chi value (after division by z) among the "IS Empirical" rows.
data.loc[data["EBNorm"] == "IS Empirical", "chi"].value_counts()

0.00002    6069
0.00200     560
0.02000     560
0.20000     560
0.00020     556
Name: chi, dtype: int64

In [6]:
chosen_norm: str = "IS Empirical"

# Keep only the rows for the chosen EBNorm under the "IS" norm at z = 50, with
# eps and chi pinned to the smallest value of their grid. alpha is left free
# because it is the x-axis of the heatmap.
# To sweep eps or chi instead: pin alpha here, leave the swept column free,
# and put its name in place of "alpha" in new_cols.
pinned_rate = 0.00002
keep = (
    (data.EBNorm == chosen_norm)
    & (data.Norm == "IS")
    & (data.z == 50)
    & (data.eps == pinned_rate)
    & (data.chi == pinned_rate)
)
filtered = data[keep]

# Columns of the per-grid-point summary table; the second entry is the swept
# parameter.
new_cols = ["gamma", "alpha", "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]
new_df = pd.DataFrame(columns=new_cols)

# Parameter grids: gamma from 0 to 1 in steps of 0.1, and one shared set of
# values for each of the three rate parameters.
gammas = [round(step, 2) for step in np.arange(0, 1.01, 0.1)]
rate_grid = (0.00002, 0.0002, 0.002, 0.02, 0.2)
epsilons = list(rate_grid)
chis = list(rate_grid)
alphas = list(rate_grid)

In [7]:
# Average every metric over the rows found at each (gamma, alpha) grid point.
# The rows are collected in a list and the table is built once at the end, so
# re-running this cell rebuilds ``new_df`` from scratch instead of appending
# duplicate rows to an already filled frame.
# To sweep eps or chi instead, iterate over ``epsilons`` / ``chis`` and compare
# against ``filtered.eps`` / ``filtered.chi``.
summary_rows = []
for gamma in gammas:
    for alpha in alphas:
        # Exact float comparison: a grid point with no matching rows yields
        # NaN means.
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered.alpha == alpha)]

        summary_rows.append([
            gamma,
            alpha,
            super_filtered.acr.mean(),
            super_filtered.G.mean(),     # stored under "g_reps"
            super_filtered.nice.mean(),  # stored under "nice_eps"
            super_filtered.AllD.mean(),
            super_filtered.pDisc.mean(),
            super_filtered.Disc.mean(),
            super_filtered.AllC.mean(),
        ])

new_df = pd.DataFrame(summary_rows, columns=new_cols)

# Quantitative field used both for the cell colour and the cell annotation.
feature: str = "acr:Q"

# gamma runs top-to-bottom in descending order; the colour scale is fixed to
# the range 0-90. For an eps or chi sweep use 'eps:O' / 'chi:O' as x.
gamma_axis = alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending'))
acr_colour = alt.Color(feature, scale=alt.Scale(domain=[0,90]))

heatmap = (
    alt.Chart(new_df)
    .mark_rect()
    .encode(x='alpha:O', y=gamma_axis, color=acr_colour)
    .properties(height=400, width=400, title=chosen_norm)
)

# Same encodings as the heatmap, drawn as black two-decimal labels.
text = heatmap.mark_text().encode(
    color=alt.value('black'),
    text=alt.Text(feature, format=".2f"),
)

#heatmap+text

In [8]:
# Overlay the value labels on the heatmap and display the layered chart.
alt.layer(heatmap, text)



## Difference in ACR: "IS Empirical" vs. "IS normal"

In [11]:
data = load_and_process()

# Keep z = 50 with eps and chi pinned; alpha is the swept x-axis parameter.
# The two EBNorms being compared are separated inside the loop.
# NOTE(review): unlike the single-norm heatmap earlier in the notebook, rows
# are not restricted to Norm == "IS" here - confirm that this is intended.
# To sweep eps or chi instead: pin alpha here, leave the swept column free,
# and put its name in place of "alpha" in new_cols.
pinned_rate = 0.00002
filtered = data[(data.z == 50) & (data.eps == pinned_rate) & (data.chi == pinned_rate)]

# Columns of the difference table; the second entry is the swept parameter.
new_cols = ["gamma", "alpha", "acr"]
new_df = pd.DataFrame(columns=new_cols)

# Parameter grids: gamma from 0 to 1 in steps of 0.1, and the alpha values.
gammas = [round(step, 2) for step in np.arange(0, 1.01, 0.1)]
alphas = [0.00002, 0.0002, 0.002, 0.02, 0.2]

# For every (gamma, alpha) grid point, compute
#   mean acr of the "IS Empirical" rows - mean acr of the "IS normal" rows.
# The rows are collected in a list and the table is built once at the end, so
# re-running this code rebuilds ``new_df`` instead of appending duplicates.
# To sweep eps or chi instead, iterate over the corresponding grid and compare
# against ``filtered.eps`` / ``filtered.chi``.
difference_rows = []
for gamma in gammas:
    for alpha in alphas:
        # Exact float comparison: if either EBNorm has no rows at this grid
        # point its mean is NaN, and so is the difference.
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered.alpha == alpha)]

        mean_acr_empirical: float = super_filtered[super_filtered.EBNorm == "IS Empirical"].acr.mean()
        mean_acr_normal: float = super_filtered[super_filtered.EBNorm == "IS normal"].acr.mean()

        difference_rows.append([gamma, alpha, mean_acr_empirical - mean_acr_normal])

new_df = pd.DataFrame(difference_rows, columns=new_cols)
        
# Quantitative field used both for the cell colour and the cell annotation.
feature: str = "acr:Q"

# gamma runs top-to-bottom in descending order. The diverging colour scale is
# symmetric around zero (-63 to 63). For an eps or chi sweep use 'eps:O' /
# 'chi:O' as x.
gamma_axis = alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending'))
difference_colour = alt.Color(
    feature,
    scale=alt.Scale(domain=[-63,63], scheme='blueorange'),
    sort="descending",
)

heatmap = (
    alt.Chart(new_df)
    .mark_rect()
    .encode(x='alpha:O', y=gamma_axis, color=difference_colour)
    .properties(height=400, width=400, title="ACR Difference between EB-IS and IS")
)

# Same encodings as the heatmap, drawn as black two-decimal labels.
text = heatmap.mark_text().encode(
    color=alt.value('black'),
    text=alt.Text(feature, format=".2f"),
)

# Overlay the labels on the heatmap and display the layered chart.
alt.layer(heatmap, text)
