In [6]:
!pip install altair pandas numpy
import altair as alt
import pandas as pd
import numpy as np
# norms
# Emotion-based norms are written as 8-bit strings.  Reading the tables below,
# the "normal" variant repeats each bit of the 4-bit norm twice, the "nice"
# variant sets the second bit of every pair to 1 and "SJ Mean" sets the first
# bit of every pair to 1.
# nice sj: (1,1,0,1,0,1,1,1)
# NOTE(review): this header used to say "mean sj: (1,1,0,0,0,0,1,1)", but the
# table labels that string "SJ normal" and labels (1,1,1,0,1,0,1,1) "SJ Mean"
# -- confirm which naming is intended.

# Bit-string encodings as they appear in the EBNorm column of the results file.
ebnorms_b = [
    "(1,1,0,0,0,0,1,1)", "(1,1,0,1,0,1,1,1)", "(1,1,1,0,1,0,1,1)",
    "(0,0,0,0,1,1,1,1)", "(0,1,0,1,1,1,1,1)", "(0,1,1,1,0,1,1,1)",
    "(1,1,0,0,1,1,1,1)", "(1,1,0,1,1,1,1,1)",
    "(0,0,0,0,0,0,1,1)", "(0,1,0,1,0,1,1,1)",
    "(0,0,0,1,0,1,1,1)",
]

# Human-readable labels, position-aligned with ebnorms_b.  Labels must be
# unique: two encodings sharing a label become indistinguishable after the
# replace() in load_and_process.  (0,1,0,1,0,1,1,1) was previously labelled
# "SH normal" a second time; it follows the "nice" bit pattern of the other
# families, so it is labelled "SH nice" here.
ebnorms_n = [
    "SJ normal", "SJ nice", "SJ Mean",
    "IS normal", "IS nice", "IS weird",
    "SS normal", "SS nice",
    "SH normal", "SH nice",
    "IS Empirical",
]

# 4-bit encodings found in the Norm column and their short names.
norms_simple_b = ["(1,0,0,1)", "(1,0,1,1)", "(0,0,0,1)", "(0,0,1,1)"]

norms_simple_n = ["SJ", "SS", "SH", "IS"]

# Column names applied to the header-less results file, in file order.
columns = ["EBNorm", "Norm", "z", "g", "mu", "chi", "eps", "alpha", "gamma", "pdx", "acr", "B", "G", "AllD", "pDisc", "Disc", "AllC", "mean", "nice"]

def load_and_process(path: str = "outputs/results.txt") -> pd.DataFrame:
    """Load the simulation results and prepare them for plotting.

    The file is read as tab-separated text without a header row; column names
    come from the module-level ``columns`` list.

    Processing steps:
      * ``eps``, ``chi`` and ``alpha`` are divided by ``z``.
      * Encoded norm strings in ``EBNorm`` / ``Norm`` are replaced by their
        readable labels (``ebnorms_n`` / ``norms_simple_n``).
      * Rows containing any missing value are dropped.

    Args:
        path: Location of the results file. Defaults to the path the notebook
            has always used.

    Returns:
        The processed frame. Row labels of the surviving rows are unchanged.
    """
    data: pd.DataFrame = pd.read_csv(path, sep="\t", header=None, names=columns, index_col=False)

    # Normalise the three rate parameters by z.
    for rate in ("eps", "chi", "alpha"):
        data[rate] = data[rate] / data["z"]

    data["EBNorm"] = data["EBNorm"].replace(ebnorms_b, ebnorms_n)
    data["Norm"] = data["Norm"].replace(norms_simple_b, norms_simple_n)

    # dropna() returns a new frame; the previous bare `data.dropna()` call
    # discarded it, so incomplete rows were never actually removed.
    return data.dropna()

def filter(frame: pd.DataFrame, feature: str, condition) -> pd.DataFrame:
    """Return the rows of ``frame`` whose ``feature`` column equals ``condition``.

    Note: inside this notebook the name shadows Python's built-in ``filter``.
    """
    matches = frame[feature].eq(condition)
    return frame[matches]
    
# Module-level results frame; the exploratory cells below read from `data`.
data = load_and_process()




  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


In [9]:
# How many rows exist per eps value (eps has already been divided by z in
# load_and_process, so these are the normalised values used by the filters).
data.eps.value_counts()

0.00002    53827
0.02000     2741
0.00200     2218
0.20000     2210
0.00020     2204
0.01800     1055
0.01840     1055
0.01820     1054
0.01860     1051
0.01880     1049
0.01900     1049
0.01920      759
0.01960      528
0.01940      527
0.01980      526
Name: eps, dtype: int64

In [56]:
# Heatmap of the mean `acr` for one emotion-based norm over a
# gamma x <swept parameter> grid.
chosen_norm: str = "IS Empirical"

# Rate parameter placed on the x axis.  The other two rate parameters are
# pinned to `baseline_rate`.  Set this to "eps" or "chi" to sweep a different
# parameter instead of commenting lines in and out.
swept_parameter: str = "alpha"
baseline_rate: float = 0.00002

filtered = data[(data.EBNorm == chosen_norm) & (data.Norm == "IS")]
filtered = filter(filtered, "z", 50)
for pinned in ("eps", "chi", "alpha"):
    if pinned != swept_parameter:
        filtered = filter(filtered, pinned, baseline_rate)

new_cols = ["gamma", swept_parameter, "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]

gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
epsilons = [0.00002, 0.0002, 0.002, 0.02, 0.2]
chis = [0.00002, 0.0002, 0.002, 0.02, 0.2]
alphas = [0.00002, 0.0002, 0.002, 0.02, 0.2]
sweep_values = {"eps": epsilons, "chi": chis, "alpha": alphas}[swept_parameter]

# Collect one record per grid cell and build the frame once; growing a
# DataFrame with `.loc[len(df)] = ...` copies it on every iteration.
# Grid cells without matching rows keep NaN means so the grid stays complete.
rows = []
for gamma in gammas:
    for sweep_value in sweep_values:
        cell = filtered[(filtered.gamma == gamma) & (filtered[swept_parameter] == sweep_value)]
        rows.append([
            gamma,
            sweep_value,
            cell.acr.mean(),
            cell.G.mean(),       # stored as "g_reps"
            cell.nice.mean(),    # stored as "nice_eps"
            cell.AllD.mean(),
            cell.pDisc.mean(),
            cell.Disc.mean(),
            cell.AllC.mean(),
        ])
new_df = pd.DataFrame(rows, columns=new_cols)

feature: str = "acr" + ":Q"

# Named `norm_chart` rather than `heatmap`: a function called `heatmap` is
# defined further down, and re-running this cell afterwards would otherwise
# replace that function with a chart object.
norm_chart = alt.Chart(new_df).mark_rect().encode(
    x=f"{swept_parameter}:O",
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    color=alt.Color(feature, scale=alt.Scale(domain=[0,90]))
).properties(
    height=400,
    width=400,
    title = chosen_norm
)

norm_labels = norm_chart.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

# The chart is built but intentionally not displayed, as before.
# Uncomment to render it:
#norm_chart + norm_labels

## DIFFERENCE
### gamma in [0, 1] +0.1


In [58]:
# Heatmap of the difference in mean `acr` between the "IS Empirical" and
# "IS normal" emotion-based norms over a gamma x <chosen parameter> grid.
data = load_and_process()

chosen_parameter = "alpha"

# Initial filtering: z is fixed, and every rate parameter except the one
# being swept is pinned to its smallest value.
filtered = filter(data, "z", 50)
for pinned in ("eps", "chi", "alpha"):
    if pinned != chosen_parameter:
        filtered = filter(filtered, pinned, 0.00002)

# Define columns based on the chosen parameter
new_cols = ["gamma", chosen_parameter, "acr"]

gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]

# Define values based on the chosen parameter
parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

# Split by norm once, then average per grid cell.  Records are collected in a
# list and turned into a frame in one go instead of appending row by row.
empirical_rows = filtered[filtered.EBNorm == "IS Empirical"]
normal_rows = filtered[filtered.EBNorm == "IS normal"]

rows = []
for gamma in gammas:
    for parameter_value in parameter_values:
        mean_acr_empirical = empirical_rows[
            (empirical_rows.gamma == gamma) & (empirical_rows[chosen_parameter] == parameter_value)
        ].acr.mean()
        mean_acr_normal = normal_rows[
            (normal_rows.gamma == gamma) & (normal_rows[chosen_parameter] == parameter_value)
        ].acr.mean()
        # NaN when either norm has no rows for this grid cell.
        rows.append([gamma, parameter_value, mean_acr_empirical - mean_acr_normal])
new_df = pd.DataFrame(rows, columns=new_cols)

# Define feature for Altair chart
feature = "acr:Q"

# Create Altair chart.  Named `diff_chart` rather than `heatmap` so that
# re-running this cell cannot overwrite the `heatmap` function defined below.
diff_chart = alt.Chart(new_df).mark_rect().encode(
    x=f"{chosen_parameter}:O",
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    color=alt.Color(feature, scale=alt.Scale(domain=[-63, 63], scheme='blueorange'), sort="descending")
).properties(
    height=400,
    width=400,
    title="ACR Difference between EB-IS and IS"
)

# Add text annotations to the chart
diff_labels = diff_chart.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

# Display the chart with text annotations
diff_chart + diff_labels

  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


## Difference
### gamma in [0.9, 1] +0.01

In [148]:
def heatmap(chosen_parameter, baseline_norm=None):
    """Build a gamma x ``chosen_parameter`` heatmap of mean ``acr`` for "IS Empirical".

    The results are reloaded, restricted to ``z == 50``, and every rate
    parameter among ``eps`` / ``chi`` / ``alpha`` other than
    ``chosen_parameter`` is pinned to ``0.00002``.  For each gamma present in
    the filtered data and each swept value, the mean ``acr`` of the
    "IS Empirical" rows is computed.

    Args:
        chosen_parameter: ``"eps"``, ``"chi"`` or ``"alpha"``; the parameter
            placed on the x axis.
        baseline_norm: Optional EBNorm label (e.g. ``"IS normal"``).  When
            given, the baseline's mean ``acr`` is subtracted so the values
            are true differences.  When ``None`` (default) the plain mean is
            used, which is what this function computed before the parameter
            existed.

    Returns:
        ``(frame, chart)``: ``frame`` has columns ``gamma``,
        ``chosen_parameter`` and ``acr_diff``; ``chart`` is the heatmap
        layered with its text labels.  The column keeps the name ``acr_diff``
        in both modes because other cells select it by that name.
    """
    data = load_and_process()

    # Initial filtering
    filtered = filter(data, "z", 50)
    for pinned in ("eps", "chi", "alpha"):
        if pinned != chosen_parameter:
            filtered = filter(filtered, pinned, 0.00002)

    #gammas = [round(a, 2) for a in np.arange(0.9, 1.01, 0.01)]
    gammas = filtered.gamma.unique()

    # Define values based on the chosen parameter
    parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

    empirical_rows = filtered[filtered.EBNorm == "IS Empirical"]
    baseline_rows = None
    if baseline_norm is not None:
        baseline_rows = filtered[filtered.EBNorm == baseline_norm]

    def cell_mean(rows, gamma, parameter_value):
        # Mean acr of the rows falling in one grid cell (NaN if there are none).
        in_cell = (rows.gamma == gamma) & (rows[chosen_parameter] == parameter_value)
        return rows[in_cell].acr.mean()

    # Collect the records first and build the frame once instead of appending
    # to it row by row.
    records = []
    for gamma in gammas:
        for parameter_value in parameter_values:
            value = cell_mean(empirical_rows, gamma, parameter_value)
            if baseline_rows is not None:
                value = value - cell_mean(baseline_rows, gamma, parameter_value)
            records.append([gamma, parameter_value, value])
    new_df = pd.DataFrame(records, columns=["gamma", chosen_parameter, "acr_diff"])

    # Define feature for Altair chart
    feature = "acr_diff:Q"

    # Title and colour domain follow what is actually plotted.  Previously the
    # chart was always titled as a difference even though no baseline was
    # subtracted.
    if baseline_norm is None:
        title = "Mean ACR for IS Empirical"
        color_domain = [0, 100]
    else:
        title = "ACR Difference between IS Empirical and " + baseline_norm
        color_domain = [-63, 63]

    # Create Altair chart
    base = alt.Chart(new_df).mark_rect().encode(
        x=f"{chosen_parameter}:O",
        y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
        color=alt.Color(feature, scale=alt.Scale(domain=color_domain, scheme='blueorange'), sort="descending")
    ).properties(
        height=400,
        width=400,
        title=title
    )

    # Add text annotations to the chart
    text = base.mark_text().encode(
        text=alt.Text(feature, format=".2f"),
        color=alt.value('black')
    )

    return new_df, (base + text)

In [149]:
# Sweep eps and show the resulting heatmap.  The frame is bound to `eps_df`
# (it was previously stored under the misleading name `chi_df`, although it
# holds the eps sweep).
eps_df, chart = heatmap("eps")
chart

  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


In [81]:
# Build one sweep frame per rate parameter.  Only the frames are kept; the
# chart from each call is bound to `c` and overwritten by the next call.
# Each call goes through load_and_process(), so the results file is read
# three times here.
chi_df, c = heatmap("chi")
alpha_df, c = heatmap("alpha")
eps_df, c = heatmap("eps")

  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)
  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)
  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


In [82]:
# Inspect the eps sweep frame (columns: gamma, eps, acr_diff).
# NOTE(review): the saved output contains negative acr_diff values, so it
# looks like it was produced while heatmap() still subtracted a baseline --
# re-run to refresh.
eps_df

Unnamed: 0,gamma,eps,acr_diff
0,0.00,0.00002,1.553649
1,0.00,0.00020,-3.822173
2,0.00,0.00200,-2.763666
3,0.00,0.02000,-0.671216
4,0.00,0.20000,-0.145253
...,...,...,...
95,0.99,0.00002,44.499267
96,0.99,0.00020,53.153430
97,0.99,0.00200,41.275295
98,0.99,0.02000,10.295527


In [124]:
def _gamma_line(frame, colour):
    """Line chart of ``acr_diff`` against ``gamma`` drawn in ``colour``."""
    return alt.Chart(frame).mark_line(color=colour).encode(x="gamma", y="acr_diff")


# Slice every sweep frame at the same parameter value (0.0002) ...
chi_line_df, alpha_line_df, eps_line_df = (
    filter(sweep_frame, sweep_name, 0.0002)
    for sweep_frame, sweep_name in ((chi_df, "chi"), (alpha_df, "alpha"), (eps_df, "eps"))
)

# ... and draw one line per parameter: chi in green, alpha in red, eps in blue.
chi_line = _gamma_line(chi_line_df, "green")
alpha_line = _gamma_line(alpha_line_df, "red")
eps_line = _gamma_line(eps_line_df, "blue")

chi_line + alpha_line + eps_line

In [138]:
def param_heatmap(chosen_parameter: str, feature: str, norm: str, color_domain=(0, 1)):
    """Build a gamma x ``chosen_parameter`` heatmap of the mean of ``feature``.

    The results are reloaded, restricted to ``z == 50`` and to rows whose
    ``EBNorm`` equals ``norm``; every rate parameter among ``eps`` / ``chi`` /
    ``alpha`` other than ``chosen_parameter`` is pinned to ``0.00002``.

    Args:
        chosen_parameter: ``"eps"``, ``"chi"`` or ``"alpha"``; placed on the
            x axis.
        feature: Name of the results column to average in each grid cell.
        norm: EBNorm label to restrict the data to.
        color_domain: ``(low, high)`` bounds of the colour scale.  Defaults
            to ``(0, 1)``, the previously hard-coded range; pass a wider
            range for columns whose values are not confined to [0, 1].

    Returns:
        ``(frame, chart)``: ``frame`` has columns ``gamma``,
        ``chosen_parameter`` and ``value``; ``chart`` is the heatmap layered
        with its text labels.

    Raises:
        KeyError: If ``feature`` or ``chosen_parameter`` is not a column of
            the results frame.
    """
    data = load_and_process()

    # Initial filtering
    filtered = filter(data, "z", 50)
    for pinned in ("eps", "chi", "alpha"):
        if pinned != chosen_parameter:
            filtered = filter(filtered, pinned, 0.00002)

    filtered = filter(filtered, "EBNorm", norm)

    #gammas = [round(a, 2) for a in np.arange(0.9, 1.01, 0.01)]
    gammas = filtered.gamma.unique()

    # Define values based on the chosen parameter
    parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

    # Collect the records first and build the frame once instead of appending
    # to it row by row.  Cells without data keep a NaN mean.
    records = []
    for gamma in gammas:
        for parameter_value in parameter_values:
            in_cell = (filtered.gamma == gamma) & (filtered[chosen_parameter] == parameter_value)
            records.append([gamma, parameter_value, filtered[in_cell][feature].mean()])
    new_df = pd.DataFrame(records, columns=["gamma", chosen_parameter, "value"])

    # Define feature for Altair chart
    view_feature = "value:Q"

    # Create Altair chart.  The local is called `base` so it does not shadow
    # the module-level `heatmap` function inside this body.
    base = alt.Chart(new_df).mark_rect().encode(
        x=f"{chosen_parameter}:O",
        y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
        color=alt.Color(view_feature, scale=alt.Scale(domain=list(color_domain), scheme='blueorange'), sort="ascending")
    ).properties(
        height=400,
        width=400,
        title=chosen_parameter + " x " + feature + " x " + norm
    )

    # Add text annotations to the chart
    text = base.mark_text().encode(
        text=alt.Text(view_feature, format=".2f"),
        color=alt.value('black')
    )

    return new_df, (base + text)

In [143]:
# Mean of the AllD column for the "IS Empirical" norm while sweeping alpha.
# `d` receives the summary frame, `c` the layered chart, which is displayed.
d, c = param_heatmap("alpha", "AllD", "IS Empirical")
c

  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


In [121]:
# Inspect the frame returned by the last param_heatmap call.
# NOTE(review): the saved output has an `eps` column while the call in the
# preceding cell sweeps alpha, so this output is stale -- re-run to refresh.
d

Unnamed: 0,gamma,eps,value
0,0.00,0.00002,0.743684
1,0.00,0.00020,0.702745
2,0.00,0.00200,0.798824
3,0.00,0.02000,0.889697
4,0.00,0.20000,0.819608
...,...,...,...
95,0.99,0.00002,0.285135
96,0.99,0.00020,0.175510
97,0.99,0.00200,0.335600
98,0.99,0.02000,0.696400


In [10]:
# Sanity check: when one rate parameter is fixed at 0.02, which alpha values
# occur in the remaining rows?
# The subset is kept under its own name.  This cell used to overwrite the
# shared `data` frame with the filtered rows, so any cell run afterwards
# silently saw only that slice.
pinned_parameter = "eps"  # alternatives tried here: "alpha", "chi"
pinned_subset = filter(load_and_process(), pinned_parameter, 0.02)

pinned_subset.alpha.value_counts()

  data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)


0.00002    2741
Name: alpha, dtype: int64