In [45]:
#!pip install altair pandas numpy
import altair as alt
import pandas as pd
import numpy as np
# norms
# mean sj: (1,1,0,0,0,0,1,1)
# nice sj: (1,1,0,1,0,1,1,1)

ebnorms_b = ["(1,1,0,0,0,0,1,1)", "(1,1,0,1,0,1,1,1)", "(1,1,1,0,1,0,1,1)", "(0,0,0,0,1,1,1,1)", "(0,1,0,1,1,1,1,1)", "(0,1,1,1,0,1,1,1)",
          "(1,1,0,0,1,1,1,1)", "(1,1,0,1,1,1,1,1)", "(0,0,0,0,0,0,1,1)", "(0,1,0,1,0,1,1,1)", "(0,0,0,1,0,1,1,1)"]

ebnorms_n = ["SJ normal", "SJ nice", "SJ Mean", "IS normal", "IS nice", "IS weird",
             "SS normal", "SS nice", "SH normal", "SH normal", "IS Empirical"]

norms_simple_b = ["(1,0,0,1)", "(1,0,1,1)", "(0,0,0,1)", "(0,0,1,1)"]

norms_simple_n = ["SJ", "SS", "SH", "IS"]

columns = ["EBNorm", "Norm", "z", "g", "mu", "chi", "eps", "alpha", "gamma", "pdx", "acr", "B", "G", "AllD", "pDisc", "Disc", "AllC", "mean", "nice"]

def load_and_process() -> pd.DataFrame: 
    data: pd.DataFrame = pd.read_csv("outputs/results.txt", sep="\t", header=None, names=columns, index_col=False)
    data.eps = data.eps / data.z
    data.chi = data.chi / data.z
    data.alpha = data.alpha / data.z
    data.dropna()
    data.EBNorm = data.EBNorm.replace(ebnorms_b, ebnorms_n)
    data.Norm = data.Norm.replace(norms_simple_b, norms_simple_n)
    #data = data.tail(50000)
    return data

def filter(frame: pd.DataFrame, feature: str, condition) -> pd.DataFrame:
    return frame[frame[feature]==condition]
    
data = load_and_process()


In [61]:
data.eps.value_counts()

0.00002    165665
0.02000      4933
0.00200      4412
0.20000      4400
0.00020      4397
0.01800      1055
0.01840      1055
0.01820      1054
0.01860      1051
0.01880      1049
0.01900      1049
0.01920       759
0.01960       528
0.01940       527
0.01980       526
Name: eps, dtype: int64

In [36]:
chosen_norm: str = "IS Empirical"
filtered = data[data.EBNorm == chosen_norm]
filtered = filtered[filtered.Norm == "IS"]
filtered = filter(filtered, "z", 50)
filtered = filter(filtered, "eps", 0.00002)
filtered = filter(filtered, "chi", 0.00002)
#filtered = filter(filtered, "alpha", 0.00002)

#new_cols = ["gamma", "eps", "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]
#new_cols = ["gamma", "chi", "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]
new_cols = ["gamma", "alpha", "acr", "g_reps", "nice_eps", "AllD", "pDisc", "Disc", "AllC"]
new_df = pd.DataFrame(columns=new_cols)

gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
epsilons = [0.00002, 0.0002, 0.002, 0.02, 0.2]
chis = [0.00002, 0.0002, 0.002, 0.02, 0.2]
alphas = [0.00002, 0.0002, 0.002, 0.02, 0.2]

for gamma in gammas:
    #for epsilon in epsilons:
    #for chi in chis:
    for alpha in alphas:
        #super_filtered = filtered[(filtered.gamma == gamma) & (filtered.eps == epsilon)]
        #super_filtered = filtered[(filtered.gamma == gamma) & (filtered.chi == chi)]
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered.alpha == alpha)]
        
        mean_acr: float = super_filtered.acr.mean()
        mean_greps: float = super_filtered.G.mean()
        mean_niceeps: float = super_filtered.nice.mean()
    
        mean_AllC: float = super_filtered.AllC.mean()
        mean_Disc: float = super_filtered.Disc.mean()
        mean_AllD: float = super_filtered.AllD.mean()
        mean_pDisc: float = super_filtered.pDisc.mean()
        #new_df.loc[len(new_df)] = [gamma, epsilon, mean_acr, mean_greps, mean_niceeps, mean_AllD, mean_pDisc, mean_Disc, mean_AllC]
        #new_df.loc[len(new_df)] = [gamma, chi, mean_acr, mean_greps, mean_niceeps, mean_AllD, mean_pDisc, mean_Disc, mean_AllC]
        new_df.loc[len(new_df)] = [gamma, alpha, mean_acr, mean_greps, mean_niceeps, mean_AllD, mean_pDisc, mean_Disc, mean_AllC]

feature: str = "acr" + ":Q"
            
heatmap = alt.Chart(new_df).mark_rect().encode(
    #x='chi:O',
    #x='eps:O',
    x='alpha:O',
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    color=alt.Color(feature, scale=alt.Scale(domain=[0,90]))
).properties(
    height=400,
    width=400,
    title = chosen_norm
)

text = heatmap.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

#heatmap+text

## DIFFERENCE
### gamma in [0, 1] +0.1


In [37]:
data = load_and_process()

chosen_parameter = "eps"

# Initial filtering
filtered = filter(data, "z", 50)
filtered = filter(filtered, "eps", 0.00002) if chosen_parameter != "eps" else filtered
filtered = filter(filtered, "chi", 0.00002) if chosen_parameter != "chi" else filtered
filtered = filter(filtered, "alpha", 0.00002) if chosen_parameter != "alpha" else filtered

# Define columns based on the chosen parameter
new_cols = ["gamma", chosen_parameter, "acr"]
new_df = pd.DataFrame(columns=new_cols)

gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]

# Define values based on the chosen parameter
parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

for gamma in gammas:
    for parameter_value in parameter_values:
        super_filtered = filtered[(filtered.gamma == gamma) & (filtered[chosen_parameter] == parameter_value)]
        
        mean_acr_nice = super_filtered[super_filtered.EBNorm == "IS Empirical"].acr.mean()
        mean_acr_normal = super_filtered[super_filtered.EBNorm == "IS normal"].acr.mean()
        
        new_df.loc[len(new_df)] = [gamma, parameter_value, mean_acr_nice - mean_acr_normal, super_filtered.shape[1]]

# Define feature for Altair chart
feature = "acr:Q"

# Create Altair chart
heatmap = alt.Chart(new_df).mark_rect().encode(
    x=f"{chosen_parameter}:O",
    y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
    color=alt.Color(feature, scale=alt.Scale(domain=[-63, 63], scheme='blueorange'), sort="descending")
).properties(
    height=400,
    width=400,
    title="ACR Difference between EB-IS and IS"
)

# Add text annotations to the chart
text = heatmap.mark_text().encode(
    text=alt.Text(feature, format=".2f"),
    color=alt.value('black')
)

# Display the chart with text annotations
heatmap + text


Columns (9) have mixed types. Specify dtype option on import or set low_memory=False.



ValueError: cannot set a row with mismatched columns

## Difference
### gamma in [0.9, 1] +0.01

In [38]:
def heatmap(chosen_parameter, absolute=False):
    data = load_and_process()

    # Initial filtering
    filtered = filter(data, "z", 50)
    filtered = filter(filtered, "eps", 0.00002) if chosen_parameter != "eps" else filtered
    filtered = filter(filtered, "chi", 0.00002) if chosen_parameter != "chi" else filtered
    filtered = filter(filtered, "alpha", 0.00002) if chosen_parameter != "alpha" else filtered
    
    print("filtered size = ", filtered.shape)

    # Define columns based on the chosen parameter
    new_cols = ["gamma", chosen_parameter, "acr", "acr_diff", "samples", "standard_deviation"]
    new_df = pd.DataFrame(columns=new_cols)

    gammas = [round(a, 2) for a in np.arange(0, 1.01, 0.1)]
    #gammas = filtered.gamma.unique()

    # Define values based on the chosen parameter
    parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

    for gamma in gammas:
        for parameter_value in parameter_values:
            super_filtered = filtered[(filtered.gamma == gamma) & (filtered[chosen_parameter] == parameter_value)]
            mean_acr_nice = super_filtered[super_filtered.EBNorm == "IS Empirical"].acr.mean()
            mean_acr_normal = super_filtered[super_filtered.EBNorm == "IS normal"].acr.mean()
            
            std_acr_nice = super_filtered[super_filtered.EBNorm == "IS Empirical"].acr.std()

            new_df.loc[len(new_df)] = [gamma, parameter_value, mean_acr_nice, mean_acr_nice - mean_acr_normal, super_filtered.shape[0], std_acr_nice]

    # Define feature for Altair chart
    feature = "acr:Q" if absolute else "acr_diff:Q"
    domain = [0, 100] if absolute else [-20, 80] 

    # Create Altair chart
    heatmap = alt.Chart(new_df).mark_rect().encode(
        x=f"{chosen_parameter}:O",
        y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
        color=alt.Color(feature, scale=alt.Scale(domain=domain, scheme='blueorange'), sort="descending")
    ).properties(
        height=400,
        width=400,
        title="ACR Difference between EB-IS and IS"
    )

    # Add text annotations to the chart
    text = heatmap.mark_text().encode(
        text=alt.Text(feature, format=".2f"),
        color=alt.value('black')
    )

    # Display the chart with text annotations
    #heatmap + text
    return new_df, (heatmap + text)

In [47]:
chi_df, c = heatmap("chi")
alpha_df, c = heatmap("alpha")
eps_df, c = heatmap("eps")

filtered size =  (61538, 19)
filtered size =  (135966, 19)
filtered size =  (58634, 19)


In [48]:
alpha_df

Unnamed: 0,gamma,alpha,acr,acr_diff,samples,standard_deviation
0,0.0,2e-05,17.923854,0.060565,904.0,15.004806
1,0.0,0.0002,19.629362,-2.1e-05,299.0,15.387152
2,0.0,0.002,18.097475,2.286795,299.0,14.381806
3,0.0,0.02,11.437927,-0.332629,400.0,8.560451
4,0.0,0.2,5.015794,0.716055,298.0,6.00046
5,0.1,2e-05,29.990949,10.374778,903.0,27.940624
6,0.1,0.0002,32.94954,13.446818,299.0,29.627791
7,0.1,0.002,26.510328,8.351669,299.0,26.130788
8,0.1,0.02,15.702494,3.321504,399.0,11.319834
9,0.1,0.2,4.606155,-0.254731,300.0,6.00649


In [55]:
import plotly.graph_objects as go
import pandas as pd

# Your dataset
df = pd.DataFrame(chi_df)

z_feature = 'acr_diff'

# Reshape data for contour plot
z_values = df.pivot_table(index='gamma', columns=df.columns[1], values=z_feature).values
x_values = sorted(df[df.columns[1]].unique())
y_values = sorted(df['gamma'].unique())

# Create contour plot
fig = go.Figure(
    data=go.Contour(
        z=z_values,
        x=x_values,
        y=y_values,
        zmin=-65,
        zmax=65,
        colorscale='RdBu',
        #contours_coloring='lines',
        contours={
            "showlabels":True
        }
    )
)
fig.update_xaxes(type='log')
fig.update_traces(ncontours=15, selector=dict(type='contour'))
title = z_feature + " gamma x " + df.columns[1]
fig.update_layout(title=title)
fig.update_layout(
    title=title,
    width=800,  # Set the width of the plot
    height=800,  # Set the height of the plot
    xaxis_title=df.columns[1],  # Add x-axis label
    yaxis_title='Gamma',  # Add y-axis label
    font=dict(size=18)
)


# Show the plot
fig.show()

In [66]:
data = load_and_process()

value = 0.0002
filtered = filter(data, "chi", value)
filtered = filter(filtered, "alpha", value)
filtered = filter(filtered, "eps", value)
filtered

Unnamed: 0,EBNorm,Norm,z,g,mu,chi,eps,alpha,gamma,pdx,acr,B,G,AllD,pDisc,Disc,AllC,mean,nice


In [67]:
data = load_and_process()
data[data.gamma==0.35]

Unnamed: 0,EBNorm,Norm,z,g,mu,chi,eps,alpha,gamma,pdx,acr,B,G,AllD,pDisc,Disc,AllC,mean,nice


In [50]:
def param_heatmap(chosen_parameter: str, feature: str, norm: str):
    data = load_and_process()

    # Initial filtering
    filtered = filter(data, "z", 50)
    filtered = filter(filtered, "eps", 0.00002) if chosen_parameter != "eps" else filtered
    filtered = filter(filtered, "chi", 0.00002) if chosen_parameter != "chi" else filtered
    filtered = filter(filtered, "alpha", 0.00002) if chosen_parameter != "alpha" else filtered
    
    filtered = filter(filtered, "EBNorm", norm)

    # Define columns based on the chosen parameter
    new_cols = ["gamma", chosen_parameter, "value"]
    new_df = pd.DataFrame(columns=new_cols)

    #gammas = [round(a, 2) for a in np.arange(0.9, 1.01, 0.01)]
    gammas = filtered.gamma.unique()

    # Define values based on the chosen parameter
    parameter_values = [0.00002, 0.0002, 0.002, 0.02, 0.2]

    for gamma in gammas:
        for parameter_value in parameter_values:
            super_filtered = filtered[(filtered.gamma == gamma) & (filtered[chosen_parameter] == parameter_value)]

            feature_mean: float = super_filtered[feature].mean()

            new_df.loc[len(new_df)] = [gamma, parameter_value, feature_mean]

    # Define feature for Altair chart
    view_feature = "value:Q"

    # Create Altair chart
    heatmap = alt.Chart(new_df).mark_rect().encode(
        x=f"{chosen_parameter}:O",
        y=alt.Y('gamma:O', sort=alt.EncodingSortField('gamma', order='descending')),
        color=alt.Color(view_feature, scale=alt.Scale(domain=[0, 1], scheme='blueorange'), sort="ascending")
    ).properties(
        height=400,
        width=400,
        title=chosen_parameter + " x " + feature + " x " + norm
    )

    # Add text annotations to the chart
    text = heatmap.mark_text().encode(
        text=alt.Text(view_feature, format=".2f"),
        color=alt.value('black')
    )

    # Display the chart with text annotations
    #heatmap + text
    return new_df, (heatmap + text)

In [43]:
d, c = param_heatmap("alpha", "AllD", "IS Empirical")
c


Columns (9) have mixed types. Specify dtype option on import or set low_memory=False.



In [28]:
chi_line_df = filter(chi_df[chi_df.gamma >= 0.9], "chi", 0.0002)
alpha_line_df = filter(alpha_df[alpha_df.gamma >= 0.9], "alpha", 0.0002)
eps_line_df = filter(eps_df[eps_df.gamma >= 0.9], "eps", 0.0002)


chi_line = alt.Chart(chi_line_df).mark_line(color="green").encode(
    x="gamma",
    y="acr_diff"
)
alpha_line = alt.Chart(alpha_line_df).mark_line(color="red").encode(
    x="gamma",
    y="acr_diff"
)
eps_line = alt.Chart(eps_line_df).mark_line(color="blue").encode(
    x="gamma",
    y="acr_diff"
)
chi_line + alpha_line + eps_line