# Guided-MT Code2Vec Evaluation

This notebook walks over the experiment outputs, extracts the data and creates the plots.

Expected Layout:

```
.
├── README.md
├── data
│   └── random-MRR-max
│       ├── seed-2880
│       │   ├── data
│       │   │   ├── gen0
│       │   │   │   ├── 3b2459
│       │   │   │   ├── 3b2459.json
│       │   │   │   ├── 447e22
│       │   │   │   ├── 447e22.json
│       │   │   │   ├── 4495c7
│       │   │   │   ├── 4495c7.json
│       │   │   │   ├── 52667b
│       │   │   │   ├── 52667b.json
│       │   │   │   ├── 6855ba
│       │   │   │   ├── 6855ba.json
│       │   │   │   ├── 68ec75
│       │   │   │   ├── 68ec75.json
│       │   │   │   ├── 6cc14d
│       │   │   │   ├── 6cc14d.json
│       │   │   │   ├── 6d6845
│       │   │   │   ├── 6d6845.json
│       │   │   │   ├── 7a2d67
│       │   │   │   ├── 7a2d67.json
│       │   │   │   ├── ed0dd9
│       │   │   │   └── ed0dd9.json
│       │   │   ├── gen1
│       │   │   ├── ...
│       │   │   ├── gen8
│       │   │   ├── ...
│       │   │   ├── generation_0
│       │   │   │   ├── Some.java
│       │   │   │   ├── ...
│       │   │   │   ├── Other.java
│       │   │   │   └── Different.java
│       │   │   └── initialGen
│       │   │       └── 3bf9ce
│       │   └── results.txt
│       ├── seed-5142
│       │   └── results.txt
│       └── ...
├── evaluation.ipynb
└── requirements.txt
```

## Data Loading

Most of this is done in the nearby `extract` script, but we also extract some high-level variables.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sbn
from scipy import interpolate

import extract


# Important: Specify Directory without / at the end!
directory: str = "./data"

# Every plot cell below saves into ./figures, which is not part of the expected
# layout. Create it up front so the first savefig does not fail on a fresh checkout.
Path("./figures").mkdir(parents=True, exist_ok=True)

In [None]:
%%time
# Load the experiment results through the extract helper.
# NOTE(review): the column schema is defined in extract (not shown here); the cells below
# rely on at least "experiment", "seed", "generation", "F1", "MRR" and "TRANSFORMATIONS".
df = extract.make_df(directory)

# Correct Naming Mistakes:
def flip_min_and_max(to_flip: str) -> str:
    """Return the experiment name with its "-min"/"-max" suffix swapped.

    Only the F1 and MRR experiments (plain and "random-" prefixed) are
    swapped; every other name is returned unchanged.
    """
    # Paretos are not done, as they were not useful for RQs at the point of writing.
    min_max_pairs = (
        ("F1-min", "F1-max"),
        ("random-F1-min", "random-F1-max"),
        ("MRR-min", "MRR-max"),
        ("random-MRR-min", "random-MRR-max"),
    )
    # Register each pair in both directions so the lookup is symmetric.
    swapped = {}
    for min_name, max_name in min_max_pairs:
        swapped[min_name] = max_name
        swapped[max_name] = min_name
    return swapped.get(to_flip, to_flip)

# Apply the min/max name correction to every row
df["experiment"] = df["experiment"].apply(flip_min_and_max)

# Remove Pareto Experiments, they are a bit wacky
# ~ is the "invert" operator and changes true to false and false to true for series
# .copy() detaches the filtered frame from the original, so the column assignment
# below does not trigger pandas' SettingWithCopyWarning.
df = df[~df['experiment'].str.contains("pareto")].copy()
# Reduce categories to only existing ones - no "ghost categories" after removing paretos
# (going through str first rebuilds the category list from the remaining values)
df['experiment'] = df['experiment'].astype(str).astype("category")

In [None]:
# Names of the metric columns.
# NOTE(review): only "F1" and "MRR" are referenced by the cells visible in this notebook.
all_metrics = ["F1","MRR","EDITDIST","PMRR","REC","PREC"]
# Transformer names as reported by the extract module; used further down as column names
all_transformers = extract.get_known_transformers()
# Distinct experiment names and seeds occurring in df
all_experiments = set(df["experiment"])
all_seeds = set(df["seed"])

In [None]:
# Write the cleaned frame to ./results.csv, then show the first five rows as cell output
df.to_csv("./results.csv")
df.head(5)

In [None]:
#TODO: There are random exps in gen 10 that have 1 transformer?

In [None]:
# Collapse all rows sharing an (experiment, generation) pair into their column-wise mean.
# NOTE(review): on pandas >= 2.0 `.mean()` raises for non-numeric columns; if df carries
# any, this needs numeric_only=True - confirm against the extract output.
broader_grouped_df = df.groupby(["experiment","generation"]).mean().reset_index()
# Experiments with "random" in their name are tagged "random", all others "genetic"
broader_grouped_df["algorithm"] = broader_grouped_df["experiment"].apply(lambda x: "random" if "random" in x else "genetic")
# Remove Pareto Experiments, they are a bit wacky
# ~ is the "invert" operator and changes true to false and false to true for series
# The filtered frame has to be assigned back; as a bare expression the result was discarded.
broader_grouped_df = broader_grouped_df[~broader_grouped_df['experiment'].str.contains("pareto")]

broader_grouped_df.head(5)

## Per Experiment Plots

In [None]:
# Two stacked panels sharing the x axis: F1 on top (ax1), MRR below (ax2).
# Line style distinguishes the algorithm, colour distinguishes the experiment.
fig, (ax1,ax2) = plt.subplots(nrows=2,sharex=True)

# The top panel draws no legend of its own (legend=None); only the bottom panel gets one
sbn.lineplot(data=broader_grouped_df,x="generation",y="F1", style="algorithm",hue="experiment",ax=ax1,legend=None,markers="o")
ax1.set_title("MRR and F1 over experiments")
ax1.set_ylim([0.35,0.7])
ax1.set_xlim([0,26])

sbn.lineplot(data=broader_grouped_df,x="generation",y="MRR",style="algorithm", hue="experiment",ax=ax2,markers="o")
ax2.set_ylim([0.35,0.7])
ax2.set_xlim([0,26])

# Places the bottom panel's legend to the right of the panels, roughly level with the top one
ax2.legend(bbox_to_anchor=(1.05, 2))

fig.subplots_adjust(right=1.2) # <-- Secret Number measured for this plot to work for the export

# bbox_inches="tight" is presumably needed so the legend outside the axes is not cut off - confirm
plt.savefig("./figures/development-f1-mrr-all-experiments.png",bbox_inches="tight")
plt.show()

In [None]:
# Two stacked panels (F1 on top, MRR below) for the genetic experiments only
helper_df = broader_grouped_df[broader_grouped_df["algorithm"]=="genetic"]
# Experiments that actually occur in helper_df; passed as hue_order to both plots
# (presumably to keep the filtered-out experiments out of the legend - confirm)
used_experiments = helper_df["experiment"].unique().to_list()
fig, axs = plt.subplots(nrows=2,sharex=True)

# Top panel: no legend of its own (legend=None)
sbn.lineplot(data=helper_df,x="generation",y="F1",hue="experiment",ax=axs[0],legend=None,marker='o',hue_order=used_experiments)
axs[0].set_title("MRR and F1 over genetic experiments")
axs[0].set_ylim([0.35,0.7])

sbn.lineplot(data=helper_df,x="generation",y="MRR", hue="experiment",ax=axs[1],marker='o',hue_order=used_experiments)
axs[1].set_ylim([0.35,0.7])

# NOTE(review): plt.legend acts on pyplot's current axes, presumably the bottom panel - confirm.
# Unlike the all-experiments figure, this one is saved without bbox_inches="tight".
plt.legend(title="Experiment",bbox_to_anchor=(1.45,1.45))
#plt.legend(bbox_to_anchor=(1.45, 1.45))

plt.savefig("./figures/development-f1-mrr-all-genetic-experiments.png")
plt.show()
# Drop the temporaries so later cells start from a clean state
del helper_df, used_experiments

In [None]:
# Median F1 per generation for every experiment without "random" in its name,
# computed on the raw rows of df.
is_random = df["experiment"].str.contains("random")
helper_df = df[~is_random]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="F1",
    hue="experiment",
    hue_order=used_experiments,
    estimator=np.median,
)
plt.title("Development of F1-Score (Median)")
plt.legend(loc="lower left")
plt.xlim(0, 16)

plt.savefig("./figures/development-f1-all-genetic.png")
plt.show()

# Drop the temporaries again
del helper_df, used_experiments, is_random

In [None]:
# Median F1 per generation for the "F1-min" experiment next to "random-F1-min".
compared_experiments = ["F1-min", "random-F1-min"]
helper_df = df[df["experiment"].isin(compared_experiments)]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="F1",
    hue="experiment",
    hue_order=used_experiments,
    estimator=np.median,
)
plt.title("Development of F1-Score (Median)")
plt.legend(loc="lower left")
plt.xlim(0, 16)

plt.savefig("./figures/development-f1-min-median-random-and-genetic.png")
plt.show()

# Drop the temporaries again
del helper_df, used_experiments, compared_experiments

In [None]:
# F1 of the genetic experiments, taken from the per-(experiment, generation) means.
genetic_rows = broader_grouped_df["algorithm"] == "genetic"
helper_df = broader_grouped_df.loc[genetic_rows]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="F1",
    hue="experiment",
    hue_order=used_experiments,
    markers=True,
)
plt.title("F1 over genetic experiments")
plt.xlim([0, 20])
plt.ylim([0.35, 0.55])

plt.savefig("./figures/development-f1-all-genetic-experiments.png")
plt.show()

# Drop the temporaries again
del helper_df, used_experiments, genetic_rows

In [None]:
# MRR of the genetic experiments, taken from the per-(experiment, generation) means.
genetic_rows = broader_grouped_df["algorithm"] == "genetic"
helper_df = broader_grouped_df.loc[genetic_rows]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="MRR",
    hue="experiment",
    hue_order=used_experiments,
    markers=True,
)
plt.title("MRR over genetic experiments")
plt.ylim([0.3, 0.7])

plt.savefig("./figures/development-mrr-all-genetic-experiments.png")
plt.show()

# Drop the temporaries again
del helper_df, used_experiments, genetic_rows

In [None]:
# Stacked bar chart for one experiment: per generation, the mean of every
# transformer column, stacked on top of each other.
target_exp = "F1-min"
width = 0.5

helper_df = broader_grouped_df[broader_grouped_df["experiment"]==target_exp]
helper_df = helper_df.sort_values("generation")

# Take the bar positions from the data instead of a hard-coded range(0, 26), so the
# positions always have the same length as the bar heights.
labels = helper_df["generation"].to_numpy()

fig, ax = plt.subplots()

# Running top edge of the stack: each transformer starts where the previous one ended
current_bottom = np.zeros(len(helper_df))
for transformer_name in all_transformers:
    # Generations without a value for this transformer contribute a zero-height bar
    heights = helper_df[transformer_name].fillna(0).to_numpy()
    ax.bar(labels, heights, width, label=transformer_name, bottom=current_bottom)
    current_bottom = current_bottom + heights

ax.set_xlim([0,15])
ax.set_ylabel('Transformers')
ax.set_xlabel('Generation')
# Derive the title from target_exp; the fixed text named "F1-max" although F1-min is plotted
ax.set_title(f'Transformers by Type and Generation for {target_exp}')
ax.legend()

plt.savefig("./figures/f1-min-transformer-mix.png")
plt.show()

# Drop the temporaries again
del helper_df,labels,target_exp,fig,ax,width,current_bottom

In [None]:
# Stacked bar chart for one experiment: per generation, the mean of every
# transformer column, stacked on top of each other.
target_exp = "random-F1-max"
width = 0.5

helper_df = broader_grouped_df[broader_grouped_df["experiment"]==target_exp]
helper_df = helper_df.sort_values("generation")

# Take the bar positions from the data instead of a hard-coded range(0, 26), so the
# positions always have the same length as the bar heights.
labels = helper_df["generation"].to_numpy()

fig, ax = plt.subplots()

# Running top edge of the stack: each transformer starts where the previous one ended
current_bottom = np.zeros(len(helper_df))
for transformer_name in all_transformers:
    # Generations without a value for this transformer contribute a zero-height bar
    heights = helper_df[transformer_name].fillna(0).to_numpy()
    ax.bar(labels, heights, width, label=transformer_name, bottom=current_bottom)
    current_bottom = current_bottom + heights

ax.set_xlim([0,17])
ax.set_ylabel('Transformers')
ax.set_xlabel('Generation')
# Derive the title from target_exp so it always names the experiment that is plotted;
# the fixed text named "random-F1-min" although random-F1-max is selected above.
ax.set_title(f'Transformers by Type and Generation for {target_exp}')
ax.legend()

# NOTE(review): the file name says "min" while target_exp is "random-F1-max" - confirm
# which experiment this figure is meant to show and align the two.
plt.savefig("./figures/random-f1-min-transformer-mix.png")
plt.show()

# Drop the temporaries again
del helper_df,labels,target_exp,fig,ax,width,current_bottom

In [None]:
# Median F1 per generation for every experiment in df (shown inline only, not saved).
sbn.lineplot(
    data=df,
    x="generation",
    y="F1",
    hue="experiment",
    estimator=np.median,
)

plt.legend(loc="lower right")

plt.title("Development of F1-Score")

In [None]:
# Average F1 per generation for every experiment without "random" in its name
# (shown inline only, not saved).
is_random = df["experiment"].str.contains("random")
helper_df = df[~is_random]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="F1",
    hue="experiment",
    hue_order=used_experiments,
    estimator=np.average,
)
plt.title("Development of F1-Score (Average)")
plt.legend(loc="lower left")
plt.xlim(0, 16)

# Drop the temporaries again
del helper_df, used_experiments, is_random

In [None]:
# Median MRR per generation for every experiment without "random" in its name
# (shown inline only, not saved).
is_random = df["experiment"].str.contains("random")
helper_df = df[~is_random]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="MRR",
    hue="experiment",
    hue_order=used_experiments,
    estimator=np.median,
)
plt.title("Development of MRR-Score (Median)")
plt.legend(loc="lower left")
plt.xlim(0, 16)

# Drop the temporaries again
del helper_df, used_experiments, is_random

In [None]:
# Mean MRR per generation for every experiment without "random" in its name
# (shown inline only, not saved).
is_random = df["experiment"].str.contains("random")
helper_df = df[~is_random]
used_experiments = list(helper_df["experiment"].unique())

sbn.lineplot(
    data=helper_df,
    x="generation",
    y="MRR",
    hue="experiment",
    hue_order=used_experiments,
    estimator=np.mean,
)
plt.title("Development of MRR-Score (Mean)")
plt.legend(loc="lower left")
plt.xlim(0, 16)

# Drop the temporaries again
del helper_df, used_experiments, is_random

In [None]:
# Twin-axis figure for the F1-min experiment: mean TRANSFORMATIONS per generation
# on the left axis (ax), median F1 per generation on the right axis (ax2).
fig,ax = plt.subplots()
ax.grid(False)
ax.set_xlim(0,15)

sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="TRANSFORMATIONS", estimator=np.mean,
             label="Transformations",ax=ax,legend=True,color="olive")
# The left axis carries the transformations curve, so it gets that label
ax.set_ylabel("Transformations")
plt.legend(loc="upper right")
plt.xlabel("Generations")
ax2=ax.twinx()
ax2.grid(False)
sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="F1", estimator=np.median,
             label="F1-Score",ax=ax2,legend=True,color="lightseagreen")
# The F1 curve lives on the right axis; the label used to be set on `ax`,
# which mislabelled the transformations axis as "F1-Score".
ax2.set_ylabel("F1-Score")

plt.legend(loc="upper left")
plt.xlabel("Generations")
plt.title("Comparison of growing Transformations and falling F1-Score")

plt.savefig("./figures/comparison-f1-ts-f1-min.png")

plt.show()

In [None]:
# Twin-axis figure for the F1-min experiment: mean TRANSFORMATIONS per generation
# on the left axis (ax), median MRR per generation on the right axis (ax2).
f1_min_rows = df[df["experiment"] == "F1-min"]

fig, ax = plt.subplots()
ax.grid(False)
ax.set_xlim(0, 15)
ax.set_ylabel("Transformations")

sbn.lineplot(
    data=f1_min_rows,
    x="generation",
    y="TRANSFORMATIONS",
    estimator=np.mean,
    label="Transformations",
    color="olive",
    legend=True,
    ax=ax,
)
plt.legend(loc="upper right")
plt.xlabel("Generations")

# Second y axis sharing the same x axis
ax2 = ax.twinx()
ax2.grid(False)
ax2.set_ylabel("Score")

sbn.lineplot(
    data=f1_min_rows,
    x="generation",
    y="MRR",
    estimator=np.median,
    label="MRR-Score",
    color="goldenrod",
    legend=True,
    ax=ax2,
)
plt.legend(loc="upper left")
plt.title("Comparison of growing Transformations and rising MRR-Score")

plt.savefig("./figures/comparison-MRR-ts-f1-min.png")

plt.show()

# Only needed inside this cell
del f1_min_rows

In [None]:
# Twin-axis figure for the F1-min experiment: mean TRANSFORMATIONS per generation on the
# left axis (ax); median MRR and median F1 per generation share the right axis (ax2).
fig,ax = plt.subplots()
ax.grid(False)
ax.set_xlim(0,15)

sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="TRANSFORMATIONS", estimator=np.mean,
             label="Transformations",ax=ax,legend=True,color="olive")
plt.legend(loc="upper right")
plt.xlabel("Generations")

# Second y axis sharing the same x axis; both score curves are drawn on it
ax2=ax.twinx()
ax2.grid(False)

ax2.set_ylabel("Score")
sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="MRR", estimator=np.median,
             label="MRR-Score",ax=ax2,legend=True,color="goldenrod")
sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="F1", estimator=np.median,
             label="F1-Score",ax=ax2,legend=True,color="lightseagreen")

plt.legend(loc="upper left")
# NOTE(review): the title only mentions MRR although F1 is plotted as well - looks like a
# copy-paste leftover from the MRR-only figure; confirm the intended wording.
plt.title("Comparison of growing Transformations and rising MRR-Score")

plt.savefig("./figures/comparison-scores-and-ts-for-f1-min.png")

plt.show()

## Notes

Things I saw / take away from plots:

- There are two interesting values, F1 and MRR both for F1-max.
- MRR seems to be shaky but not moving much, this could be because the model was trained with F1
- More movement with more generations
- There is a base-line jump for MRR min and max experiments?
- The F1 min maximizes MRR ...
- Genetic reaches fewer generations than random
- F1-max has a slight upward peak, likely because not all seeds reached generation 14 (the upward peak only shows for the average, not for the median)
-

## Deprecated Plots

In [None]:
'''
sbn.relplot(data=df,x="generation",y="MRR", hue="algorithm")

sbn.relplot(data=df,x="generation",y="F1", hue="algorithm")

sbn.relplot(data=broader_grouped_df,x="generation",y="F1", hue="experiment")

sbn.relplot(data=broader_grouped_df,x="generation",y="F1", hue="experiment")

sbn.histplot(data=df[df["experiment"]=="F1-min"],x="TRANSFORMATIONS")

sbn.histplot(data=df[df["experiment"]=="random-F1-min"],x="TRANSFORMATIONS")

sbn.lineplot(df[df["experiment"]=="F1-min"],x="generation",y="TRANSFORMATIONS")
'''

In [None]:
#for exp in all_experiments:
#    sbn.relplot(data=broader_grouped_df[broader_grouped_df["experiment"]==exp],x="generation",y="F1",kind="line")
#    plt.title(f"F1 Score for {exp}")
#    plt.show()
#
#
#for exp in all_experiments:
#    sbn.relplot(data=broader_grouped_df[broader_grouped_df["experiment"]==exp],x="generation",y="MRR",kind="line")
#    plt.title(f"MRR Score for {exp}")
#    plt.show()

In [None]:
'''
# This was meant to help me with making a nice curve for the comparison transformers <> f1, but didn't work well

helper_df = broader_grouped_df[broader_grouped_df["experiment"]=="F1-min"].sort_values("generation")

ts = list(helper_df["TRANSFORMATIONS"])
f1s = list(helper_df["F1"])

num_items = len(ts)

tss = [ts[0],ts[6],ts[9],ts[12],ts[16]]
f1ss = [f1s[0],f1s[6],f1s[9],f1s[12],f1s[16]]



f = interpolate.interp1d(np.arange(0,len(tss)), tss,fill_value="extrapolate")
h = interpolate.interp1d(np.arange(0,len(f1ss)), f1ss,fill_value="extrapolate")

xnew = np.arange(0, 12, 0.1)
ynew = f(xnew)   # use interpolation function returned by `interp1d`
plt.plot(xnew, ynew, '-')
plt.show()
'''