In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import json
# Select seaborn's "notebook" plotting context for figures drawn later in this notebook.
sns.set_context("notebook")

In [2]:
def load_sweep(path: str) -> pd.DataFrame:
    """Load a sweep JSONL file into a flat frame with one row per result.

    Every non-blank line of ``path`` is parsed as JSON. Lines without a
    ``"results"`` key are skipped. For each remaining line, every entry of
    its ``"results"`` list becomes one row that carries the line's other
    top-level fields followed by the fields of that result.

    Args:
        path: Path of the JSONL file to read (decoded as UTF-8).

    Returns:
        A DataFrame with one row per result and no ``"results"`` column.
        If no line carries results, an empty DataFrame is returned (the
        previous implementation raised ``KeyError`` in that case).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.

    Notes:
        If a result shares a key with its parent line, the result's value
        wins (the previous implementation raised ``TypeError``).
        Assumes each line decodes to a JSON object and each result is an
        object too -- TODO confirm against the sweep writer.
    """
    rows = []
    with open(path, encoding="utf-8") as f:
        for raw in f:
            # Tolerate blank separator/trailing lines instead of failing in json.loads.
            if not raw.strip():
                continue
            record = json.loads(raw)
            if "results" not in record:
                continue
            # Strip the nested list up front so nothing has to be dropped from
            # the frame afterwards (which fails when the frame has no columns).
            shared = {key: value for key, value in record.items() if key != "results"}
            rows.extend({**shared, **result} for result in record["results"])
    return pd.DataFrame(rows)

def _cap_examples_per_config(d: pd.DataFrame) -> pd.DataFrame:
    """Keep the leading rows of every (run, dataset, sparsity, k) configuration.

    At most 200 rows are kept for ``cnn_dailymail`` configurations and at most
    400 for any other dataset. Rows come back ordered by configuration (sorted
    group order, NaN keys included) and, inside a configuration, in their
    incoming order -- the same rows and order as a per-group ``head``.

    This avoids ``groupby(...).apply`` with a callback that reads a grouping
    column, which pandas deprecated in 2.2.
    """
    grouped = d.groupby(["run", "dataset", "sparsity", "k"], dropna=False)
    position_in_group = grouped.cumcount().to_numpy()
    group_number = grouped.ngroup().to_numpy()
    cap = np.where((d.dataset == "cnn_dailymail").to_numpy(), 200, 400)
    kept = np.flatnonzero(position_in_group < cap)
    # A stable sort on the group number reproduces "groups in sorted order,
    # rows in original order within each group".
    ordered = kept[np.argsort(group_number[kept], kind="stable")]
    return d.iloc[ordered]


# Stack the four sweep runs (each tagged with a `run` label), derive a single
# `score` column (`match` as a float where present, otherwise `rougeL`), keep
# only the "1b" model scale and cap the number of examples per configuration.
df = (pd.concat([load_sweep("data/sweep.jsonl").assign(run="one_prefill_causal_spV"),
                 load_sweep("data/sweep_original.jsonl").assign(run="two_prefill_causal_spV"),
                 load_sweep("data/sweep_original_v2.jsonl").assign(run="two_prefill"),
                 load_sweep("../rmoe-36-larger-models/data/sweep.jsonl").assign(run="two_prefill_original"),
                 ])
    .reset_index(drop=True)
    .pipe(lambda d: d.assign(score=np.where(d.match.isna(), d.rougeL, d.match.astype("float"))))
    .pipe(lambda d: d[d.model_scale == "1b"])
    .pipe(_cap_examples_per_config)
    .reset_index(drop=True)
    .drop(columns="_duration")
)

In [18]:
# First ten SQuAD rows per run for sparsev_after_avg at k=64, comparing the
# "two_prefill" and "two_prefill_original" runs side by side.
# (Aggregate alternative: .groupby("run")[["match"]].mean())
(df
 .loc[lambda d: (d.dataset == "squad")
                & (d.sparsity == "sparsev_after_avg")
                & (d.k == 64)
                & d.run.isin(["two_prefill", "two_prefill_original"])]
 .groupby("run").head(10)
 .loc[:, ["run", "id", "match", "output"]]
 .style.hide()
 )

run,id,match,output
two_prefill,5711607f2419e314009555cf,False,"coal, oil, natural gas, nuclear, hydroelectric, wind, solar, and biomass Question: What is the power"
two_prefill,570d4329fed7b91900d45dd7,True,1 July 1851 Question: What was the name of the first governor of Victoria? Answer: Sir John Kerr
two_prefill,572fdd03a23a5019007fcaa1,False,yes Question: MPs representing Welsh constituencies can only veto laws affecting which country? Answer: yes Question:
two_prefill,57274e145951b619008f87eb,False,Nepalese Question: What is the difference between a 'public school' and a 'private school'? Answer:
two_prefill,5733f309d058e614000b6649,False,"The French killed about 200 men, and the British killed about 100. Question: What was the cause of the battle?"
two_prefill,57338160d058e614000b5bfc,True,Committees Question: What is the role of the Mayor? Answer: The Mayor is the head of the Council.
two_prefill,5730c8a1f6cb411900e2449c,True,The Mission Council of the South Central Jurisdiction Question: What is the name of the decision that was made by the Mission
two_prefill,56f8c8469e9bad19000a04c7,False,He was warm and he was happy. Question: What did Luther do after the negotiations were done? Answer: He
two_prefill,56d728e00d65d2140019841c,False,Newton Question: Who was the first player to score a touchdown in Super Bowl 50? Answer: Newton Question:
two_prefill,57309564069b5314008321a9,False,The United States Question: Who was made poor and poor prior to World War 1 Answer: The United States Question


In [26]:
# Mean score of the dense (sparsity == "none") baseline per dataset and run.
# Baselines are close enough; perhaps the minor differences are due to truncation?
(df
     .loc[lambda d: d.sparsity == "none"]
     .groupby(["dataset", "run"], as_index=False)[["score"]].mean()
     .pivot(index=["dataset"], columns="run", values="score")
     .loc[:, ["two_prefill_original", "two_prefill", "two_prefill_causal_spV", "one_prefill_causal_spV"]]
     .style.format("{:.3f}")
)

run,two_prefill_original,two_prefill,two_prefill_causal_spV,one_prefill_causal_spV
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cnn_dailymail,0.219,0.22,0.22,0.22
squad,0.375,0.375,0.375,0.375
triviaqa,0.323,0.323,0.323,0.32


In [32]:
# Sparse-V is weird!
# Mean score per dataset and sparsity method (eviction excluded), restricted to
# rows with k == 64 or no k at all, one column per run.
(df
     .loc[lambda d: (d.sparsity != "eviction") & (d.k.isna() | (d.k == 64))]
     .groupby(["dataset", "run", "sparsity"], as_index=False)[["score"]].mean()
     .pivot(index=["dataset", "sparsity"], columns="run", values="score")
     .loc[:, ["two_prefill_original", "two_prefill", "two_prefill_causal_spV", "one_prefill_causal_spV"]]
     .style.format("{:.3f}")
)

Unnamed: 0_level_0,run,two_prefill_original,two_prefill,two_prefill_causal_spV,one_prefill_causal_spV
dataset,sparsity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cnn_dailymail,none,0.219,0.22,0.22,0.22
cnn_dailymail,sparsev_after,0.21,0.21,0.218,0.219
cnn_dailymail,sparsev_after_avg,0.197,0.216,0.201,0.201
cnn_dailymail,sparsev_before,0.214,0.214,0.208,0.208
squad,none,0.375,0.375,0.375,0.375
squad,sparsev_after,0.205,0.207,0.367,0.367
squad,sparsev_after_avg,0.2,0.265,0.278,0.278
squad,sparsev_before,0.302,0.3,0.34,0.34
triviaqa,none,0.323,0.323,0.323,0.32
triviaqa,sparsev_after,0.343,0.343,0.33,0.328


In [43]:
# Eviction is weird, but kind of makes sense.
# Mean eviction score per dataset and k (64 / 256 / 1024), one column per run.
(df
     .loc[lambda d: d.sparsity == "eviction"]
     # k is converted to int only after the eviction filter above.
     .assign(k=lambda d: d.k.map(int))
     .loc[lambda d: d.k.isin([64, 256, 1024])]
     # Optional extra filters, kept for ad-hoc inspection:
     #   .loc[lambda d: d.run != "two_prefill_causal_spV"]
     #   .loc[lambda d: d.k == 256]
     .groupby(["dataset", "k", "run"], as_index=False)[["score"]].mean()
     .pivot(index=["dataset", "k"], columns="run", values="score")
     .loc[:, ["two_prefill_original", "two_prefill", "one_prefill_causal_spV"]]
     .style.format("{:.3f}")
)

Unnamed: 0_level_0,run,two_prefill_original,two_prefill,one_prefill_causal_spV
dataset,k,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cnn_dailymail,64,0.148,0.149,0.07
cnn_dailymail,256,0.187,0.189,0.175
cnn_dailymail,1024,0.217,0.218,0.218
squad,64,0.062,0.062,0.09
squad,256,0.147,0.142,0.205
squad,1024,0.285,0.285,0.345
triviaqa,64,0.263,0.263,0.083
triviaqa,256,0.297,0.307,0.22
triviaqa,1024,0.302,0.305,0.31


In [51]:
def analyse_example(d):
    """Summarise one example's rows across runs as a single flat record.

    Args:
        d: Rows belonging to one example; must provide ``run``, ``score`` and
            ``output`` columns.

    Returns:
        A Series holding ``interesting`` (True unless all scores in ``d`` are
        the same value), followed by ``score_<run>`` and ``output_<run>``
        entries taken from the first row of each run.

    Missing scores (NaN) are treated as one value, so an example whose scores
    are all NaN is not flagged as interesting.

    NOTE(review): this notebook defines this helper in more than one cell; the
    definition executed last is the one in effect.
    """
    first_by_run = {run: rows.iloc[0] for run, rows in d.groupby("run")}
    # nunique(dropna=False) counts all NaNs as a single value; building a
    # Python set from the column would count every NaN separately.
    scores_differ = d.score.nunique(dropna=False) != 1
    return pd.Series(dict(interesting=scores_differ,
                          **{f"score_{run}": row.score for run, row in first_by_run.items()},
                          **{f"output_{run}": row.output for run, row in first_by_run.items()}))

# TriviaQA, eviction at k=256: the first 20 examples whose scores are not all
# equal between "two_prefill" and "one_prefill_causal_spV", outputs side by side.
(df
    .loc[lambda d: (d.sparsity == "eviction")
                   & (d.k == 256)
                   & (d.dataset == "triviaqa")
                   & d.run.isin(["two_prefill", "one_prefill_causal_spV"])]
    .groupby("id").apply(analyse_example)
    .reset_index(drop=True)
    .loc[lambda d: d.interesting]
    .drop(columns="interesting")
    .rename(columns={"output_one_prefill_causal_spV": "output_one_prefill"})
    .loc[:, ["output_two_prefill", "output_one_prefill"]]
    .head(20)
    .style.hide()
)

output_two_prefill,output_one_prefill
Edith Nesbit References External links Category:19,Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith Edith
Samoa The Samoan tālā is the currency of Western Samoa. It is divided,Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam
"""Germany"" External links Official website FIFA Women's",""" External links Official website FIFA Women's World Cup"
"""Blofeld"" References External links Category:1928 births Category:2000 deaths",""" References External links Category:1928 births Category:2000 deaths Category:English male"
Estoril Notable people See also List of municipalities,Portugal The Estoril Grand Prix circuit is in which European country?
FA Cup References External links Category:1842 births Category:1907 deaths Category:,FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA
"Great Victoria Street References External links Great Victoria Street station, NI Rail","Belfast References External links Great Victoria Street station, Northern Ireland Railways"
"""The BBC"" The badge is a shield with the BBC motto in the centre","""Nation"" The badge is a shield with a thunderbolt in the centre"
"""Aston Villa"" References External links The Football League on","""A"" References External links The Football League on The FA"
"Las Vegas ;Europe *Paris, France (prostitution, gambling, prostitution,",A: The answer is Las Vegas. A: The


In [52]:
def analyse_example(d):
    """Summarise one example's rows across runs as a single flat record.

    Args:
        d: Rows belonging to one example; must provide ``run``, ``score`` and
            ``output`` columns.

    Returns:
        A Series holding ``interesting`` (True unless all scores in ``d`` are
        the same value), followed by ``score_<run>`` and ``output_<run>``
        entries taken from the first row of each run.

    Missing scores (NaN) are treated as one value, so an example whose scores
    are all NaN is not flagged as interesting.

    NOTE(review): this notebook defines this helper in more than one cell; the
    definition executed last is the one in effect.
    """
    first_by_run = {run: rows.iloc[0] for run, rows in d.groupby("run")}
    # nunique(dropna=False) counts all NaNs as a single value; building a
    # Python set from the column would count every NaN separately.
    scores_differ = d.score.nunique(dropna=False) != 1
    return pd.Series(dict(interesting=scores_differ,
                          **{f"score_{run}": row.score for run, row in first_by_run.items()},
                          **{f"output_{run}": row.output for run, row in first_by_run.items()}))

# TriviaQA, sparsev_after_avg at k=64: the first 20 examples whose scores are
# not all equal between "two_prefill" and "two_prefill_causal_spV". All score
# and output columns are shown.
# (Optional narrowing, currently disabled: rename
#  output_one_prefill_causal_spV -> output_one_prefill and select only
#  ["output_two_prefill", "output_one_prefill"].)
(df
    .loc[lambda d: (d.sparsity == "sparsev_after_avg")
                   & (d.k == 64)
                   & (d.dataset == "triviaqa")
                   & d.run.isin(["two_prefill", "two_prefill_causal_spV"])]
    .groupby("id").apply(analyse_example)
    .reset_index(drop=True)
    .loc[lambda d: d.interesting]
    .drop(columns="interesting")
    .head(20)
    .style.hide()
)

score_two_prefill,score_two_prefill_causal_spV,output_two_prefill,output_two_prefill_causal_spV
0.0,1.0,"""1968"". References Category:1967 in British sport Category:1967 in horseracing Category:1967 in Irish sport",1967 References Category:1967 in horse racing Category:1967 in British sport Category:National Hunt races in the United Kingdom
1.0,0.0,"""Blofeld"" References External links Category:1928 births Category:2000 deaths",""" References External links Category:1928 births Category:2000 deaths Category:English male"
1.0,0.0,The FA Cup References External links Category:1842 births Category:1907 deaths Category,FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA
1.0,0.0,"""long jump"" Multiple-word answer: ""long jump"" Men's","""heptathlon"" Multiple-word answer: ""heptathlon"""
0.0,1.0,Great Victoria Street References External links Great Victoria Street station on the NI,Belfast References External links Great Victoria Street station on the NI Railways
1.0,0.0,"""The BBC"" The badge was granted in 1927, and was used on the","""Nation"" The badge was designed by the BBC's Head of Heraldry"
0.0,1.0,"""Liverpool"" References External links The Football League Cup","""Aston Villa"" References External links The Football League Cup"
0.0,1.0,Director-General References External links Category:1935 births,MI5 * * * * * * *
0.0,1.0,"""Leek"" See also References External links Category:Allium Category:Crops Category:G",Allium See also References External links Category:Allium Category:Crops Category:Vegetables
1.0,0.0,Green Card References External links Category:1948,"""The Last Metro"" References External links"


In [None]:
def analyse_example(d):
    """Summarise one example's rows across runs as a single flat record.

    Args:
        d: Rows belonging to one example; must provide ``run``, ``score`` and
            ``output`` columns.

    Returns:
        A Series holding ``interesting`` (True unless all scores in ``d`` are
        the same value), followed by ``score_<run>`` and ``output_<run>``
        entries taken from the first row of each run.

    Missing scores (NaN) are treated as one value, so an example whose scores
    are all NaN is not flagged as interesting.

    NOTE(review): this notebook defines this helper in more than one cell; the
    definition executed last is the one in effect.
    """
    first_by_run = {run: rows.iloc[0] for run, rows in d.groupby("run")}
    # nunique(dropna=False) counts all NaNs as a single value; building a
    # Python set from the column would count every NaN separately.
    scores_differ = d.score.nunique(dropna=False) != 1
    return pd.Series(dict(interesting=scores_differ,
                          **{f"score_{run}": row.score for run, row in first_by_run.items()},
                          **{f"output_{run}": row.output for run, row in first_by_run.items()}))

# TriviaQA, eviction at k=256, every run: the first 25 examples whose scores
# are not all equal, with per-run score and output columns.
(df
    .loc[lambda d: (d.sparsity == "eviction")
                   & (d.k == 256)
                   & (d.dataset == "triviaqa")]
    .groupby("id").apply(analyse_example)
    .reset_index(drop=True)
    .loc[lambda d: d.interesting]
    .drop(columns="interesting")
    .head(25)
)

Unnamed: 0,score_combine_context_and_prompt,score_original,score_rerun,output_combine_context_and_prompt,output_original,output_rerun
2,0.0,1.0,1.0,Edith Edith Edith Edith Edith Edith Edith Edi...,Edith Nesbit\n\nReferences\n\nExternal links\...,Edith Nesbit\n\nReferences\n\nExternal links\...
22,0.0,1.0,1.0,Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam Sam S...,Samoa\n\nThe Samoan tālā is the currency of W...,Samoa\n\nThe Samoan tālā is the currency of W...
24,0.0,1.0,1.0,"""\n\nExternal links\nOfficial website\nFIFA W...","""Germany""\n\nExternal links\nOfficial website...","""Germany""\n\nExternal links\nOfficial website..."
28,0.0,1.0,1.0,"""\n\nReferences\n\nExternal links\n\nCategory...","""Blofeld""\n\nReferences\n\nExternal links\n\n...","""Blofeld""\n\nReferences\n\nExternal links\n\n..."
36,1.0,0.0,0.0,Portugal\n\nThe Estoril Grand Prix circuit is...,Estoril\n\nThe Estoril Grand Prix circuit is ...,Estoril\n\nNotable people\n\nSee also\n\nList...
44,0.0,1.0,1.0,FA FA FA FA FA FA FA FA FA FA FA FA FA FA FA ...,FA Cup\n\nReferences\n\nExternal links\n\nCat...,FA Cup\n\nReferences\n\nExternal links\n\nCat...
49,1.0,0.0,0.0,Belfast\n\nReferences\n\nExternal links\n\nGr...,Great Victoria Street\n\nReferences\n\nExtern...,Great Victoria Street\n\nReferences\n\nExtern...
51,0.0,1.0,1.0,"""Nation""\n\nThe badge is a shield with a thun...","""The BBC""\n\nThe badge is a shield with a thu...","""The BBC""\n\nThe badge is a shield with the B..."
54,0.0,1.0,1.0,"""A""\n\nReferences\n\nExternal links\nThe Foot...","""Aston Villa""\n\nReferences\n\nExternal links...","""Aston Villa""\n\nReferences\n\nExternal links..."
58,0.0,1.0,1.0,\n\nA:\n\nThe answer is Las Vegas.\n\nA:\n\nThe,"Las Vegas\n\n;Europe\n*Paris, France (prostit...","Las Vegas\n\n;Europe\n*Paris, France (prostit..."
