# Examples from Finetuned Models

In [None]:
%load_ext autoreload
%autoreload 2

import os
from transformers import BartTokenizerFast

from tabulate import tabulate
import pandas as pd

from IPython.display import display, HTML

In [None]:
from transformers import BartForConditionalGeneration, RobertaTokenizerFast, Trainer, TrainingArguments, DataCollator
import os
from glob import glob

def load_model(path):
    """Load a BART conditional-generation model from ``path``.

    ``path`` is passed unchanged to
    ``BartForConditionalGeneration.from_pretrained`` and the resulting model
    object is returned.
    """
    return BartForConditionalGeneration.from_pretrained(path)

def load_model_wandb(wandb_id, results_path = "results/"):
    """Load the newest checkpoint saved for the run named ``wandb_id``.

    Every sub-directory of ``<results_path>/<wandb_id>/<wandb_id>/`` is
    treated as a candidate checkpoint. Candidates are ranked by the integer
    after the last ``-`` in the directory name (e.g. ``checkpoint-120000``),
    so ``checkpoint-120000`` correctly beats ``checkpoint-90000``; a plain
    string sort would rank them the other way round. Directories without a
    numeric suffix rank below every numbered checkpoint.

    Args:
        wandb_id: Run name; used twice as nested directory names.
        results_path: Root directory that holds the per-run folders.

    Returns:
        Whatever ``load_model`` returns for the selected checkpoint directory.

    Raises:
        FileNotFoundError: If no candidate directory exists.
    """
    def checkpoint_step(checkpoint_dir):
        # The glob pattern ends with a path separator, so normalise the path
        # before taking the final component.
        name = os.path.basename(os.path.normpath(checkpoint_dir))
        suffix = name.rpartition("-")[2]
        return int(suffix) if suffix.isdecimal() else -1

    # The trailing "" makes the pattern end in a separator, so only
    # directories match.
    model_path = os.path.join(results_path, wandb_id, wandb_id, "*", "")
    model_checkpoints = glob(model_path)
    print(model_checkpoints)
    if not model_checkpoints:
        raise FileNotFoundError(
            "No checkpoint directories match {!r}".format(model_path))
    # Ties on the step number fall back to the path for a deterministic pick.
    most_recent_checkpoint = max(
        model_checkpoints, key=lambda p: (checkpoint_step(p), p))
    return load_model(most_recent_checkpoint)

def get_alternative_interface(model, tokenizer):
    """Return a ``text -> list of str`` generation function bound to ``model``.

    The returned ``interface(text, gen_kwargs=None)``:

    * tokenizes ``text`` with ``tokenizer(text, return_tensors="pt")``,
    * calls ``model.generate`` on the resulting ``input_ids`` with
      ``gen_kwargs`` expanded as keyword arguments,
    * prints the raw generated ids (kept for notebook debugging), and
    * returns one decoded string per generated sequence, decoded with
      ``skip_special_tokens=True`` and ``clean_up_tokenization_spaces=False``.

    When ``gen_kwargs`` is omitted or ``None`` the defaults are
    ``max_length=50, num_beams=5, early_stopping=True``.
    """
    default_gen_kwargs = {"max_length":50,
                          "num_beams":5,
                          "early_stopping":True}

    def interface(text, gen_kwargs = None):
        # Resolve the defaults per call instead of using a mutable dict as the
        # parameter default, so no dict object is shared between calls.
        if gen_kwargs is None:
            gen_kwargs = dict(default_gen_kwargs)
        inputs = tokenizer(text, return_tensors="pt")
        outputs = model.generate(inputs["input_ids"], **gen_kwargs)
        print(outputs)
        return [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False)
                for g in outputs.tolist()]
    return interface

def process_input(model,tokenizer,inputs):
    """Generate from pre-tokenized ``inputs`` and return the first decoded string.

    ``inputs["input_ids"]`` is passed to ``model.generate`` with the fixed
    settings ``max_length=50, num_beams=5, early_stopping=True``. Every
    generated sequence is decoded (special tokens skipped, tokenization spaces
    left as-is) and the first decoded string is returned.
    """
    generation_options = {
        "max_length": 50,
        "num_beams": 5,
        "early_stopping": True,
    }
    generated = model.generate(inputs["input_ids"], **generation_options)

    decoded = []
    for token_ids in generated.tolist():
        decoded.append(
            tokenizer.decode(
                token_ids,
                skip_special_tokens=True,
                clean_up_tokenization_spaces=False,
            )
        )
    return decoded[0]


In [None]:
# Root directory of the per-run training outputs (a relative path).
RESULTS_PATH = "../../results"

# Directory holding the tokenizer's vocab.json and merges.txt files.
TOKENIZER_PATH = "../../models/CORAL_BART/tokenizer"
vocab_path = os.path.join(TOKENIZER_PATH,"vocab.json")
merges_path = os.path.join(TOKENIZER_PATH, "merges.txt")
# This one tokenizer instance is passed to every model interface built below.
tokenizer = BartTokenizerFast(vocab_path,merges_path)

# [fancy-sponge-24](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/21hscc2l)
Initialized with `facebook/bart-large` 

In [None]:
# Probe snippet reused for the fancy-sponge and sleek-vortex models.
# NOTE(review): "LogisticeRegression" looks like a typo for "LogisticRegression" —
# confirm whether the misspelling is intentional before comparing outputs.
test_str = """clf=LogisticeRegression(x,y)"""

# Load the checkpoint, wrap it in a text -> generations function, and run the probe
# with the interface's default generation settings.
fancy_sponge = load_model("/homes/gws/mikeam/RobustDataScience/results/fancy-sponge-24/checkpoint-120000")
fancy_sponge_alternatives = get_alternative_interface(fancy_sponge,tokenizer)
fancy_sponge_alternatives(test_str)

# [sleek-vortex-25](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/1gnq0z54?workspace=user-mikeamerrill) 
Initialized with random weights.

In [None]:
# Load the sleek-vortex-25 checkpoint saved at step 40000.
sleek_vortex = load_model("/homes/gws/mikeam/RobustDataScience/results/sleek-vortex-25/checkpoint-40000")

In [None]:
# Run the same probe string (test_str) through sleek-vortex with the default generation settings.
sleek_vortex_alternatives = get_alternative_interface(sleek_vortex,tokenizer)
sleek_vortex_alternatives(test_str)

In [None]:
# Short code snippets available as probe inputs.
TEST_STRS = ["clf=xgb(x,y)", "LogisticRegression.fit(x,y)", "SVC(kernel='rbf')"]

# (label, generate-kwargs) pairs; sampling_strats runs every entry in order.
# Each entry owns its own dict object.
TEST_PARAMS = [
    ("Greedy", dict(max_length=25)),
    ("3-Beams", dict(num_beams=3, max_length=25, early_stopping=True)),
    ("5-Beams", dict(num_beams=5, max_length=25, early_stopping=True)),
    ("5-Beams-No-2-Grams",
     dict(num_beams=5, max_length=25, early_stopping=True, no_repeat_ngram_size=2)),
    ("Pure-Sampling",
     dict(max_length=25, top_k=0, early_stopping=True, do_sample=True)),
    ("Top-3-Sampling",
     dict(max_length=25, top_k=3, early_stopping=True, do_sample=True)),
    ("Top-5-Sampling",
     dict(max_length=25, top_k=5, early_stopping=True, do_sample=True)),
    # NOTE(review): the label says "Higher-Temp" but 0.7 is below 1.0 (the usual
    # generate default) — confirm whether the label or the value is intended.
    ("Top-5-Sampling-Higher-Temp",
     dict(max_length=25, top_k=5, early_stopping=True, do_sample=True, temperature=0.7)),
    # NOTE(review): the two "Nucleus" entries also keep top_k=5, so top-k and
    # top-p are both set — confirm that this combination is intended.
    ("Nucleus-Sampling-0.9",
     dict(max_length=25, top_k=5, early_stopping=True, do_sample=True, top_p=0.9)),
    ("Nucleus-Sampling-0.5",
     dict(max_length=25, top_k=5, early_stopping=True, do_sample=True, top_p=0.5)),
]

def sampling_strats(text,interface,num_return_sequences=1):
    """Run ``text`` through every strategy in ``TEST_PARAMS`` and print a table.

    Prints ``text``, then calls ``interface(text, gen_kwargs=...)`` once per
    ``(name, params)`` entry of ``TEST_PARAMS``. The strings returned for one
    strategy are joined with a dashed separator line, and all strategies are
    printed as a two-column ``tabulate`` grid ("Strategy", "Result").

    Args:
        text: Input snippet handed to ``interface``.
        interface: Callable ``(text, gen_kwargs=dict) -> iterable of str``.
        num_return_sequences: Forwarded to strategies that sample or use more
            than one beam. Plain greedy entries are left untouched, since
            greedy decoding yields a single sequence.

    The dicts stored in ``TEST_PARAMS`` are copied before use, so a call never
    modifies the shared configuration.
    """
    print(text)
    results = []
    for strat_name, params in TEST_PARAMS:
        # Work on a copy: writing num_return_sequences into the shared dict
        # would leak one call's setting into the module-level configuration.
        gen_kwargs = dict(params)
        uses_sampling = bool(gen_kwargs.get("do_sample"))
        uses_beams = (gen_kwargs.get("num_beams") or 1) > 1
        if uses_sampling or uses_beams:
            gen_kwargs["num_return_sequences"] = num_return_sequences
        result = "\n--------------\n".join(interface(text,gen_kwargs = gen_kwargs))
        results.append([strat_name,result])
    print(tabulate(results,["Strategy","Result"],tablefmt="fancy_grid"))

In [None]:
# Scratch check: len() of a one-entry dict.
len({"a":1})

# [still-shadow-44](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/1sy5io22?workspace=)
Was trained with Ge's method for encouraging differences:


In [None]:
# Load a still-shadow-44 checkpoint from under RESULTS_PATH (load_model_wandb picks it),
# then run every decoding strategy in TEST_PARAMS on one snippet.
still_shadow = load_model_wandb("still-shadow-44",results_path = RESULTS_PATH)
still_shadow_alternatives = get_alternative_interface(still_shadow, tokenizer)
sampling_strats("LogisticRegression.fit(x,y)", still_shadow_alternatives)

In [None]:
# Show 10 randomly chosen lines of the step-10000 eval predictions file, piped through jq.
!shuf -n 10 /homes/gws/mikeam/RobustDataScience/results/still-shadow-44/still-shadow-44/eval-preds-10000.jsonl | jq -r .

# [vivid-fire-41](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/13de72q6?workspace=user-mikeamerrill)
This model doesn't use the method for encouraging differences, but was initialized with the Facebook weights.

In [None]:
# Load vivid-fire-41 at step 200000 and run every decoding strategy in TEST_PARAMS on one snippet.
vivid_fire = load_model("/homes/gws/mikeam/RobustDataScience/results/vivid-fire-41/vivid-fire-41/checkpoint-200000")
vivid_fire_alternatives = get_alternative_interface(vivid_fire,tokenizer)
sampling_strats("LogisticRegression.fit(x,y)", vivid_fire_alternatives)

In [None]:
# Multi-line XGBoost snippet (classifier plus a parameter grid) through vivid-fire.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", vivid_fire_alternatives)

## [rural-spaceship-48](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/1xnx0ges?workspace=user-mikeamerrill)
This model was trained using FACE (Frequency-Aware Cross-Entropy) loss.

In [None]:
# Load rural-spaceship-48 at step 80000 and build its generation interface.
rural_spaceship = load_model("/homes/gws/mikeam/RobustDataScience/results/rural-spaceship-48/checkpoint-80000")
rural_spaceship_alternatives = get_alternative_interface(rural_spaceship,tokenizer)

In [None]:
# Multi-line XGBoost snippet through rural-spaceship, all strategies in TEST_PARAMS.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", rural_spaceship_alternatives)

I think there's evidence here that BPE tokenization is screwing us over. I don't understand why "nPlateaun" is showing up. We do have the following in our training set:
```
ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, cooldown=1, verbose=1, min_lr=1e-7)
```
I think it's definitely going to be productive to somehow use more information from the library structure. For example, we could build a knowledge graph from the library structure.

## [glorious-snowflake-54](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/3wwtknpy?workspace=user-mikeamerrill)
Trained on smaller diffs, FAST

In [None]:
# Load glorious-snowflake-54 at step 120000 and build its generation interface.
glorious_snowflake = load_model("/homes/gws/mikeam/RobustDataScience/results/glorious-snowflake-54/checkpoint-120000/")
glorious_snowflake_alternatives = get_alternative_interface(glorious_snowflake,tokenizer)

In [None]:
# XGBoost snippet through glorious-snowflake. Unlike the snippet used for the other runs,
# n_estimators and learning_rate are scalars here rather than one-element lists.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : 1000,'learning_rate': 0.1,\n             'max_depth': [5]}", glorious_snowflake_alternatives)

Honestly... this looks better to me. With Pure-Sampling we got `LGBMRegressor` in there, and even if the learning rate was changed to zero, that's still the token that we'd want to see modified.

In [None]:
# Single-line LogisticRegression snippet through glorious-snowflake.
sampling_strats("LogisticRegression.fit(x,y)", glorious_snowflake_alternatives)

In [None]:
# Two-line scikit-learn pipeline snippet (scaler + SVC, then fit) through glorious-snowflake.
sampling_strats("""clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X, y)""", glorious_snowflake_alternatives)

## [zany-music-55](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/3aq45qf7/overview?workspace=user-mikeamerrill)
Same as above, but with lower learning rate:

In [None]:
# Load the zany-music-55 run. This cell previously pointed at the
# glorious-snowflake-54 directory (copy-paste slip), so the outputs shown under the
# zany-music heading actually came from that other model.
# NOTE(review): the checkpoint step is carried over unchanged — confirm that
# checkpoint-120000 exists for zany-music-55.
zany_music = load_model("/homes/gws/mikeam/RobustDataScience/results/zany-music-55/checkpoint-120000/")
zany_music_alternatives = get_alternative_interface(zany_music,tokenizer)

In [None]:
# Multi-line XGBoost snippet through the zany_music interface, all strategies in TEST_PARAMS.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", zany_music_alternatives)

In [None]:
# Show 10 randomly chosen lines of the zany-music-55 eval predictions (step 128700), piped through jq.
!shuf -n 10 /homes/gws/mikeam/RobustDataScience/results/zany-music-55/eval-preds-128700.jsonl | jq -r .

### [polar-puddle-58](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/1rk236ew/overview?workspace=user-mikeamerrill)
The first model trained on the "predict_spans" task, where the loss is calculated only on tokens that changed. It looks like it seriously overfit.

In [None]:
# Load polar-puddle-58 at step 90000 and run every decoding strategy on the XGBoost snippet.
polar_puddle = load_model("/homes/gws/mikeam/RobustDataScience/results/polar-puddle-58/checkpoint-90000/")
polar_puddle_alternatives = get_alternative_interface(polar_puddle,tokenizer)
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", polar_puddle_alternatives)

A lot of this makes sense to me: we still need to generate the other tokens, so we still need to include them in the loss.

### [worldly-microwave-59](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/37mjeivo?workspace=user-mikeamerrill)

In [None]:
# Load worldly-microwave-59 at step 90000 and build its generation interface.
worldly_microwave = load_model("/homes/gws/mikeam/RobustDataScience/results/worldly-microwave-59/checkpoint-90000/")
worldly_microwave_alternatives = get_alternative_interface(worldly_microwave,tokenizer)

In [None]:
# XGBoost snippet through worldly-microwave, requesting 2 sequences per strategy.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", worldly_microwave_alternatives, num_return_sequences = 2 )

### [earnest-river-64](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/2whutfrj?workspace=user-mikeamerrill)
First model trained with the masked loss on mixed dataset for 30 epochs. Loss got very low!

In [None]:
# Load earnest-river-64 at step 220000 and build its generation interface.
earnest_river = load_model("/homes/gws/mikeam/RobustDataScience/results/earnest-river-64/checkpoint-220000/")
earnest_river_alternatives = get_alternative_interface(earnest_river,tokenizer)

In [None]:
# Multi-line XGBoost snippet through earnest-river, all strategies in TEST_PARAMS.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", earnest_river_alternatives)

In [None]:
# Show 10 randomly chosen lines of the earnest-river-64 eval predictions (step 175000), piped through jq.
!shuf -n 10 /homes/gws/mikeam/RobustDataScience/results/earnest-river-64/eval-preds-175000.jsonl | jq -r .

In [None]:
# gensim Word2Vec constructor call, written as an assignment to `model`.
sampling_strats("model = gensim.models.Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)", earnest_river_alternatives)

In [None]:
# Same Word2Vec call without the leading `model = ` assignment.
sampling_strats("gensim.models.Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)", earnest_river_alternatives)

In [None]:
# Assignment form of the Word2Vec snippet again, requesting 3 sequences per strategy.
sampling_strats("model = gensim.models.Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)",  earnest_river_alternatives,  num_return_sequences = 3)

## [winter-planet-66](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/1iumhxpg?workspace=user-mikeamerrill)
The first model trained with the multi-task loss

In [None]:
# Load winter-planet-66 at step 670000 and build its generation interface.
winter_planet = load_model("/homes/gws/mikeam/RobustDataScience/results/winter-planet-66/checkpoint-670000/")
winter_planet_alternatives = get_alternative_interface(winter_planet,tokenizer)

In [None]:
# XGBoost snippet through winter-planet, requesting 2 sequences per strategy.
sampling_strats("xgboost = XGBClassifier()\nparameters_rf = {'n_estimators' : [1000],'learning_rate': [0.1],\n             'max_depth': [5]}", winter_planet_alternatives, num_return_sequences = 2 )

In [None]:
# Show 10 randomly chosen lines of the winter-planet-66 eval predictions (step 95000), piped through jq.
!shuf -n 10 /homes/gws/mikeam/RobustDataScience/results/winter-planet-66/eval-preds-95000.jsonl | jq -r .

In [None]:
def pretty_print(df):
    """Display ``df`` in the notebook as an HTML table with visible line breaks.

    The frame is rendered with ``df.to_html()`` and every literal
    backslash-``n`` (the two characters ``\\`` and ``n``) in that markup is
    replaced with ``<br>`` — presumably because that is how newlines inside
    cell strings appear in the rendered HTML.

    Returns whatever ``display`` returns.
    """
    # Render once and reuse; the original built this string and then discarded
    # it, rendering the frame a second time for the display call.
    html_str = df.to_html().replace("\\n","<br>")
    return display(HTML(html_str))

def sample_predictions(path,limit=None):
    """Load eval predictions and keep the short examples whose label differs.

    Reads the JSON-lines file at ``path`` and keeps rows where

    * ``input`` differs from ``label``, and
    * ``input`` has fewer than 4 newline-separated lines.

    Args:
        path: JSON-lines file with at least ``input`` and ``label`` columns.
        limit: Maximum number of rows to return, taken from the top in file
            order. ``None`` (the default) returns every matching row. This
            argument used to be accepted but silently ignored; the default is
            ``None`` so that calls which omit it still get all rows.

    Returns:
        A DataFrame with the matching rows.
    """
    results = pd.read_json(path,lines=True)
    did_change = results[results["input"]!=results["label"]]
    line_counts = did_change["input"].map(lambda x: len(x.split("\n")))
    is_small = did_change[line_counts < 4]
    if limit is not None:
        is_small = is_small.head(limit)
    return is_small

In [None]:
# Filter the winter-planet-66 eval predictions with sample_predictions and save them
# to examples.csv without the index column.
results = sample_predictions("/homes/gws/mikeam/RobustDataScience/results/winter-planet-66/eval-preds-95000.jsonl", limit=100)
results.to_csv("./examples.csv",index=False)

In [None]:
# Display the current `results` frame.
results

In [None]:
# Write the current `results` frame to examples.csv again (overwrites the earlier export).
results.to_csv("./examples.csv",index=False)

In [None]:
# Reload the full, unfiltered prediction file; this rebinds `results`.
results = pd.read_json("/homes/gws/mikeam/RobustDataScience/results/winter-planet-66/eval-preds-95000.jsonl",lines=True)

In [None]:
# Summary statistics for the number of newline-separated lines in each input.
results["input"].map(lambda x: len(x.split("\n"))).describe()

In [None]:
# Count the lines in the prediction file.
!wc -l /homes/gws/mikeam/RobustDataScience/results/winter-planet-66/eval-preds-95000.jsonl

In [None]:
# LogisticRegression assignment snippet through winter-planet, requesting 3 sequences per strategy.
sampling_strats("clf = LogisticRegression(x,y)",  winter_planet_alternatives,  num_return_sequences = 3)

In [None]:
# Load the processed "mixed" dataset (JSON lines) into a DataFrame.
orig = pd.read_json("/homes/gws/mikeam/RobustDataScience/data/processed/mixed.jsonl", lines=True)

In [None]:
# Summary statistics for the number of newline-separated lines in each cell_diff value.
orig["cell_diff"].map(lambda x: len(x.split("\n"))).describe()

In [None]:
# Display the loaded dataset.
orig

## [autumn-salad-68](https://app.wandb.ai/mikeamerrill/robustdatascience/runs/lg7ush09/overview?workspace=user-mikeamerrill)
Same as above, but with random masking of tokens that don't change

In [None]:
# Load the autumn-salad-68 run. The path previously pointed at the winter-planet-66
# directory (copy-paste slip), so this section was evaluating the wrong run.
# NOTE(review): checkpoint step 210000 is kept from the original path — confirm it
# exists for autumn-salad-68.
autumn_salad = load_model("/homes/gws/mikeam/RobustDataScience/results/autumn-salad-68/checkpoint-210000/")
autumn_salad_alternatives = get_alternative_interface(autumn_salad,tokenizer)

In [None]:
# LogisticRegression assignment snippet through the autumn_salad interface, 3 sequences per strategy.
sampling_strats("clf = LogisticRegression(x,y)",  autumn_salad_alternatives,  num_return_sequences = 3)

In response, we'll try training with the FACE loss, which should discourage results like these.

## hopeful-brook-108
Using span-aware generation

In [None]:
# Load the hopeful-brook-108 eval predictions (step 55000, JSON lines) into a DataFrame.
hopeful_brook_results = pd.read_json("/homes/gws/mikeam/RobustDataScience/results/hopeful-brook-108/eval-preds-55000.jsonl",lines=True)

In [None]:
# Show the input / label / prediction columns for the first rows.
hopeful_brook_results.head()[["input","label","prediction"]]

In [None]:
# Print the raw input text of the row at position 3, so its newlines render.
print(hopeful_brook_results.head().iloc[3]["input"])

In [None]:
# Print the label text of the same row (position 3) for comparison.
print(hopeful_brook_results.head().iloc[3]["label"])

In [None]:
# Export a random sample of up to 100 short, changed predictions for hopeful-brook-108.
hopeful_brook_examples = sample_predictions("/homes/gws/mikeam/RobustDataScience/results/hopeful-brook-108/eval-preds-55000.jsonl")\
                [["input","label","prediction"]]
# to_csv does not create missing directories, so make sure the target folder exists.
os.makedirs("example_csvs", exist_ok=True)
# Cap the sample size: sampling without replacement raises when more rows are
# requested than are available.
hopeful_brook_examples.sample(min(100, len(hopeful_brook_examples))).to_csv("example_csvs/hopeful-brook-108.csv")

In [None]:
# Scratch cell: prints a fixed string.
print("hey")