In [None]:
# ! pip install ../
# ! pip install matplotlib

This notebook runs all of the examples from the README.

In [None]:
import learn_to_pick

In [None]:
from learn_to_pick import PickBest

# with an llm scorer

class fake_llm:
    """Minimal stand-in for an LLM client that always answers with the score "1"."""

    def predict(self, inputs):
        """Echo the prompt that was received and return a fixed score as text.

        Args:
            inputs: whatever the caller passes as the prompt; it is only printed.

        Returns:
            The string "1". The reply is text (not an int) so that it has the
            same type as the other ``fake_llm`` stub in this notebook, which
            returns "5", and so that string handling such as ``.strip()``
            followed by ``float(...)`` works on it.
        """
        print(f"here are the inputs: {inputs}")
        dummy_score = "1"
        return dummy_score

# Build a picker that is handed the stand-in LLM for scoring.
picker = PickBest.create(llm=fake_llm())

# Let it choose between two options, given a single piece of criteria.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
from learn_to_pick import PickBest

# with a custom scorer

class CustomSelectionScorer(learn_to_pick.SelectionScorer):
    """Example scorer that looks up the pick and its criteria, then returns a constant."""

    def score_response(self, inputs, picked, event) -> float:
        """Print the inputs and return a placeholder score of 1.

        The pick and the criteria are fetched only to show where they can be
        found; a real scorer would evaluate the pick against the criteria.
        """
        print(f"inputs: {inputs}")
        chosen = picked["pick"]
        context = event.based_on["criteria"]
        # A real implementation would evaluate `chosen` against `context` here.
        return 1

# Plug the custom scorer into a new picker.
picker = PickBest.create(selection_scorer=CustomSelectionScorer())

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
from learn_to_pick import PickBest

# with delayed score

# No scorer is attached here; the score is supplied after the fact.
picker = PickBest.create(selection_scorer=None)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

# The result was evaluated asynchronously in a different part of the system,
# which determined a score; hand that score back together with the result.
dummy_score = 1
picker.update_with_delayed_score(dummy_score, result)

In [None]:
class fake_llm:
    """Stub LLM whose every prediction is the text "5"."""

    def predict(self, *args):
        """Ignore any positional arguments and reply with "5"."""
        canned_reply = "5"
        return canned_reply

# Scoring template with {criteria} and {pick} placeholders.
scoring_criteria_template = (
    "Given {criteria} rank how good or bad this selection is {pick}"
)

picker = learn_to_pick.PickBest.create(
    selection_scorer=learn_to_pick.AutoSelectionScorer(
        llm=fake_llm(),
        scoring_criteria_template_str=scoring_criteria_template,
    ),
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
# I want the score to be in the range [-1, 1] instead of the default [0, 1]

# Full scoring prompt. {criteria} and {pick} are placeholders named after the
# keyword arguments passed to picker.run in this notebook.
REWARD_PROMPT = """

Given {criteria} rank how good or bad this selection is {pick}

IMPORTANT: you MUST return a single number between -1 and 1, -1 being bad, 1 being good

"""
# Pass the full custom prompt to the auto scorer.
picker = learn_to_pick.PickBest.create(
    selection_scorer=learn_to_pick.AutoSelectionScorer(
        llm=fake_llm(),
        prompt=REWARD_PROMPT,
    )
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
class CustomSelectionScorer(learn_to_pick.SelectionScorer):
    """Template for a user-defined scorer; always reports a score of 1.0."""

    def score_response(self, inputs, picked, event: learn_to_pick.PickBestEvent) -> float:
        """Score the selection that was made.

        Args:
            inputs: the inputs to the picker in Dict[str, Any] format.
            picked: the selection that was made by the policy.
            event: metadata that can be used to determine the score if needed.

        Returns:
            The score for the selection (a fixed placeholder value here).
        """
        # Scoring logic goes here; this example just returns a constant.
        return 1.0


picker = learn_to_pick.PickBest.create(selection_scorer=CustomSelectionScorer())

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

# Show the score recorded on the selection metadata.
print(result["picked_metadata"].selected.score)

In [None]:
def a_function(inputs, picked, event):
    """Callback that sets the score directly on the event instead of using a scorer.

    Args:
        inputs: passed through unchanged.
        picked: not used by this callback.
        event: object whose ``selected.score`` attribute is set to 5.0.

    Returns:
        The tuple ``(inputs, event)``.
    """
    print("hello world")
    # Write the score straight onto the selection carried by the event.
    setattr(event.selected, "score", 5.0)
    return (inputs, event)

# Register the callback to run before scoring; no selection scorer is attached.
picker = learn_to_pick.PickBest.create(
    callbacks_before_scoring=[a_function],
    selection_scorer=None,
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked_metadata"].selected.score)

In [None]:
# Ask the picker from the previous cell to save its progress.
# NOTE(review): presumably this persists the learned model state; the storage
# location is not visible here — confirm against the learn_to_pick docs.
picker.save_progress()

In [None]:
# Create the picker again with the same callback and no selection scorer.
picker = learn_to_pick.PickBest.create(
    callbacks_before_scoring=[a_function],
    selection_scorer=None,
)

In [None]:
# Create a picker with an explicit model directory and save its progress.
picker = learn_to_pick.PickBest.create(
    model_save_dir="./models",
    selection_scorer=None,
)
picker.save_progress()

# Create a picker again with the same model directory.
# NOTE(review): presumably this picks up what was just saved — confirm.
picker = learn_to_pick.PickBest.create(
    model_save_dir="./models",
    selection_scorer=None,
)

In [None]:
class CustomSelectionScorer(learn_to_pick.SelectionScorer):
    """Placeholder scorer used to demonstrate switching the scorer off and on."""

    def score_response(self, inputs, picked, event: learn_to_pick.PickBestEvent) -> float:
        """Return a constant score for the selection.

        Args:
            inputs: the inputs to the picker in Dict[str, Any] format.
            picked: the selection that was made by the policy.
            event: metadata that can be used to determine the score if needed.

        Returns:
            Always 1.0; real scoring logic would replace the constant.
        """
        placeholder_score = 1.0
        return placeholder_score


picker = learn_to_pick.PickBest.create(selection_scorer=CustomSelectionScorer())

# 1) Run with the selection scorer as configured at creation.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)
print(result["picked_metadata"].selected.score)

# 2) Run again after deactivating the selection scorer.
picker.deactivate_selection_scorer()
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)
print(result["picked_metadata"].selected.score)

# 3) Run once more after activating the selection scorer again.
picker.activate_selection_scorer()
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)
print(result["picked_metadata"].selected.score)


In [None]:
# Command-line style arguments handed to the picker via vw_cmd.
vw_cmd = [
    "--cb_explore_adf",
    "--squarecb",
    "--interactions=::",
]

picker = learn_to_pick.PickBest.create(
    vw_cmd=vw_cmd,
    selection_scorer=None,
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
# Create a picker with vw_logs set to ./logs and no selection scorer.
# NOTE(review): presumably vw_logs is the path where examples are logged — confirm.
picker = learn_to_pick.PickBest.create(vw_logs='./logs', selection_scorer=None)

In [None]:
# Run the picker created in the previous cell.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("some criteria"),
)

print(result["picked"])

In [None]:
picker = learn_to_pick.PickBest.create(
    vw_logs="./logs",
    selection_scorer=None,
    feature_embedder=learn_to_pick.PickBestFeatureEmbedder(auto_embed=True),
)

# Options given as plain strings.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(["option1", "option2"]),
    criteria=learn_to_pick.BasedOn("age:32"),
)

# Options given as lists of strings.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(
        [
            ["meal 1 name", "meal 1 description"],
            ["meal 2 name", "meal 2 description"],
        ]
    ),
    criteria=learn_to_pick.BasedOn("age:32"),
)

# Options given as dictionaries of named fields.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(
        [
            {"name": "meal 1 name", "description": "meal 1 description"},
            {"name": "meal 2 name", "description": "meal 2 description"},
        ]
    ),
    criteria=learn_to_pick.BasedOn("age:32"),
)

# Dictionaries whose values may themselves be lists, with a list of criteria.
result = picker.run(
    pick=learn_to_pick.ToSelectFrom(
        [
            {"name": ["meal 1", "complex name"], "description": "meal 1 description"},
            {"name": ["meal 2", "complex name"], "description": "meal 2 description"},
        ]
    ),
    criteria=learn_to_pick.BasedOn(["Tom Joe", "age:32", "state of california"]),
)

In [None]:
from sentence_transformers import SentenceTransformer

# Use a specific SentenceTransformer model for the auto-embedding feature embedder.
picker = learn_to_pick.PickBest.create(
    feature_embedder=learn_to_pick.PickBestFeatureEmbedder(
        auto_embed=True,
        model=SentenceTransformer("multi-qa-mpnet-base-dot-v1"),
    ),
    vw_logs="./logs",
    selection_scorer=None,
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(
        [
            {"name": ["meal 1", "complex name"], "description": "meal 1 description"},
            {"name": ["meal 2", "complex name"], "description": "meal 2 description"},
        ]
    ),
    criteria=learn_to_pick.BasedOn(["Tom Joe", "age:32", "state of california"]),
)

In [None]:
# Auto-embedding is off; the criteria are wrapped in Embed explicitly instead.
picker = learn_to_pick.PickBest.create(
    feature_embedder=learn_to_pick.PickBestFeatureEmbedder(auto_embed=False),
    vw_logs="./logs",
    selection_scorer=None,
)

result = picker.run(
    pick=learn_to_pick.ToSelectFrom(
        [
            {"name": ["meal 1", "complex name"], "description": "meal 1 description"},
            {"name": ["meal 2", "complex name"], "description": "meal 2 description"},
        ]
    ),
    criteria=learn_to_pick.Embed(
        learn_to_pick.BasedOn(["Tom Joe", "age:32", "state of california"])
    ),
)