# Model Notebook
Useful for debugging configurations and viewing project configuration details.

## Setup
Configure the default paths and select a model directory.

In [None]:
import os

from ipyfilechooser import FileChooser

# Defaults used as the chooser's starting location: a base directory of
# models and an optional sub-directory inside it (empty = start at the base).
default_models_directory = '../example_projects'
default_model = ""

# Directory-only picker; select_default=True is expected to pre-select the
# starting path, so `fc.selected_path` is usable without clicking "Select".
fc = FileChooser(
    os.path.join(default_models_directory, default_model),
    title="Select a Model Directory",
    show_only_dirs=True,
    select_default=True,
)
display(fc)

## Load Model

In [None]:
import os
import sys

# Put the sibling `src` directory at the front of the import path (once),
# before the project-local imports below are resolved.
modules_path = os.path.join('..', 'src')
if modules_path not in sys.path:
    sys.path.insert(0, modules_path)

from pprint import pformat, pp
from IPython import display as ds

from tutorial.inference import show_predictions
import forgather.ml.notebooks as nb
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the directory chosen in the file-chooser widget once and reuse it.
model_dir = fc.selected_path
assert os.path.exists(model_dir), "Model directory does not exist."

tokenizer = AutoTokenizer.from_pretrained(model_dir)

# NOTE: trust_remote_code=True lets transformers execute Python code shipped
# with the model; only load model directories you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    local_files_only=True,
    trust_remote_code=True,
)

# Print the model first, then the tokenizer, each on its own line.
print(model, tokenizer, sep="\n")

In [None]:
# Three short paragraphs separated by blank lines, ending with a single newline.
sample_text = "\n\n".join((
    'One day, a little girl named Lily found a needle in her room. She knew it was difficult to play with it because it was sharp. Lily wanted to share the needle with her mom, so she could sew a button on her shirt.',
    'Lily went to her mom and said, "Mom, I found this needle. Can you share it with me and sew my shirt?" Her mom smiled and said, "Yes, Lily, we can share the needle and fix your shirt."',
    "Together, they shared the needle and sewed the button on Lily's shirt.",
)) + "\n"

# Run the tutorial's prediction display on the sample text, on the CPU.
show_predictions(
    model,
    tokenizer,
    device="cpu",
    text=[sample_text],
)

### Simple Text Gen
This is a very simple text generator implementation.

[tutorial_code.textgen](../tutorial_code/textgen.py)  

In [None]:
from tutorial.textgen import TextGenerator

# Smoke-test text generation on the CPU, with sampling enabled and a fixed seed.
# Don't expect too much from this model, as the only input to each prediction is the previous word.
text_gen = TextGenerator(
    model,
    tokenizer,
    "cpu",
    do_sample=True,
    seed=42,
)
text = text_gen.prompt(
    "One day, a little girl",
    max_new_tokens=200,
)

# Show the repr so newlines and other escapes in the output are visible.
print(f"{text!r}")

In [None]:
# Print the generated text itself, so newlines render as line breaks
# (the previous cell printed its repr() instead).
print(text)

In [None]:
# https://huggingface.co/docs/transformers/v4.34.1/en/generation_strategies

class TextGen():
    """Minimal prompt-to-text helper built on the model's own `generate()` method.

    Stores a model, its tokenizer, and a target device, and exposes a single
    `generate()` call that tokenizes a prompt, runs generation, and decodes
    the first returned sequence back to text.
    """

    def __init__(self, model, tokenizer, device):
        """Remember the model, tokenizer, and device to use for generation.

        Args:
            model: Object providing `.to(device)` and `.generate(...)`.
            tokenizer: Callable returning a mapping with an `'input_ids'`
                entry, and providing `.batch_decode(...)`.
            device: Device the model and the input ids are moved to
                (e.g. "cpu").
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def generate(self, prompt, do_sample=True, top_k=50, top_p=0.9, max_new_tokens=500):
        """Generate text continuing `prompt` and return it as a string.

        Args:
            prompt: Text to tokenize and feed to the model.
            do_sample: Forwarded to the model's `generate()`.
            top_k: Forwarded to the model's `generate()`.
            top_p: Forwarded to the model's `generate()`.
            max_new_tokens: Forwarded to the model's `generate()`.

        Returns:
            The first decoded sequence, with special tokens skipped.
        """
        self.model.to(self.device)
        encoded = self.tokenizer(prompt, return_tensors='pt')
        input_ids = encoded['input_ids'].to(self.device)

        # Use the instance's own model and tokenizer. The previous version
        # reached for the notebook-level `model` / `tokenizer` globals, so the
        # objects passed to __init__ were ignored here.
        outputs = self.model.generate(
            input_ids,
            do_sample=do_sample,
            top_k=top_k,
            top_p=top_p,
            max_new_tokens=max_new_tokens,
        )
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

In [None]:
# Build the generator around the loaded model/tokenizer and sample a
# continuation of a short prompt on the CPU, using the default sampling settings.
gen = TextGen(model=model, tokenizer=tokenizer, device="cpu")
print(gen.generate(prompt="One day, a little girl"))