In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%%sh

# The specifier must be quoted: unquoted, the shell treats `>=0.39.0` as a
# redirect of pip's output into a file named `=0.39.0` and the version bound is lost.
pip install "bitsandbytes>=0.39.0"
pip install accelerate==0.26.1
# pip install transformers==4.36.1
pip install datasets==2.16.1



In [None]:
%%sh

# Install the transformers fork, pinned to the commit the recorded run resolved,
# so a re-run does not silently pick up a newer revision of the fork.
pip install git+https://github.com/azliu0/transformers.git@66d2bcde1b0ebc41a428e3ae864dd0c22883cdee

Collecting git+https://github.com/azliu0/transformers.git
  Cloning https://github.com/azliu0/transformers.git to /tmp/pip-req-build-e2ahip8u
  Resolved https://github.com/azliu0/transformers.git to commit 66d2bcde1b0ebc41a428e3ae864dd0c22883cdee
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'


  Running command git clone --filter=blob:none --quiet https://github.com/azliu0/transformers.git /tmp/pip-req-build-e2ahip8u


# phase 0->1 training

In [None]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

# FSDP plugin: model and optimizer state dicts are configured with CPU offload
# and rank0_only=False.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

# Accelerator used later to wrap the PEFT model via prepare_model().
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import transformers
import torch

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
# )

# Hub id of the checkpoint that is quantized and fine-tuned in the phase 0->1 cells.
base_model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

In [None]:
# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the base checkpoint with the quantization config above; device placement is left to device_map="auto".
model_0_1 = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
# device = "cuda" # the device to load the model onto

# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

In [None]:
# Display the module tree of the freshly loaded quantized model.
model_0_1

MixtralForCausalLM(
  (model): MixtralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MixtralDecoderLayer(
        (self_attn): MixtralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MixtralRotaryEmbedding()
        )
        (block_sparse_moe): MixtralSparseMoeBlock(
          (gate): Linear4bit(in_features=4096, out_features=8, bias=False)
          (experts): ModuleList(
            (0-7): 8 x MixtralBLockSparseTop2MLP(
              (w1): Linear4bit(in_features=4096, out_features=14336, bias=False)
              (w2): Linear4bit(in_features=14336, out_features=4096, bias=False)
              (w3): Linear4bit(in_features=4096, out_feat

In [None]:
from datasets import load_dataset

In [None]:
import os

# Phase 0->1 data: JSONL files read from the mounted Drive folder.
# Both calls request split='train', including the validation file.
train_dataset = load_dataset('json', data_files='/content/drive/MyDrive/treehacks/training.jsonl', split='train')
eval_dataset = load_dataset('json', data_files='/content/drive/MyDrive/treehacks/validation.jsonl', split='train')

In [None]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

# NOTE(review): identical to the FSDP/Accelerator cell at the start of this section;
# running it rebinds `fsdp_plugin` and `accelerator` to new objects.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

In [None]:
# Tokenizer for the base checkpoint: left padding, BOS and EOS tokens added.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def generate_code_given_nlq(nlq):
    """Sample a reply from ``model_0_1`` for a natural-language terminal request.

    ``nlq`` is embedded in the fixed terminal-assistant instruction, rendered
    through the tokenizer's chat template, and passed to ``generate`` with
    sampling enabled (up to 1000 new tokens).

    Returns the first decoded sequence from ``tokenizer.batch_decode``.
    """
    instruction = f"You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, your response should be `ls`. Please only respond with the command itself. Here is my query: \"{nlq}\""
    chat = [{"role": "user", "content": instruction}]
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt")
    output_ids = model_0_1.generate(input_ids, max_new_tokens=1000, do_sample=True)
    return tokenizer.batch_decode(output_ids)[0]

In [None]:
# Smoke-test generation on one example request (runs before any LoRA adapter is attached).
nlq = "delete the file named `hello.txt`"
print(generate_code_given_nlq(nlq))

In [None]:
%%sh

# PEFT supplies the LoRA utilities imported in the following cells.
pip install peft==0.6.2

In [None]:
from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing, then run PEFT's k-bit training preparation on the quantized model.
model_0_1.gradient_checkpointing_enable()
model_0_1 = prepare_model_for_kbit_training(model_0_1)

In [None]:
def print_trainable_parameters(model):
    """Print trainable vs. total parameter counts for ``model``.

    Iterates ``model.named_parameters()``, sums ``numel()`` over all parameters
    and over those with ``requires_grad`` set, and prints both counts together
    with the trainable percentage. Returns ``None``.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    print(
        f"trainable params: {trainable} || all params: {total} || trainable%: {100 * trainable / total}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank 32, alpha 64, dropout 0.05, no bias terms.
# NOTE(review): the module tree printed earlier in this notebook shows the expert
# projections as w1/w2/w3 and the router as `gate`; no gate_proj/up_proj/down_proj
# modules are visible there — confirm these target names match anything in this model.
config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# Wrap the prepared model with the LoRA config and report the trainable share.
model_0_1 = get_peft_model(model_0_1, config)
# model = get_peft_model(model, peft_config=config, adapter_name="adapter_1")
# model.add_adapter(lora_config=config, adapter_name="adapter_1")
print_trainable_parameters(model_0_1)

In [None]:
# Print the module tree after the LoRA wrapping step.
print(model_0_1)

In [None]:
# Hand the PEFT-wrapped model to the Accelerator configured with the FSDP plugin.
model_0_1 = accelerator.prepare_model(model_0_1)

In [None]:
# Show the training dataset summary (columns and row count).
print(train_dataset)

In [None]:
def generate_and_tokenize_prompt(prompt):
    """Turn one dataset record into chat-template token ids.

    ``prompt['input']`` becomes the user turn and ``prompt['output']`` the
    assistant turn; the pair is rendered and tokenized with the tokenizer's
    chat template.

    Returns a dict with ``input_ids`` and an all-ones ``attention_mask`` of
    the same length.
    """
    conversation = [
        {"role": "user", "content": prompt['input']},
        {"role": "assistant", "content": prompt['output']},
    ]
    token_ids = tokenizer.apply_chat_template(conversation, tokenize=True)
    return {'input_ids': token_ids, 'attention_mask': [1] * len(token_ids)}

# Apply the tokenization function to every record of both datasets.
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)

In [None]:
import transformers
from datetime import datetime

# Run naming: checkpoints are written under ./mistral2-mistral-finetune.
project = "mistral-finetune"
base_model_name = "mistral2"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

# from peft import PeftModel
# print(model.config._use_perft)
# print(model)

# import transformers.trainer

# print(dir(transformers.trainer))
# print(isinstance(model, PeftModel))
# from peft.utils import is_peft_available
# print(is_peft_available())

trainer = transformers.Trainer(
    model=model_0_1.to("cuda"),
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=500,
        learning_rate=2.5e-5, # Want a small lr for finetuning
        bf16=True,
        optim="paged_adamw_8bit",
        logging_steps=25,              # Report the training loss every 25 steps
        logging_dir="./logs",        # Directory for storing logs
        save_strategy="steps",       # Checkpoint on a step interval (see save_steps)
        save_steps=25,                # Save a checkpoint every 25 steps
        evaluation_strategy="steps", # Evaluate on a step interval
        # eval_steps=25,               # Left unset; presumably falls back to logging_steps — confirm
        do_eval=True,                # Enable evaluation on eval_dataset
    ),
    # Causal-LM collation (mlm=False).
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model_0_1.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

# phase 1->2 training

In [None]:
%%sh

# Upgrade transformers to the latest release (unpinned).
# NOTE(review): presumably this replaces the fork installed from git earlier in the notebook — confirm that is intended.
pip install -U transformers

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Reload the same Mixtral checkpoint and 4-bit NF4 config used in phase 0->1.
base_model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mixtral-8x7B-Instruct, the phase 0->1 base checkpoint
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
from peft import PeftModel

# Load the LoRA adapter saved at step 500 of the phase 0->1 run
# (500 is that run's max_steps, i.e. its last step — not a selected "best" checkpoint).
run_name = "mistral2-mistral-finetune"
ft_model = PeftModel.from_pretrained(base_model, f"{run_name}/checkpoint-500").to("cuda")

In [None]:
# this is the model we will train to transition between phases 1 and 2
# Unwrap the PEFT wrapper to the underlying causal-LM module; the printout in the
# next cell shows it still contains the injected LoRA layers.
model_phase_1_2 = ft_model.base_model.model

In [None]:
# Display the unwrapped model's module tree.
model_phase_1_2

MixtralForCausalLM(
  (model): MixtralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MixtralDecoderLayer(
        (self_attn): MixtralAttention(
          (q_proj): Linear4bit(
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
          )
          (k_proj): Linear4bit(
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_featur

In [None]:
from datasets import load_dataset

In [None]:
import os

# Phase 1->2 trains on the larger combined dataset; the validation file is the same as in phase 0->1.
train_dataset = load_dataset('json', data_files='/content/drive/MyDrive/treehacks/combinedtraining.jsonl', split='train')
eval_dataset = load_dataset('json', data_files='/content/drive/MyDrive/treehacks/validation.jsonl', split='train')

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

# Same FSDP/Accelerator configuration as in the phase 0->1 section, rebuilt for this phase.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

In [None]:
# Tokenizer for the Mixtral checkpoint: left padding, BOS and EOS tokens added.
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
def generate_code_given_nlq_1_2(nlq):
    """Sample a reply from ``model_phase_1_2`` for a natural-language terminal request.

    ``nlq`` is embedded in the fixed terminal-assistant instruction, rendered
    through the tokenizer's chat template, and passed to ``generate`` with
    sampling enabled (up to 1000 new tokens).

    Returns the first decoded sequence from ``tokenizer.batch_decode``.
    """
    instruction = f"You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, your response should be `ls`. Please only respond with the command itself. Here is my query: \"{nlq}\""
    chat = [{"role": "user", "content": instruction}]
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt")
    output_ids = model_phase_1_2.generate(input_ids, max_new_tokens=1000, do_sample=True)
    return tokenizer.batch_decode(output_ids)[0]

In [None]:
# nlq = "create a cron job that runs the script /home/scrape.sh every 24 hours starting from 8:00pm on Sunday, February 18th, 2024"
# print(generate_code_given_nlq_1_2(nlq))

In [None]:
from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing, then run PEFT's k-bit training preparation on the model.
model_phase_1_2.gradient_checkpointing_enable()
model_phase_1_2 = prepare_model_for_kbit_training(model_phase_1_2)

In [None]:
def print_trainable_parameters(model):
    """Print trainable vs. total parameter counts for ``model``.

    Same contract as the definition in the phase 0->1 section: sums ``numel()``
    over ``model.named_parameters()`` (all, and those with ``requires_grad``)
    and prints both counts plus the trainable percentage. Returns ``None``.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    total = sum(count for count, _ in sizes)
    trainable = sum(count for count, needs_grad in sizes if needs_grad)
    print(
        f"trainable params: {trainable} || all params: {total} || trainable%: {100 * trainable / total}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for phase 1->2: rank 32, alpha 64, dropout 0.05, no bias terms.
# NOTE(review): `model_phase_1_2` already contains LoRA layers from the phase 0->1
# adapter (see its printout above) — confirm that wrapping it again is intended.
config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# Wrap the model with the LoRA config and report the trainable share.
model_phase_1_2 = get_peft_model(model_phase_1_2, config)
print_trainable_parameters(model_phase_1_2)

trainable params: 28418048 || all params: 23511019520 || trainable%: 0.12087118542786188


In [None]:
# Print the module tree after the LoRA wrapping step.
print(model_phase_1_2)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MixtralForCausalLM(
      (model): MixtralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MixtralDecoderLayer(
            (self_attn): MixtralAttention(
              (q_proj): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
              )
              (k_proj): Linear4bit(
                (lora_dropout): ModuleDict(

In [None]:
# Hand the PEFT-wrapped model to the Accelerator configured with the FSDP plugin.
model_phase_1_2 = accelerator.prepare_model(model_phase_1_2)

In [None]:
# Show the combined training dataset summary (columns and row count).
print(train_dataset)

Dataset({
    features: ['input', 'output'],
    num_rows: 19276
})


In [None]:
def generate_and_tokenize_prompt(prompt):
    """Turn one dataset record into chat-template token ids (phase 1->2 variant).

    Unlike the phase 0->1 definition of the same name, the user turn wraps
    ``prompt['input']`` in the fixed terminal-assistant instruction;
    ``prompt['output']`` is the assistant turn.

    Returns a dict with ``input_ids`` and an all-ones ``attention_mask`` of
    the same length.
    """
    user_turn = f"You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, your response should be `ls`. Please only respond with the command itself. Here is my query: \"{prompt['input']}\""
    conversation = [
        {"role": "user", "content": user_turn},
        {"role": "assistant", "content": prompt['output']},
    ]
    token_ids = tokenizer.apply_chat_template(conversation, tokenize=True)
    return {'input_ids': token_ids, 'attention_mask': [1] * len(token_ids)}

# Apply the tokenization function to every record of both datasets.
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_prompt)
tokenized_val_dataset = eval_dataset.map(generate_and_tokenize_prompt)

Map:   0%|          | 0/19276 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
import transformers
from datetime import datetime

# Run naming: checkpoints are written under ./mistral2-phase-1-2-mistral-finetune.
project = "phase-1-2-mistral-finetune"
base_model_name = "mistral2"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

# from peft import PeftModel
# print(model.config._use_perft)
# print(model)

# import transformers.trainer

# print(dir(transformers.trainer))
# print(isinstance(model, PeftModel))
# from peft.utils import is_peft_available
# print(is_peft_available())

# NOTE(review): .half() casts to float16 while the TrainingArguments below request
# bf16 — confirm the fp16 cast is intended.
model_phase_1_2.half()

trainer = transformers.Trainer(
    model=model_phase_1_2.to("cuda"),
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=1,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        gradient_checkpointing=True,
        max_steps=500,
        learning_rate=2.5e-5, # Want a small lr for finetuning
        bf16=True,
        optim="paged_adamw_8bit",
        logging_steps=25,              # Report the training loss every 25 steps
        logging_dir="./logs",        # Directory for storing logs
        save_strategy="steps",       # Checkpoint on a step interval (see save_steps)
        save_steps=25,                # Save a checkpoint every 25 steps
        evaluation_strategy="steps", # Evaluate on a step interval (see eval_steps)
        eval_steps=25,               # Evaluate every 25 steps
        do_eval=True,                # Enable evaluation on eval_dataset
    ),
    # Causal-LM collation (mlm=False).
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model_phase_1_2.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()



Step,Training Loss,Validation Loss
25,2.8575,1.846487
50,1.0762,1.102981
75,0.76,0.807342
100,0.6231,4.800373
125,0.8719,0.694432
150,0.5144,0.643339
175,0.5609,0.641982
200,0.4884,0.624331
225,0.4356,0.637454
250,0.5797,0.582857




KeyboardInterrupt: 

# model evaluation

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# NOTE(review): this cell loads Mistral-7B-Instruct, whereas the training cells above
# load Mixtral-8x7B-Instruct — confirm which base the saved adapters belong to.
base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mistral-7B-Instruct (see review note above)
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
)

# Tokenizer used by the *_ft generation helpers below; only the BOS token is added.
eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True, trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
# Output directories of the two training runs (they match `run_name` in the trainer cells).
run_name_0_1 = "mistral2-mistral-finetune"
run_name_1_2 = "mistral2-phase-1-2-mistral-finetune"

In [None]:
from peft import PeftModel

# Attach the phase 0->1 adapter (step-500 checkpoint) to the base model and switch to eval mode.
ft_model_0_1 = PeftModel.from_pretrained(base_model, f"{run_name_0_1}/checkpoint-500").to("cuda")
# NOTE(review): the phase 1->2 load is disabled here, yet a later cell reads `ft_model_1_2`.
# ft_model_1_2 = PeftModel.from_pretrained(base_model, f"{run_name_1_2}/checkpoint-300").to("cuda")
ft_model_0_1.eval()
# ft_model_1_2.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=32, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=32, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
              )
              (k_proj): Linear4bit(
                (lora_dropout): ModuleDict(

In [None]:
# NOTE(review): the server endpoints below reference `model_0`, `model_0_1` and
# `model_1_2`, but only `model_1_2` is assigned in this cell; the other two
# assignments are commented out.
# model_0 = AutoModelForCausalLM.from_pretrained(
#     base_model_id,
#     quantization_config=bnb_config,
#     device_map="auto",
#     trust_remote_code=True,
# )
# model_0_1 = ft_model_0_1.base_model.model
# NOTE(review): `ft_model_1_2` is only created by a commented-out line in the
# adapter-loading cell above, so on a fresh kernel this line raises NameError.
model_1_2 = ft_model_1_2.base_model.model

In [None]:
# NOTE(review): `model_0` is only assigned in commented-out code, so this raises
# NameError (as in the recorded output).
model_0

NameError: name 'model_0' is not defined

In [None]:
# NOTE(review): depends on `model_0_1` surviving from the phase 0->1 cells; the
# recorded run raised NameError here.
model_0_1

NameError: name 'model_0_1' is not defined

In [None]:
# Display the module tree of the phase 1->2 model.
model_1_2
# model_1_2 = model_phase_1_2.base_model.model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
          )
          (k_proj): Linear4bit(
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_featur

In [None]:
def generate_code_given_nlq_ft(model, nlq):
    """Sample one reply from ``model`` for a natural-language terminal request.

    ``nlq`` is embedded in the fixed terminal-assistant instruction, rendered
    with ``eval_tokenizer``'s chat template, and passed to ``model.generate``
    with sampling enabled (up to 100 new tokens).

    Returns the first decoded sequence from ``eval_tokenizer.batch_decode``.
    """
    instruction = f"You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, your response should be `ls`. Please only respond with the command itself. Here is my query: \"{nlq}\""
    chat = [{"role": "user", "content": instruction}]
    input_ids = eval_tokenizer.apply_chat_template(chat, return_tensors="pt")
    output_ids = model.generate(input_ids, max_new_tokens=100, do_sample=True)
    return eval_tokenizer.batch_decode(output_ids)[0]

def generate_many_code_given_nlq_ft(model, nlq):
    """Generate three candidate replies from ``model`` for a terminal request.

    Uses the same instruction and chat template as the single-reply helper,
    but calls ``generate`` with 10 beams, sampling, and
    ``num_return_sequences=3``.

    Returns ``str()`` of the list of decoded sequences.
    """
    instruction = f"You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, your response should be `ls`. Please only respond with the command itself. Here is my query: \"{nlq}\""
    chat = [{"role": "user", "content": instruction}]
    input_ids = eval_tokenizer.apply_chat_template(chat, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        max_new_tokens=100,
        num_return_sequences=3,
        num_beams=10,
        do_sample=True,
        output_scores=True,
    )
    candidates = eval_tokenizer.batch_decode(output_ids)
    return str(candidates)

In [None]:
%%sh

# OpenAI client library, used for the gpt-3.5-turbo baseline helpers below.
pip3 install openai==1.12.0



In [None]:
# Switch model_0_1 to eval mode and unwrap it to `.base_model.model`.
# NOTE(review): requires `model_0_1` from the phase 0->1 cells (the recorded run
# raised NameError); the second line rebinds the name to its own attribute, so
# re-running the cell presumably unwraps one level further — confirm.
model_0_1.eval()
model_0_1 = model_0_1.base_model.model

NameError: name 'model_0_1' is not defined

In [None]:
# Display the unwrapped model.
model_0_1

In [None]:
from openai import OpenAI

import os

# Read the API key from the environment instead of embedding it in the notebook.
# NOTE(review): the key previously hard-coded on this line has been exposed in the
# notebook source and should be revoked.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

import json

def generate_code_given_nlq_openai(nlq):
  """Ask gpt-3.5-turbo for a terminal command answering ``nlq``.

  Sends the fixed terminal-assistant system prompt plus ``nlq`` as the user
  message and returns the text content of the first choice.
  """
  system_prompt = "You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, you should respond with `ls`"
  chat = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": nlq},
  ]
  completion = client.chat.completions.create(model="gpt-3.5-turbo", messages=chat)
  first_choice = completion.choices[0]
  return first_choice.message.content

def generate_openai_prompt(prompt):
  """Forward a JSON-encoded chat request to the OpenAI client.

  ``prompt`` is a JSON string with ``"model"`` and ``"messages"`` keys. It is
  printed, parsed, and sent as a chat completion; the text content of the
  first choice is returned.
  """
  print("openai prompt", prompt)
  request = json.loads(prompt)
  completion = client.chat.completions.create(
      model=request["model"],
      messages=request["messages"],
  )
  first_choice = completion.choices[0]
  return first_choice.message.content

def generate_many_code_given_nlq_openai(nlq):
  """Ask gpt-3.5-turbo for a terminal command and return the raw first choice.

  Same request as the single-answer OpenAI helper, but the return value is
  ``str()`` of the whole first choice object rather than its message text.
  """
  system_prompt = "You are a terminal assistant. Your should respond to my query with a terminal command in MacOS that will solve the problem that I am having trouble with, and nothing else. I should be able to copy-paste your answer directly into my terminal and run it. For example, if I ask you how to list the contents of the current directory, you should respond with `ls`"
  chat = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": nlq},
  ]
  completion = client.chat.completions.create(model="gpt-3.5-turbo", messages=chat)
  return str(completion.choices[0])

# server stuff

In [None]:
%%sh

# pyngrok opens the public tunnel to the local server in the final cell.
pip -q install pyngrok

In [None]:
%%sh

# Server dependencies: public_ip, uvicorn + nest_asyncio, and FastAPI (all unpinned).
pip -q install public_ip
pip -q install uvicorn nest_asyncio
pip install -q fastapi

In [None]:
from fastapi import FastAPI
import nest_asyncio
import uvicorn
import public_ip

In [None]:
from pydantic import BaseModel

class QueryParams(BaseModel):
    """Request body for the /generate_code and /generate_many_code endpoints."""
    # natural-language request forwarded to the selected generator
    query: str
    # backend selector; the endpoints recognise "openai", "0", "1" and "2"
    model: str

class OpenAIReq(BaseModel):
    """Request body for the /zig endpoint: a model name plus a list of chat messages."""
    # model name; the /zig handler does not read it
    model: str
    # chat messages; the /zig handler reads messages[0]["content"]
    messages: list

In [None]:
# FastAPI application that the route decorators in the following cells register on.
app = FastAPI()

In [None]:
@app.get('/')
async def root():
    """Root route; returns a fixed JSON payload."""
    payload = {"message": "root"}
    return payload

In [None]:
@app.post('/zig')
async def zig(openai_req: OpenAIReq):
    """Generate a command with ``model_0_1`` for the first message of the request.

    The content of ``openai_req.messages[0]`` is passed to
    ``generate_code_given_nlq_ft``. The decoded output is trimmed to the text
    after the ``[/INST]`` marker when the marker is present, and the last four
    characters are dropped in either case (presumably the ``</s>`` end token —
    confirm).

    Returns ``{"data": <trimmed text>}``.
    """
    messages = openai_req.messages
    print("messages", type(messages), messages)
    response = generate_code_given_nlq_ft(model_0_1, messages[0]["content"])

    # str.find returns -1 when the marker is missing; str.index would raise
    # ValueError instead, which made the "marker absent" branch unreachable.
    marker = "[/INST]"
    index = response.find(marker)
    if index == -1:
        return {"data": response[:-4]}
    # Skip the marker and the single character that follows it.
    return {"data": response[index + len(marker) + 1:-4]}

In [None]:
@app.post('/generate_code')
async def generate_code(query_params : QueryParams):
    """Route a query to the backend named by ``query_params.model``.

    Recognised selectors are ``"openai"``, ``"0"``, ``"1"`` and ``"2"``; any
    other value yields ``{"message": "invalid query!"}``.
    """
    query = query_params.query
    # Each entry is a thunk so that only the selected backend's model global is looked up.
    backends = {
        "openai": lambda: generate_code_given_nlq_openai(query),
        "0": lambda: generate_code_given_nlq_ft(model_0, query),
        "1": lambda: generate_code_given_nlq_ft(model_0_1, query),
        "2": lambda: generate_code_given_nlq_ft(model_1_2, query),
    }
    run_backend = backends.get(query_params.model)
    if run_backend is None:
        return {"message":"invalid query!"}
    return {"message": run_backend()}

In [None]:
@app.post('/generate_many_code')
async def generate_many_code(query_params: QueryParams):
    """Route a query to the multi-candidate backend named by ``query_params.model``.

    Recognised selectors are ``"openai"``, ``"0"``, ``"1"`` and ``"2"``; any
    other value yields ``{"message": "invalid query!"}``.
    """
    query = query_params.query
    # Each entry is a thunk so that only the selected backend's model global is looked up.
    backends = {
        "openai": lambda: generate_many_code_given_nlq_openai(query),
        "0": lambda: generate_many_code_given_nlq_ft(model_0, query),
        "1": lambda: generate_many_code_given_nlq_ft(model_0_1, query),
        "2": lambda: generate_many_code_given_nlq_ft(model_1_2, query),
    }
    run_backend = backends.get(query_params.model)
    if run_backend is None:
        return {"message":"invalid query!"}
    return {"message": run_backend()}

In [None]:
# @app.post('/openai')
# async def call_openai(openai_req: OpenAIReq):
#     return {"data":generate_openai_prompt(openai_req.json())}

In [None]:
%%sh

# Take the token from the NGROK_AUTHTOKEN environment variable rather than the
# notebook source; the `:?` form aborts with a message when it is unset.
# NOTE(review): the token previously hard-coded here has been exposed and should be rotated.
ngrok config add-authtoken "${NGROK_AUTHTOKEN:?set NGROK_AUTHTOKEN before running this cell}"

In [None]:
%%sh

# sse-starlette provides EventSourceResponse, referenced by the commented-out /openai streaming route.
pip install sse-starlette

In [None]:
import asyncio

status_stream_delay = 0.1  # seconds to pause after each "update" event
status_stream_retry_timeout = 30000  # milliseconds; not read by the generator below

async def status_event_generator(request, message):
    """Stream ``message`` to the client three items at a time.

    Each "update" event carries up to three items of ``message`` joined by
    spaces, followed by a pause of ``status_stream_delay`` seconds. Once
    nothing is left, a single "end" event with empty data is emitted. The
    generator stops without emitting anything further as soon as
    ``request.is_disconnected()`` reports true.
    """
    pending = message
    print(pending)
    while True:
        print(pending)
        if await request.is_disconnected():
            print('Request disconnected')
            return

        if not len(pending):
            print('Request completed. Disconnecting now')
            yield {"event": "end", "data": ''}
            return

        has_more = len(pending) > 3
        chunk = pending[:3] if has_more else pending
        yield {"event": "update", "data": ' '.join(chunk)}

        pending = pending[3:] if has_more else []
        await asyncio.sleep(status_stream_delay)

In [None]:
from fastapi import FastAPI, Request
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request
from starlette.responses import Response
from starlette.types import ASGIApp, Receive, Scope, Send

async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
    """ASGI entry point written in the shape of a middleware ``__call__``.

    HTTP scopes are wrapped in a ``Request`` and passed to
    ``self.dispatch_func`` together with ``self.call_next``; the resulting
    response is then sent. Every other scope type is forwarded to
    ``self.app`` untouched.

    NOTE(review): defined at module level rather than inside a class — confirm
    where it is meant to be attached.
    """
    print("inside of this middleware")
    if scope["type"] == "http":
        request = Request(scope, receive=receive)
        response = await self.dispatch_func(request, self.call_next)
        await response(scope, receive, send)
    else:
        await self.app(scope, receive, send)

In [None]:
# from sse_starlette.sse import EventSourceResponse
# from fastapi import APIRouter, Request

# @app.post('/openai')
# async def runStatus(
#         openai_req: OpenAIReq,
#     request: Request,
# ):
#     message = generate_openai_prompt(openai_req.json())
#     print(message, message.split())
#     event_generator = status_event_generator(request, message.split())
#     return EventSourceResponse(event_generator)

In [None]:
from pyngrok import ngrok

# Single source of truth for the port: the ngrok tunnel and the uvicorn server
# must target the same port, so the literal is no longer repeated.
SERVER_PORT = 7000

# Open the public tunnel first so its URL is printed before uvicorn.run()
# takes over the cell.
ngrok_tunnel = ngrok.connect(SERVER_PORT)
print('Public URL:', ngrok_tunnel.public_url)

# nest_asyncio lets uvicorn start its event loop inside the notebook kernel's
# already-running loop. `nest_asyncio`, `uvicorn` and `app` are expected to
# come from earlier cells.
nest_asyncio.apply()
uvicorn.run(app, port=SERVER_PORT)

Public URL: https://8c4af8d9178c.ngrok.app


INFO:     Started server process [94488]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:7000 (Press CTRL+C to quit)


INFO:     35.237.4.214:0 - "GET / HTTP/1.1" 200 OK
INFO:     35.237.4.214:0 - "GET / HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i use ls\n\x00'}]




INFO:     171.64.77.46:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '\x1b[A\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'test\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'hi\n\x00'}]
INFO:     171.64.77.66:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i ls human readable\n\x00'}]
INFO:     171.64.77.66:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'test\n\x00'}]
INFO:     171.64.77.65:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls human readable\n\x00'}]
INFO:     171.64.77.55:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls human readable\n\x00'}]
INFO:     171.64.77.55:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls human readable\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'cron job for emailing me the sunrise time fetched from an external api every wednesday at 8pm\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '\x1b[A\n\x00'}]
INFO:     171.64.77.53:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls human readable\n\x00'}]
INFO:     171.64.77.63:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'cron job for emailing me sunrise time from external api every wednesday night\n\x00'}]
INFO:     171.64.77.53:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'change permissions of all files in a folder to be rwx by all\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how much space is my home directory taking up?\n\x00'}]
INFO:     171.64.77.63:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all files in the current directory that are writable by their owner\n\x00'}]
INFO:     171.64.77.63:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'recursively set all permissions under ../tools to 777\n\x00'}]
INFO:     171.64.77.60:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'enable dot glob shell option\n\x00'}]
INFO:     171.64.77.60:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'what percentage of my zshell history is git commits?\n\x00'}]
INFO:     171.64.77.60:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'show me top\n\x00'}]
INFO:     171.64.77.63:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'what percentage of my zshell history is git commits?\n\x00'}]
INFO:     171.64.77.63:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'what percentage of my zshell history is git commits?\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'what percentage of my zshell history is git commits?\n\x00'}]
INFO:     171.64.77.50:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'tes\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all files that are newer than january 1 2024 in the current directory\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all files that are newer than january 1 2024 in the current directory\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all .py files that are newer than february 16 2024 in the current directory\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all .py files that are newer than 2023 in the current directory\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all .py files that are owned by albert in the current directory\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i do a cron job that calls some external API url (to be specified) every wednesday at 8pm\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i start a cron job that makes a post request to some api every wednesday at 6pm\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'hi\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls human readable'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': "how do i make the output of ls human readable'\n\x00"}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i start a cron job that calls an api every wednesday at 8pm\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'show me the contents of this folder\n\x00'}]
INFO:     68.65.169.186:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the .py files in this folder that are owned by albert\n\x00'}]
INFO:     68.65.169.186:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls in human readable format\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the .py files in ~/treehacks that are owned by albert\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the *.py files in ~/treehacks that belong to albert\n\x00'}]
INFO:     68.65.169.183:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all the files in human readable format\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the .py files in ~/treehacks that are owned by albert\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'klsdfjkdlsjfkl\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'hi\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'how do i start a cron job to post some external api every wednesday at 6pm\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all the files in the folder that are human readable\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'show system stats\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'install ffmpeg\n\x00'}]
INFO:     68.65.169.186:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '😕\n\x00'}]
INFO:     68.65.169.180:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all files in a human readable format\n\x00'}]
INFO:     68.65.169.180:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all python files in my treehacks folder owned by albert\n\x00'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'go to my courses directory\n\x00'}]
INFO:     68.65.169.186:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the python files in this directory\n\x00'}]
INFO:     68.65.169.186:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all the files in this directory in human readable format\n\x00'}]
INFO:     68.65.169.180:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the python files in this directory that are owned by albert\n\x00'}]
INFO:     68.65.169.180:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all the files in this directory in  human readable format\n\x00'}]
INFO:     68.65.169.180:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'ls\n\x00'}]
INFO:     68.65.169.184:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the python files in this folder'}]
INFO:     68.65.169.182:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all the files in this directory in human readable format\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all the python files in my ~/treehacks directory owned by albert\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'what version of python am i using?\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all files \n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all files in this directory\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'find all python files in the current directory owned by albert\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all files\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'list all files\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'help \n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'give me the time'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'can you code me a program that says hi every 10 seconds'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'can you code me a program that says hi every 10 seconds'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'can you code me a program that says hi every 10 seconds but only 10 times'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'can you code me a program that says hi every 1 seconds but only 10 times'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'write me a program that echos hello 10 times every 1 second\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'write me a program that echos hello 10 times every 1 second and enclose it in backticks\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'execute the zz command\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'i have an installed command called "zz" can you write a script to run the command\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'i have an executable at "/Users/albert/treehacks/zig_out/bin/zyzx". can you write a script to call this executable\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': '/Users/albert/treehack\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


messages <class 'list'> [{'role': 'user', 'content': 'write a self generating scrip[t\n\x00'}]
INFO:     2607:f6d0:ced:5b4:6c92:26a6:3200:dd21:0 - "POST /zig HTTP/1.1" 200 OK


In [None]:
%%sh

# Print one human-readable total (-s: summarize, -h: human-readable units)
# for the disk space used by the checkpoint-500 directory.
checkpoint_dir="mistral2-mistral-finetune/checkpoint-500"
du -sh "$checkpoint_dir"