In [14]:
import os
import gc

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

import json
import wandb

In [15]:
def generate_text( foo_tokenizer, model, product, max_new_tokens=128 ):
    
    instruction = f"""### Instruction:
    Use the Task below and the Input given to write the Response, which is programmatic instruction that can solve the following Task:

    ### Task:
    Create a detailed description for the following product

    ### Input:
    {product}

    ### Response:
    """
    
    
    device = "cuda:0"
    inputs = foo_tokenizer( instruction, return_tensors="pt" ).to( device )
    
    generation_output = model.generate(
        input_ids=inputs[ "input_ids" ],
        attention_mask=inputs[ "attention_mask" ],
        max_new_tokens=max_new_tokens
    )
        
    print( "generation_output[ 0 ]:", generation_output[ 0 ], end="\n\n" )
    print( "generation_output[ 0 ].shape:", generation_output[ 0 ].shape, end="\n\n" )
    
    raw_output = foo_tokenizer.decode( generation_output[ 0 ] )
    
    print( "raw_output:", raw_output, end="\n\n" )
    print(  "len( raw_output ):", len( raw_output ), end="\n\n")
    
    response   = raw_output.split( "### Response:" )[ 1 ]
    
    return response

product = "Corelogic Smooth Mouse, belonging to category: Optical Mouse"

for line in generate_text( tokenizer, base_model, product ).split( "\n" ): print( line )

NameError: name 'tokenizer' is not defined

In [16]:
os.chdir( "/var/model" )

In [17]:
!ls -alh

total 14G
drwxrwxr-x  6 1001 1001 4.0K Nov 17 15:51 .
drwxr-xr-x  1 root root 4.0K Nov 17 15:36 ..
drwxr-xr-x  2 root root 4.0K Nov 11 03:01 .ipynb_checkpoints
-rw-rw-r--  1 1001 1001 6.9K Nov  5 17:36 LICENSE
-rw-rw-r--  1 1001 1001 6.1K Nov  5 17:36 README.md
-rw-rw-r--  1 1001 1001 4.7K Nov  5 17:36 USE_POLICY.md
-rw-r--r--  1 root root 9.3K Nov 11 02:12 code-llama-instruct-7b-peft.ipynb
-rw-r--r--  1 1001 1001  97K Nov 11 03:08 code-llama-instruct-7b.ipynb
-rw-rw-r--  1 1001 1001  646 Nov  5 17:36 config.json
-rw-rw-r--  1 1001 1001  116 Nov  5 17:36 generation_config.json
drwxr-xr-x  2 root root 4.0K Nov 17 15:47 merged
-rw-rw-r--  1 1001 1001 9.3G Nov  5 17:42 model-00001-of-00002.safetensors
-rw-rw-r--  1 1001 1001 3.3G Nov  5 17:40 model-00002-of-00002.safetensors
-rw-rw-r--  1 1001 1001  25K Nov  5 17:36 model.safetensors.index.json
-rw-rw-r--  1 1001 1001  24K Nov  5 17:36 pytorch_model.bin.index.json
-rw-rw-r--  1 1001 1001  411 Nov  5 17:36 special_tokens_ma

In [18]:
wandb.login()

True

In [19]:
%env WANDB_PROJECT=codellama-7b-instruct-hf-peft-fine-tuning

env: WANDB_PROJECT=codellama-7b-instruct-hf-peft-fine-tuning


In [20]:
dataset_name = "iamtarun/python_code_instructions_18k_alpaca"
split = "train[:10%]"
finetunes_model_name = "output/codellama-7b-finetuned-int4-python-18k-alpaca"

In [21]:
from datasets import load_dataset

dataset = load_dataset( dataset_name, split=split )

In [22]:
def prompt_instruction_format( sample ):
    
  return f"""### Instruction:
    Use the Task below and the Input given to write the Response, which is programmatic instruction that can solve the following Task:

    ### Task:
    {sample['instruction']}

    ### Input:
    {sample['input']}

    ### Response:
    {sample['output']}
    """

In [23]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
print( bnb_config )
tokenizer              = AutoTokenizer.from_pretrained( "." )
tokenizer.pad_token    = tokenizer.eos_token
tokenizer.padding_side = "right"

# ¡OJO! Why are we turning off the cash here? 
# We're not! It makes a huge performance difference: 21 vs 14 tokens per second!
base_model = AutoModelForCausalLM.from_pretrained(
    ".", quantization_config=bnb_config, device_map="auto", low_cpu_mem_usage=True, use_cache=True
)

BitsAndBytesConfig {
  "bnb_4bit_compute_dtype": "bfloat16",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
for line in generate_text( tokenizer, base_model, product ).split( "\n" ): print( line )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


generation_output[ 0 ]: tensor([    1,   835,  2799,  4080, 29901,    13,  1678,  4803,   278,  9330,
         2400,   322,   278, 10567,  2183,   304,  2436,   278, 13291, 29892,
          607,   338,  1824, 29885,  2454, 15278,   393,   508,  4505,   278,
         1494,  9330, 29901,    13,    13,  1678,   835,  9330, 29901,    13,
         1678,  6204,   263, 13173,  6139,   363,   278,  1494,  3234,    13,
           13,  1678,   835, 10567, 29901,    13,  1678,  2994,   295,   468,
          293,  4116,  6983, 25992, 29892, 23329,   304,  7663, 29901, 20693,
          936, 25992,    13,    13,  1678,   835, 13291, 29901,    13,   268,
        29896, 29889, 10969,  4408, 29901,  2994,   295,   468,   293,  4116,
         6983, 25992,    13,   268, 29906, 29889, 10969, 12953, 29901,   450,
         2994,   295,   468,   293,  4116,  6983, 25992,   338,   263,  1880,
        29899, 29567, 27070,  9495,  8688,   304,  3867,   263, 10597,   322,
        18378,  7271,   363,  4160, 2988

In [10]:
# import gc
# del model
# torch.cuda.empty_cache()
# gc.collect()

In [13]:
base_model.hf_device_map

{'model.embed_tokens': 0,
 'model.layers.0': 0,
 'model.layers.1': 0,
 'model.layers.2': 0,
 'model.layers.3': 0,
 'model.layers.4': 0,
 'model.layers.5': 0,
 'model.layers.6': 0,
 'model.layers.7': 0,
 'model.layers.8': 0,
 'model.layers.9': 0,
 'model.layers.10': 0,
 'model.layers.11': 0,
 'model.layers.12': 0,
 'model.layers.13': 1,
 'model.layers.14': 1,
 'model.layers.15': 1,
 'model.layers.16': 1,
 'model.layers.17': 1,
 'model.layers.18': 1,
 'model.layers.19': 1,
 'model.layers.20': 1,
 'model.layers.21': 1,
 'model.layers.22': 1,
 'model.layers.23': 1,
 'model.layers.24': 1,
 'model.layers.25': 1,
 'model.layers.26': 1,
 'model.layers.27': 1,
 'model.layers.28': 1,
 'model.layers.29': 1,
 'model.layers.30': 1,
 'model.layers.31': 1,
 'model.norm': 1,
 'lm_head': 1}

In [40]:
# for name, param in base_model.named_parameters():
#     print(f"Parameter {name} is on device {param.device}")

## Set up training arguments

In [25]:
from peft import LoraConfig, get_peft_config, PeftModel, PeftConfig, get_peft_model, AutoPeftModelForCausalLM

peft_config = LoraConfig(
    r=16, 
    lora_alpha=32, 
    # When target_modules was disabled, it was causing detention layers to be assigned to the CPU, throwing this runtime error:
    # RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! 
    # (when checking argument for argument mat2 in method wrapper_CUDA_mm)
    target_modules=[ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ], 
    lora_dropout=0.10, 
    bias="none", 
    task_type="CAUSAL_LM"
)

In [26]:
# Define the training arguments
trainingArgs = TrainingArguments(
    output_dir="./training-results", # Output directory where the model predictions and checkpoints will be stored
    num_train_epochs=3, # Number of training epochs
    per_device_train_batch_size=4, # Batch size per GPU for training
    gradient_accumulation_steps=2,  # Number of update steps to accumulate the gradients for
    gradient_checkpointing=True,# Enable gradient checkpointing
    optim="paged_adamw_32bit", # Optimizer to use
    #save_steps=save_steps,
    logging_steps=5,
    save_strategy="epoch",
    learning_rate=2e-4,
    weight_decay=0.001,
    # fp16=True,
    bf16=False,
    # tf32=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    #max_steps=max_steps,
    group_by_length=False,
    lr_scheduler_type="cosine",
    disable_tqdm=True,
    report_to="wandb",
    seed=42
)
# Create the trainer
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length=2048,
    tokenizer=tokenizer,
    packing=True,
    formatting_func=prompt_instruction_format,
    args=trainingArgs,
)

In [27]:
base_model.print_trainable_parameters()

AttributeError: 'LlamaForCausalLM' object has no attribute 'print_trainable_parameters'

In [28]:
for name, param in base_model.named_parameters():
    print(f"Parameter {name} is on {param.device}")


Parameter model.embed_tokens.weight is on cuda:0
Parameter model.layers.0.self_attn.q_proj.lora_A.default.weight is on cuda:0
Parameter model.layers.0.self_attn.q_proj.lora_B.default.weight is on cuda:0
Parameter model.layers.0.self_attn.q_proj.base_layer.weight is on cuda:0
Parameter model.layers.0.self_attn.k_proj.lora_A.default.weight is on cuda:0
Parameter model.layers.0.self_attn.k_proj.lora_B.default.weight is on cuda:0
Parameter model.layers.0.self_attn.k_proj.base_layer.weight is on cuda:0
Parameter model.layers.0.self_attn.v_proj.lora_A.default.weight is on cuda:0
Parameter model.layers.0.self_attn.v_proj.lora_B.default.weight is on cuda:0
Parameter model.layers.0.self_attn.v_proj.base_layer.weight is on cuda:0
Parameter model.layers.0.self_attn.o_proj.lora_A.default.weight is on cuda:0
Parameter model.layers.0.self_attn.o_proj.lora_B.default.weight is on cuda:0
Parameter model.layers.0.self_attn.o_proj.base_layer.weight is on cuda:0
Parameter model.layers.0.mlp.gate_proj.lora

In [29]:
trainer.train()

#stop reporting to wandb
wandb.finish()

# save model
trainer.save_model()

print("Model saved")

You're using a CodeLlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


{'loss': 0.7425, 'learning_rate': 4.761904761904762e-05, 'epoch': 0.02}
{'loss': 0.6891, 'learning_rate': 9.523809523809524e-05, 'epoch': 0.04}
{'loss': 0.6339, 'learning_rate': 0.00014285714285714287, 'epoch': 0.06}
{'loss': 0.6216, 'learning_rate': 0.00019047619047619048, 'epoch': 0.09}
{'loss': 0.5063, 'learning_rate': 0.00019998282416292055, 'epoch': 0.11}




{'loss': 0.5331, 'learning_rate': 0.00019991305743680013, 'epoch': 1.01}
{'loss': 0.5162, 'learning_rate': 0.00019978966374934254, 'epoch': 1.03}
{'loss': 0.5119, 'learning_rate': 0.00019961270933041477, 'epoch': 1.05}
{'loss': 0.4702, 'learning_rate': 0.0001993822891578708, 'epoch': 1.07}
{'loss': 0.5098, 'learning_rate': 0.00019909852690657359, 'epoch': 1.09}
{'loss': 0.4685, 'learning_rate': 0.00019876157488201424, 'epoch': 1.11}




{'loss': 0.485, 'learning_rate': 0.0001983716139385641, 'epoch': 2.01}
{'loss': 0.4565, 'learning_rate': 0.00019792885338240374, 'epoch': 2.03}
{'loss': 0.4662, 'learning_rate': 0.0001974335308591806, 'epoch': 2.06}
{'loss': 0.4473, 'learning_rate': 0.00019688591222645607, 'epoch': 2.08}
{'loss': 0.414, 'learning_rate': 0.00019628629141101012, 'epoch': 2.1}
{'loss': 0.4145, 'learning_rate': 0.00019563499025107998, 'epoch': 2.12}
{'train_runtime': 1330.6939, 'train_samples_per_second': 4.196, 'train_steps_per_second': 0.525, 'train_loss': 0.5255529887535992, 'epoch': 2.12}


VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/epoch,▁▁▁▁▁▄▄▄▅▅▅███████
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇███
train/learning_rate,▁▃▅██████████████
train/loss,█▇▆▅▃▄▃▃▂▃▂▃▂▂▂▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁
train/train_samples_per_second,▁
train/train_steps_per_second,▁

0,1
train/epoch,2.12
train/global_step,85.0
train/learning_rate,0.0002
train/loss,0.4145
train/total_flos,5.538112860900557e+16
train/train_loss,0.52555
train/train_runtime,1330.6939
train/train_samples_per_second,4.196
train/train_steps_per_second,0.525


Model saved


In [19]:
# wandb.finish()

In [16]:
# # Drops 16.4/19.0 GB per GPU down to 3.25 GB per GPU!
# del base_model
# torch.cuda.empty_cache() 
# gc.collect()

In [17]:
# load the trained model from the output directory
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    trainingArgs.output_dir,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map="auto", 
    load_in_8bit=True
    # quantization_config=bnb_config
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [30]:
product = "Corelogic Smooth Mouse, belonging to category: Optical Mouse"

for line in generate_text( tokenizer, trained_model, product ).split( "\n" ): print( line )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


generation_output[ 0 ]: tensor([    1,   835,  2799,  4080, 29901,    13,  1678,  4803,   278,  9330,
         2400,   322,   278, 10567,  2183,   304,  2436,   278, 13291, 29892,
          607,   338,  1824, 29885,  2454, 15278,   393,   508,  4505,   278,
         1494,  9330, 29901,    13,    13,  1678,   835,  9330, 29901,    13,
         1678,  6204,   263, 13173,  6139,   363,   278,  1494,  3234,    13,
           13,  1678,   835, 10567, 29901,    13,  1678,  2994,   295,   468,
          293,  4116,  6983, 25992, 29892, 23329,   304,  7663, 29901, 20693,
          936, 25992,    13,    13,  1678,   835, 13291, 29901,    13,   268,
           13,  1678,   450,  2994,   295,   468,   293,  4116,  6983, 25992,
          338,   263,  1880, 29899, 29567, 27070,  9495,  8688,   363, 18378,
          322, 10597, 10298, 29889,   739,  5680,   263, 12844,  1416,   322,
        23682,  4917,   293,  2874, 29892,  3907,   372, 25561,   304,   671,
          363, 10410, 23704, 29889,   45

In [21]:
# Merge LoRA with the base model and save the merged model
lora_merged_model = trained_model.merge_and_unload()
lora_merged_model.save_pretrained( "merged", safe_serialization=False )
tokenizer.save_pretrained( "merged" )



('merged/tokenizer_config.json',
 'merged/special_tokens_map.json',
 'merged/tokenizer.model',
 'merged/added_tokens.json',
 'merged/tokenizer.json')

In [31]:
product = "Corelogic Smooth Mouse, belonging to category: Optical Mouse"

for line in generate_text( tokenizer, lora_merged_model, product ).split( "\n" ): print( line )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


generation_output[ 0 ]: tensor([    1,   835,  2799,  4080, 29901,    13,  1678,  4803,   278,  9330,
         2400,   322,   278, 10567,  2183,   304,  2436,   278, 13291, 29892,
          607,   338,  1824, 29885,  2454, 15278,   393,   508,  4505,   278,
         1494,  9330, 29901,    13,    13,  1678,   835,  9330, 29901,    13,
         1678,  6204,   263, 13173,  6139,   363,   278,  1494,  3234,    13,
           13,  1678,   835, 10567, 29901,    13,  1678,  2994,   295,   468,
          293,  4116,  6983, 25992, 29892, 23329,   304,  7663, 29901, 20693,
          936, 25992,    13,    13,  1678,   835, 13291, 29901,    13,   268,
           13,  1678,   450,  2994,   295,   468,   293,  4116,  6983, 25992,
          338,   263,  1880, 29899, 29567, 27070,  9495,  8688,   363, 18378,
          322, 10597, 10298, 29889,   739,  5680,   263, 12844,  1416,   322,
        23682,  4917,   293,  2874, 29892,  3907,   372, 25561,   304,   671,
          363, 10410, 23704, 29889,   45

## Load the merged model

In [32]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
merged_tokenizer              = AutoTokenizer.from_pretrained( "merged" )
merged_tokenizer.pad_token    = tokenizer.eos_token
merged_tokenizer.padding_side = "right"

# ¡OJO! Why are we turning off the cash here? 
# We're not! It makes a huge performance difference: 21 vs 14 tokens per second!
merged_model = AutoModelForCausalLM.from_pretrained(
    "merged", quantization_config=bnb_config, device_map="auto", low_cpu_mem_usage=True, use_cache=True
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [33]:
product = "Corelogic Smooth Mouse, belonging to category: Optical Mouse"

for line in generate_text( merged_tokenizer, merged_model, product ).split( "\n" ): print( line )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


generation_output[ 0 ]: tensor([    1,   835,  2799,  4080, 29901,    13,  1678,  4803,   278,  9330,
         2400,   322,   278, 10567,  2183,   304,  2436,   278, 13291, 29892,
          607,   338,  1824, 29885,  2454, 15278,   393,   508,  4505,   278,
         1494,  9330, 29901,    13,    13,  1678,   835,  9330, 29901,    13,
         1678,  6204,   263, 13173,  6139,   363,   278,  1494,  3234,    13,
           13,  1678,   835, 10567, 29901,    13,  1678,  2994,   295,   468,
          293,  4116,  6983, 25992, 29892, 23329,   304,  7663, 29901, 20693,
          936, 25992,    13,    13,  1678,   835, 13291, 29901,    13,   268,
           13,  1678,   450,  2994,   295,   468,   293,  4116,  6983, 25992,
          338,   263,  1880, 29899, 29567, 27070,  9495,  8688,   363, 18378,
          322, 10597, 10298, 29889,   739,  5680,   263, 12844,  1416,   322,
        23682,  4917,   293,  2874, 29892,  3907,   372, 25561,   304,   671,
          363, 10410, 23704, 29889,   45

In [33]:
def generate_code( model, max_new_tokens=512 ):
    
    instruction = f"""### Instruction:
    Use the Task below and the Input given to write the Response, which is programmatic instruction that can solve the following Task:

    ### Task:
    Write a function in python that calculates the haversine distance between two points. 

    ### Input:
    As you generate the python code needed to answer this request, I want you to:

    1) Question: Ask yourself if you understand the question that I am asking you.  Pay attention to the details!
    2) Think: Before you do anything, think out loud about what I am asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!
    3) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of running to completion. The last line of your code must be the variable `solution`, which represents the answer. Make sure that any filtering you perform matches the question asked of you by the user!
    4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the object type.
    
    Format: return your response as a JSON object with the following fields:
    question: The question, verbatim and without modification,
    thoughts: Your thoughts,
    code: A list of strings, each string representing a line of code in your solution.
    returns: Object type of the variable `solution`,
    error: Your description of any issues or errors that you encountered while attempting to fulfill this request

    ### Response:
    """
    
    
    device = "cuda:0"
    inputs = tokenizer( instruction, return_tensors="pt" ).to( device )
    
    generation_output = model.generate(
        input_ids=inputs[ "input_ids" ],
        attention_mask=inputs[ "attention_mask" ],
        max_new_tokens=max_new_tokens
    )
        
    print( generation_output[ 0 ], end="\n\n" )
    print( generation_output[ 0 ].shape, end="\n\n" )
    
    raw_output = tokenizer.decode( generation_output[ 0 ] )
    print( len( raw_output ), end="\n\n")
    
    response   = raw_output.split( "### Response:" )[ 1 ]
    
    return response

for line in generate_code( base_model ).split( "\n" ): print( line )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


tensor([    1,   835,  2799,  4080, 29901,    13,  1678,  4803,   278,  9330,
         2400,   322,   278, 10567,  2183,   304,  2436,   278, 13291, 29892,
          607,   338,  1824, 29885,  2454, 15278,   393,   508,  4505,   278,
         1494,  9330, 29901,    13,    13,  1678,   835,  9330, 29901,    13,
         1678, 14350,   263,   740,   297,  3017,   393,  3408,  1078,   278,
          447,   874,   457,  5418,  1546,  1023,  3291, 29889, 29871,    13,
           13,  1678,   835, 10567, 29901,    13,  1678,  1094,   366,  5706,
          278,  3017,   775,  4312,   304,  1234,   445,  2009, 29892,   306,
          864,   366,   304, 29901,    13,    13,   268, 29896, 29897,   894,
        29901, 26579,  7535,   565,   366,  2274,   278,  1139,   393,   306,
          626,  6721,   366, 29889, 29871, 14617,  8570,   304,   278,  4902,
        29991,    13,   268, 29906, 29897, 25086, 29901, 10949,   366,   437,
         3099, 29892,  1348,   714, 22526,  1048,   825,   306, 

In [30]:
json_prompt = """
Write a function in python that calculates the haversine distance between two points. 

As you generate the python code needed to answer this request, I want you to:

1) Question: Ask yourself if you understand the question that I am asking you.  Pay attention to the details!
2) Think: Before you do anything, think out loud about what I am asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!
3) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of running to completion. The last line of your code must be the variable `solution`, which represents the answer. Make sure that any filtering you perform matches the question asked of you by the user!
4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the object type.

Format: return your response as a JSON object with the following fields:
question: The question, verbatim and without modification,
thoughts: Your thoughts,
code: A list of strings, each string representing a line of code in your solution.
returns: Object type of the variable `solution`,
error: Your description of any issues or errors that you encountered while attempting to fulfill this request"""

In [34]:
generate_text( json_prompt, max_new_tokens=512 )

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'<s> \nWrite a function in python that calculates the haversine distance between two points. \n\nAs you generate the python code needed to answer this request, I want you to:\n\n1) Question: Ask yourself if you understand the question that I am asking you.  Pay attention to the details!\n2) Think: Before you do anything, think out loud about what I am asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!\n3) Code: Generate a verbatim list of code that you used to arrive at your answer, one line of code per item on the list. The code must be complete, syntactically correct, and capable of running to completion. The last line of your code must be the variable `solution`, which represents the answer. Make sure that any filtering you perform matches the question asked of you by the user!\n4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the obj

In [38]:
response_string = """
{\n  "question": "Write a function in python that calculates the haversine distance between two points.",\n  "thoughts": "The haversine formula is used to calculate the distance between two points on the Earth\'s surface. It takes into account the latitude and longitude of the two points, as well as the radius of the Earth. The formula is given by:\n\nd = 2 * arcsin(sqrt(sin^2(delta_lat/2) + cos(lat1) * cos(lat2) * sin^2(delta_lon/2)))\n\nwhere d is the distance between the two points, lat1 and lat2 are the latitudes of the two points, lon1 and lon2 are the longitudes of the two points, and delta_lat and delta_lon are the differences between the latitudes and longitudes of the two points.",\n  "code": [\n    "import math"\n  ],\n  "returns": "float",\n  "explanation": "The haversine formula is used to calculate the distance between two points on the Earth\'s surface. It takes into account the latitude and longitude of the two points, as well as the radius of the Earth. The formula is given by:\n\nd = 2 * arcsin(sqrt(sin^2(delta_lat/2) + cos(lat1) * cos(lat2) * sin^2(delta_lon/2)))\n\nwhere d is the distance between the two points, lat1 and lat2 are the latitudes of the two points, lon1 and lon2 are the longitudes of the two points, and delta_lat and delta_lon are the differences between the latitudes and longitudes of the two points.",\n  "error": ""\n}"""

len( response_string.split( " " ) )

226

In [43]:
device = "cuda:0"
inputs = tokenizer( response_string, return_tensors="pt" ).to( device )
len( inputs[ "input_ids" ][ 0 ] ) / 16

24.375

In [41]:
import json

In [42]:
response_dict = json.loads( response_string )
response_dict

JSONDecodeError: Invalid control character at: line 4 column 243 (char 348)

In [17]:
dataset[ 0 ]

{'instruction': 'Create a function to calculate the sum of a sequence of integers.',
 'input': '[1, 2, 3, 4, 5]',
 'output': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'prompt': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\n### Input:\n[1, 2, 3, 4, 5]\n\n### Output:\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum'}

In [18]:
foo = """
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Write a function in python that calculates the haversine distance between two points

### Input:
As you generate the python code needed to answer this request, I want you to:

1) Question: Ask yourself if you understand the question that I am asking you.  Pay attention to the details!
2) Think: Before you do anything, think out loud about what I am asking you to do, including what are the steps that you will need to take to solve this problem. Be critical of your thought process!
3) Code: Generate the python code that you need to arrive at your answer. The code must be complete, syntactically correct, and capable of running to completion. The last line of your code must be `return solution`. 
4) Return: Report on the object type of the variable `solution` in your last line of code. Use one word to represent the object type.

Format: return your response as a JSON object with the following fields:

question: The question, verbatim and without modification,
thoughts: Your thoughts about the question,
code: You python code solution.
returns: Object type of the return variable `solution`,
error: Your description of any issues or errors that you encountered while attempting to fulfill this request

### Output:
"""