#Load required libraries

In [2]:
!pip install -q -U transformers peft accelerate optimum
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
!pip install -q datasets
!pip install loralib==0.1.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m403.3/403.3 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

#Train quantized model using PEFT

In [4]:
# Specify the model ID to be loaded:

model_id = "TheBloke/Llama-2-7b-Chat-GPTQ"

# Define the quantization configuration:
gptq_config  = GPTQConfig(bits=4  # Quantize model weights to 4 bits for reduced size and faster inference.
                          , disable_exllama=True) # disabled the exllama kernel because training with exllama kernel is unstable

# Load the quantized model:
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=gptq_config ,
                                             device_map="auto", # Automatically distribute the model across available devices (if applicable)
                                             trust_remote_code=True) # Necessary for loading models with custom code components.

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.


config.json:   0%|          | 0.00/789 [00:00<?, ?B/s]

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


model.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [5]:
# Load the appropriate tokenizer for the specified model:
tokenizer = AutoTokenizer.from_pretrained(model_id)

tokenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [6]:
model.config.quantization_config.to_dict()

{'quant_method': <QuantizationMethod.GPTQ: 'gptq'>,
 'bits': 4,
 'tokenizer': None,
 'dataset': None,
 'group_size': 128,
 'damp_percent': 0.01,
 'desc_act': False,
 'sym': True,
 'true_sequential': True,
 'use_cuda_fp16': False,
 'model_seqlen': None,
 'block_name_to_quantize': None,
 'module_name_preceding_first_block': None,
 'batch_size': 1,
 'pad_token_id': None,
 'use_exllama': False,
 'max_input_length': None,
 'exllama_config': {'version': <ExllamaVersion.ONE: 1>},
 'cache_block_outputs': True}

In [7]:
model.gradient_checkpointing_enable() # Activate gradient checkpointing for memory optimization during training.
model = prepare_model_for_kbit_training(model) # Apply necessary modifications for K-bit training

In [8]:
r=8 # Set the rank for low-rank attention approximation
alpha=32 # Set the scaling factor for attention scores
dropout=0.05 # Set the dropout rate for regularization

config = LoraConfig(
    r=r,
    lora_alpha=alpha,
    target_modules=["k_proj","o_proj","q_proj","v_proj"],  # Apply LoRA to these specific projection modules.
    lora_dropout=dropout,
    bias="none",    # Disable biases in attention modules
    task_type="CAUSAL_LM"  # Specify the model's task as causal language modeling.
)

model = get_peft_model(model, config)  # Apply PEFT (Predictive Efficient Fine-Tuning) to the model using the LoRA configuration.
model.print_trainable_parameters()

trainable params: 8,388,608 || all params: 270,798,848 || trainable%: 3.097726619575575


#Load the dataset

In [9]:
from datasets import load_dataset
data = load_dataset("ttbui/alpaca_webgen_html", split="train")
data

Downloading data:   0%|          | 0.00/303k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['output', 'instruction', 'input'],
    num_rows: 528
})

In [10]:
def tokenize_function(dataset):
  #Data Structure Check:
    if "instruction" in dataset and "output" in dataset:
    #Prompt Construction:
      prompt_template = "Below is instruction that describes a task to code in HTML,what is output in HTML: \n \n'"
      instruction = dataset["instruction"][0]
      response = dataset["output"][0]

      text_with_prompt = (prompt_template +
                          '### Instruction: \n' +instruction +
                          '\n ### Response: \n' + response)

    #Tokenization
    tokenizer.pad_token = tokenizer.eos_token  # Set padding token to the end-of-sentence token
    tokenized_inputs = tokenizer(
        text_with_prompt,
        return_tensors="np",   #Return NumPy tensors
        padding=True,    #Pad sequences to equal length
    )

    max_length = min(
        tokenized_inputs["input_ids"].shape[1],
        2048    # Set maximum length to 2048 or the actual length, whichever is shorter
    )
    tokenizer.truncation_side = "left"  # Truncate from the left if necessary
    tokenized_inputs = tokenizer(
        text_with_prompt,
        return_tensors="np",
        truncation=True,   # Enable truncation
        max_length=max_length
    )

    return tokenized_inputs

In [11]:
#Tokenization Mapping
tokenized_dataset = data.map(
    tokenize_function,
    batched=True,
    batch_size=1,
    drop_last_batch=True
)

Map:   0%|          | 0/528 [00:00<?, ? examples/s]

In [12]:
tokenized_dataset

Dataset({
    features: ['output', 'instruction', 'input', 'input_ids', 'attention_mask'],
    num_rows: 528
})

In [13]:
#splitting into Testing and training sets
data_split = tokenized_dataset.train_test_split(test_size=0.25, shuffle=True, seed=123)
data_split

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input', 'input_ids', 'attention_mask'],
        num_rows: 396
    })
    test: Dataset({
        features: ['output', 'instruction', 'input', 'input_ids', 'attention_mask'],
        num_rows: 132
    })
})

#Check base model results

In [14]:
def is_exact_match(a, b):
    return a.strip() == b.strip()

model.eval()

def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=100):
  # Tokenize
  input_ids = tokenizer.encode(
          text,
          return_tensors="pt",
          truncation=True,
          max_length=max_input_tokens
  )

  # Generate
  device = model.device
  generated_tokens_with_prompt = model.generate(
    input_ids=input_ids.to(device),
    max_length=max_output_tokens
  )

  # Decode
  generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

  # Strip the prompt
  generated_text_answer = generated_text_with_prompt[0][len(text):]

  return generated_text_answer

In [15]:
# Retrieve a specific test question from the dataset:
test_question = data_split["test"]['instruction'][2]

# Generate an answer using the model and tokenizer:
generated_answer = inference(test_question, model, tokenizer)

print(test_question)
print(generated_answer)



Create a simple HTML webpage without using any external stylesheets.


Create a simple HTML webpage without using any external stylesheets. The webpage should have a header, a paragraph of text, and a link to another webpage.

Here is the HTML code for the webpage:
```
<!DOCTYPE html>
<html>
  <head>
    <title>My Simple HTML Page</title>
  </head>
  <body>
   


#Train the model

In [16]:
# Set padding token for consistent length handling:
tokenizer.pad_token = tokenizer.eos_token

# Create a Trainer instance for model training:
trainer = Trainer(
    model=model,                          # Specify the model to be trained
    train_dataset=data_split["train"],
    args=TrainingArguments(               # Configure training settings
        per_device_train_batch_size=1,    # Process 1 batch per device per gradient update.
        gradient_accumulation_steps=4,    # Accumulate gradients over 4 steps for effective batch size of 4.
        warmup_steps=2,                   # Gradually increase learning rate over 2 initial steps.
        max_steps=10,                     # Train for a maximum of 10 steps (adjust for actual training).
        learning_rate=2e-4,               # Set the learning rate
        fp16=True,                        # Enable mixed-precision training for potential speedup.
        logging_steps=1,                  # Log training progress every step.
        output_dir="outputs",             # Save model checkpoints and logs in the "outputs" directory
        optim="adamw_hf"                  # Use the AdamW optimizer with Hugging Face modifications
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # Prepare text inputs for language modeling
)

In [17]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1,2.2931
2,1.6257
3,1.4547
4,2.1299
5,1.2179
6,1.3383
7,1.1395
8,1.2204
9,1.1505
10,1.1092


TrainOutput(global_step=10, training_loss=1.4679259538650513, metrics={'train_runtime': 129.6963, 'train_samples_per_second': 0.308, 'train_steps_per_second': 0.077, 'total_flos': 5944818475008.0, 'train_loss': 1.4679259538650513, 'epoch': 0.1})

In [18]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (rotary_emb): LlamaRotaryEmbedding()
              (k_proj): QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (quant_linear_module): QuantLinear()
              )
              (

#Save the model

In [19]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("outputs")

In [20]:
#from huggingface_hub import login
#login()

In [21]:
#model.push_to_hub("HTML-finetunined-WORK-A")

#Load the fintuned model from local

In [22]:
output_dir = "/content/outputs"

In [23]:
gptq_config = GPTQConfig(bits=4, disable_exllama=True)

trained_model = AutoModelForCausalLM.from_pretrained(
output_dir, local_files_only=True,
quantization_config=gptq_config,
trust_remote_code=True, device_map="auto"
)

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


#Evaluate the model

Run model and compare to expected answer

In [25]:
test_question = data_split["test"]['instruction'][2]
generated_answer = inference(test_question, trained_model, tokenizer)
print(test_question)
print(generated_answer)

Create a simple HTML webpage without using any external stylesheets.


Here is an example of a simple HTML webpage without any external stylesheets:
```
<!DOCTYPE html>
<html>
  <head>
    <title>My Simple HTML Page</title>
  </head>
  <body>
    <h1>Hello World!</h1>
  </body>
</html>
```
This is a basic


In [26]:
answer = data_split["test"]['output'][2]
print(answer)

<html>
<head>
  <title>My Web Page</title>
</head>
<body>
  <h1>Welcome to My Web Page</h1>
  <p>This is my first web page.</p>
</body>
</html>


In [27]:
exact_match = is_exact_match(generated_answer, answer)
print(exact_match)

False


Run over entire dataset

In [28]:
from tqdm import tqdm
import pandas as pd
import torch
import torch.nn.functional as F

In [39]:
"""def calculate_loss(predicted_output, target_output, model, tokenizer, device):
     Tokenize target output
    target_ids = tokenizer.encode(
        target_output,
        return_tensors="pt",
        truncation=True,
        max_length=1000  # Set an appropriate max length for target sequences
    ).to(device)

    # Tokenize predicted output
    predicted_ids = tokenizer.encode(
        predicted_output,
        return_tensors="pt",
        truncation=True,
        max_length=1000  # Set an appropriate max length for predicted sequences
    ).to(device)

    # Print for debugging
    print("Target IDs:", target_ids)
    print("Predicted IDs:", predicted_ids)

    # Compute cross-entropy loss
    loss = F.cross_entropy(
        model(input_ids=target_ids, return_dict=True).logits,
        predicted_ids.squeeze(),
    )

    return loss

def inference_and_loss(text, target_output, model, tokenizer, device):
    # Tokenize
    input_ids = tokenizer.encode(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=1000  # Set an appropriate max length for input sequences
    ).to(device)

    # Generate
    generated_tokens_with_prompt = model.generate(
        input_ids=input_ids,
        max_length=100  # Set an appropriate max length for generated sequences
    )

    # Decode
    generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

    # Strip the prompt
    generated_text_answer = generated_text_with_prompt[0][len(text):]

    # Calculate loss
    loss = calculate_loss(generated_text_answer, target_output, model, tokenizer, device)

    return generated_text_answer, loss"""

active_code_line = "This line is inactive "

In [40]:
#Initializing Variables:
n = 10
metrics = {'exact_matches': []}
predictions = []

#Iterating through Test Data

for i, item in tqdm(enumerate(data_split["test"])):
    print("i Evaluating: " + str(item))
    instruction = item['instruction']
    output = item['output']
 #Generating Predictions
    try:
      predicted_output = inference(instruction, trained_model, tokenizer)
    except:
      continue
    predictions.append([predicted_output, output])
  #Calculating Exact Match Metric
    #fixed: exact_match = is_exact_match(generated_output, output)
    exact_match = is_exact_match(predicted_output, output)
    metrics['exact_matches'].append(exact_match)
   #Terminating Early (Optional)
    if i > n and n != -1:
      break
print('Number of exact matches: ', sum(metrics['exact_matches']))

0it [00:00, ?it/s]

i Evaluating: {'output': '<html>\n <head>\n  <title>My Webpage</title>\n  <meta http-equiv="refresh" content="5">\n </head>\n <body>\n  Hello World!\n </body>\n</html>', 'instruction': 'Update the following HTML page so that it refreshes the page every 5 seconds.', 'input': '<html>\n <head>\n  <title>My Webpage</title>\n </head>\n <body>\n  Hello World!\n </body>\n</html>', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6422, 278, 1494, 4544, 1813, 577, 393, 372, 11086, 267, 278, 1813, 1432, 29871, 29945, 6923, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 529, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 2563, 3488, 829, 3257, 29958, 13, 29871, 529, 7299, 1732, 29899, 9402, 543, 22379, 29908, 2793, 543, 29945, 1013, 13, 1533, 2813, 29958, 13, 529, 2587, 29958, 13, 29871, 15043, 2787, 29991, 13, 1533, 2587, 29958, 13, 8

1it [00:09,  9.29s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n  <title>My HTML Page</title>\n</head>\n<body>\n  <h1>My HTML Page</h1>\n  <p>This is a basic HTML page with a heading and a paragraph of text.</p> \n</body>\n</html>', 'instruction': 'Create a basic HTML page with a heading and a paragraph of text.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 6996, 4544, 1813, 411, 263, 28435, 322, 263, 14880, 310, 1426, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 4544, 9305, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 29871, 529, 29882, 29896, 29958, 3421, 4544, 9305, 829, 29882, 29896, 29958, 13, 29871, 529, 29886, 29958, 4013, 338, 263, 6996, 4544, 1813, 411, 263, 28435, 322, 263, 14

2it [00:18,  9.13s/it]

i Evaluating: {'output': '<html>\n<head>\n  <title>My Web Page</title>\n</head>\n<body>\n  <h1>Welcome to My Web Page</h1>\n  <p>This is my first web page.</p>\n</body>\n</html>', 'instruction': 'Create a simple HTML webpage without using any external stylesheets.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 2560, 4544, 24499, 1728, 773, 738, 7029, 11949, 354, 1691, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 2563, 9305, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 29871, 529, 29882, 29896, 29958, 28862, 2763, 304, 1619, 2563, 9305, 829, 29882, 29896, 29958, 13, 29871, 529, 29886, 29958, 4013, 338, 590, 937, 1856, 1813, 21106, 29886, 29958, 13, 829, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1

3it [00:26,  8.50s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html lang="en">\n<head>\n <meta charset="UTF-8">\n <meta name="viewport" content="width=device-width, initial-scale=1.0">\n <title>Array values</title>\n</head>\n<body>\n <ul>\n   <% for (let i = 0; i < arr.length; i++) { %>\n        <li><%= arr[i] %></li>\n   <% } %>\n </ul>\n</body>\n</html>', 'instruction': 'Using an array, create a web page that prints out all of the array elements.', 'input': 'arr = [1, 2, 3, 4, 5]', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 15156, 385, 1409, 29892, 1653, 263, 1856, 1813, 393, 14677, 714, 599, 310, 278, 1409, 3161, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 6361, 543, 264, 1013, 13, 29966, 2813, 29958, 13, 529, 7299, 17425, 543, 10496, 29899, 29947, 1013, 13, 529, 7299, 1024, 543, 1493, 637, 29908, 2793, 54

4it [00:33,  8.21s/it]

i Evaluating: {'output': '<html>\n<head>\n<style>\n.container {\n    width: 300px;\n    height: 200px;\n    background-color: blue;\n}\n\n.text {\n    color: white;\n    font-family: Arial;\n    font-size: 16px;\n    padding: 15px;\n    text-align: center;\n}\n</style>\t\n</head>\n\n<body>\n\n<div class="container">\n  <div class="text">Hello World!</div>\n</div>\n\n</body>\n</html>', 'instruction': 'Build a HTML page using the given CSS class', 'input': 'CSS Classes:\n\n.container {\n    width: 300px;\n    height: 200px;\n    background-color: blue;\n}\n\n.text {\n    color: white;\n    font-family: Arial;\n    font-size: 16px;\n    padding: 15px;\n    text-align: center;\n}', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 8893, 263, 4544, 1813, 773, 278, 2183, 6783, 770, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2813

5it [00:41,  8.03s/it]

i Evaluating: {'output': '<html>\n  <head>\n    <title>Test</title>\n  </head>\n  <body>\n    <h1>My Heading</h1>\n    <p>This is a test.\n  </body>\n</html>', 'instruction': 'Write a number of HTML tags such that their end tags are mismatched.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6113, 263, 1353, 310, 4544, 8282, 1316, 393, 1009, 1095, 8282, 526, 29635, 287, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 529, 3257, 29958, 3057, 829, 3257, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 29882, 29896, 29958, 3421, 940, 9382, 829, 29882, 29896, 29958, 13, 1678, 529, 29886, 29958, 4013, 338, 263, 1243, 29889, 13, 29871, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

6it [00:48,  7.59s/it]

i Evaluating: {'output': "<html>\n<body>\n   <script>\n\t  function onPageLoad() {\n\t\t  var name = prompt('Please enter your name: ');\n\t\t  alert('Welcome ' + name + '!');\n\t  }\n\t  onPageLoad();\n   </script>\n</body>\n</html>", 'instruction': 'Create a HTML page that takes your name as input and welcomes you on page load.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 4544, 1813, 393, 4893, 596, 1024, 408, 1881, 322, 5476, 26807, 366, 373, 1813, 2254, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2587, 29958, 13, 259, 529, 2154, 29958, 13, 12, 29871, 740, 373, 5074, 5896, 580, 426, 13, 12, 12, 29871, 722, 1024, 353, 9508, 877, 12148, 3896, 596, 1024, 29901, 525, 416, 13, 12, 12, 29871, 6655, 877, 28862, 2763, 525, 718, 1024, 718, 525, 29991, 2157, 13, 12, 29871, 500, 13, 12, 29871, 

7it [00:55,  7.42s/it]

i Evaluating: {'output': '<html>\n  <body>\n    <form>\n      <input type="checkbox" />\n    </form>\n  </body>\n</html>', 'instruction': 'Create an HTML page with a form containing a checkbox.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 385, 4544, 1813, 411, 263, 883, 6943, 263, 12527, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 689, 29958, 13, 418, 529, 2080, 1134, 543, 12348, 29908, 2900, 13, 1678, 1533, 689, 29958, 13, 29871, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


8it [01:04,  7.85s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n  <title>Hunted Maze</title>\n  <style>\n    * {\n      box-sizing: border-box;\n    }\n    body{\n      font-family: sans-serif;\n      background-color: #eee;\n      text-align: center;\n      padding: 20px;\n    }\n    h1 {\n      font-size: 2rem;\n      color: #444;\n    }\n    .maze {\n      position: relative;\n      width: 800px;\n      height: 600px;\n      background-color: #f0f0f0;\n      border-radius: 8px;\n    }\n    .box {\n      position: absolute;\n      width: 25px;\n      height: 25px;\n      background-color: #444;\n      border-radius: 4px;\n      top: 0;\n      left: 0;\n    }\n  </style>\n</head>\n<body>\n  <h1>Hunted Maze</h1>\n  <div class="maze">\n    <div class="box"></div>\n  </div>\n  <script>\n    // add logic for the game\n\n  </script>\n</body>\n</html>', 'instruction': 'Design an interactive game using HTML, CSS and JavaScript.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3

9it [01:12,  8.03s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n  <head>\n    <title>My Page</title>\n  </head>\n  <body>\n    <h1>My Page</h1>\n    <p>This is my first HTML page.</p>\n    <img src="sample-image.jpg" alt="sample image">\n  </body>\n</html>', 'instruction': 'Construct a basic HTML page that renders a heading, a description of the page, and a photo.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 1168, 4984, 263, 6996, 4544, 1813, 393, 7697, 414, 263, 28435, 29892, 263, 6139, 310, 278, 1813, 29892, 322, 263, 15373, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 529, 3257, 29958, 3421, 9305, 829, 3257, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 29882, 29896, 29958, 3421, 9305, 829, 29882, 29896, 2995

10it [01:21,  8.30s/it]

i Evaluating: {'output': '<html>\n  <head>\n    <!-- Include the script -->\n    <script src="alert.js"></script>\n  </head>\n  <body>\n    <button id="btnAlert" onclick="alertFunction()">Alert</button>\n  </body>\n</html>\n\n// alert.js\n\nfunction alertFunction() {\n  alert("This is an alert!");\n}', 'instruction': 'Write an HTML page that displays a Javascript alert when a button is clicked.', 'input': '', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6113, 385, 4544, 1813, 393, 14423, 263, 12728, 6655, 746, 263, 2826, 338, 11484, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 10341, 512, 2325, 278, 2471, 6660, 13, 1678, 529, 2154, 4765, 543, 12888, 29889, 1315, 5319, 2154, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 3092, 1178, 543, 7290, 16649, 29

11it [01:32,  9.17s/it]

i Evaluating: {'output': '<html>\n    <head>\n        <title>Page Title</title>\n    </head>\n    <body class="main">\n    </body>\n</html>', 'instruction': "Edit the following HTML code snippet to give the <body> tag the class 'main'.", 'input': '<html>\n    <head>\n        <title>Page Title</title>\n    </head>\n</html>', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6103, 278, 1494, 4544, 775, 11534, 304, 2367, 278, 529, 2587, 29958, 4055, 278, 770, 525, 3396, 4286, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 1678, 529, 2813, 29958, 13, 4706, 529, 3257, 29958, 5074, 18527, 829, 3257, 29958, 13, 1678, 1533, 2813, 29958, 13, 1678, 529, 2587, 770, 543, 3396, 1013, 13, 1678, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

11it [01:40,  9.13s/it]

Number of exact matches:  0





In [41]:
df = pd.DataFrame(predictions, columns=["predicted_answer", "target_answer"])
print(df)

                                     predicted_answer  \
0   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
1   \n\n```\n<!DOCTYPE html>\n<html>\n  <head>\n  ...   
2   \n\nHere is an example of a simple HTML webpag...   
3   \n\n```\n# Define an array of integers\nmy_arr...   
4   \n\nI have a CSS class called `my-class` and I...   
5   \n\nFor example, you could write:\n\n<html>\n<...   
6   \n\nHere is an example of how you can create a...   
7   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
8   \n\nDesign an interactive game using HTML, CSS...   
9   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
10  \n\nHere is an example of an HTML page that di...   
11  \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   

                                        target_answer  
0   <html>\n <head>\n  <title>My Webpage</title>\n...  
1   <!DOCTYPE html>\n<html>\n<head>\n  <title>My H...  
2   <html>\n<head>\n  <title>My Web Page</title>\n...  
3   <!DOCTYPE html>\n<html lang="e

#API Development