#Load required libraries

In [1]:
!pip install -q -U transformers peft accelerate optimum
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu117/
!pip install -q datasets
!pip install loralib==0.1.1

Looking in indexes: https://pypi.org/simple, https://huggingface.github.io/autogptq-index/whl/cu117/


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
import torch
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

#Load quantized model using PEFT

In [3]:
# Specify the model ID to be loaded:

model_id = "TheBloke/Llama-2-7b-Chat-GPTQ"

# Define the quantization configuration:
gptq_config  = GPTQConfig(bits=4  # Quantize model weights to 4 bits for reduced size and faster inference.
                          , disable_exllama=True) # disabled the exllama kernel because training with exllama kernel is unstable

# Load the quantized model:
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=gptq_config ,
                                             device_map="auto", # Automatically distribute the model across available devices (if applicable)
                                             trust_remote_code=True) # Necessary for loading models with custom code components.

Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


In [4]:
# Load the appropriate tokenizer for the specified model:
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [5]:
model.config.quantization_config.to_dict()

{'quant_method': <QuantizationMethod.GPTQ: 'gptq'>,
 'bits': 4,
 'tokenizer': None,
 'dataset': None,
 'group_size': 128,
 'damp_percent': 0.01,
 'desc_act': False,
 'sym': True,
 'true_sequential': True,
 'use_cuda_fp16': False,
 'model_seqlen': None,
 'block_name_to_quantize': None,
 'module_name_preceding_first_block': None,
 'batch_size': 1,
 'pad_token_id': None,
 'use_exllama': False,
 'max_input_length': None,
 'exllama_config': {'version': <ExllamaVersion.ONE: 1>},
 'cache_block_outputs': True}

In [6]:
model.gradient_checkpointing_enable() # Activate gradient checkpointing for memory optimization during training.
model = prepare_model_for_kbit_training(model) # Apply necessary modifications for K-bit training

In [7]:
r=8 # Set the rank for low-rank attention approximation
alpha=32 # Set the scaling factor for attention scores
dropout=0.05 # Set the dropout rate for regularization

config = LoraConfig(
    r=r,
    lora_alpha=alpha,
    target_modules=["k_proj","o_proj","q_proj","v_proj"],  # Apply LoRA to these specific projection modules.
    lora_dropout=dropout,
    bias="none",    # Disable biases in attention modules
    task_type="CAUSAL_LM"  # Specify the model's task as causal language modeling.
)

model = get_peft_model(model, config)  # Apply PEFT (Predictive Efficient Fine-Tuning) to the model using the LoRA configuration.
model.print_trainable_parameters()

trainable params: 8,388,608 || all params: 270,798,848 || trainable%: 3.097726619575575


#Load the dataset

In [8]:
from datasets import load_dataset
data = load_dataset("ttbui/alpaca_webgen_html", split="train")
data

Dataset({
    features: ['output', 'input', 'instruction'],
    num_rows: 528
})

In [9]:
def tokenize_function(dataset):
  #Data Structure Check:
    if "instruction" in dataset and "output" in dataset:
    #Prompt Construction:
      prompt_template = "Below is instruction that describes a task to code in HTML,what is output in HTML: \n \n'"
      instruction = dataset["instruction"][0]
      response = dataset["output"][0]

      text_with_prompt = (prompt_template +
                          '### Instruction: \n' +instruction +
                          '\n ### Response: \n' + response)

    #Tokenization
    tokenizer.pad_token = tokenizer.eos_token  # Set padding token to the end-of-sentence token
    tokenized_inputs = tokenizer(
        text_with_prompt,
        return_tensors="np",   #Return NumPy tensors
        padding=True,    #Pad sequences to equal length
    )

    max_length = min(
        tokenized_inputs["input_ids"].shape[1],
        2048    # Set maximum length to 2048 or the actual length, whichever is shorter
    )
    tokenizer.truncation_side = "left"  # Truncate from the left if necessary
    tokenized_inputs = tokenizer(
        text_with_prompt,
        return_tensors="np",
        truncation=True,   # Enable truncation
        max_length=max_length
    )

    return tokenized_inputs

In [10]:
#Tokenization Mapping
tokenized_dataset = data.map(
    tokenize_function,
    batched=True,
    batch_size=1,
    drop_last_batch=True
)

In [11]:
tokenized_dataset

Dataset({
    features: ['output', 'input', 'instruction', 'input_ids', 'attention_mask'],
    num_rows: 528
})

In [12]:
#splitting into Testing and training sets
data_split = tokenized_dataset.train_test_split(test_size=0.25, shuffle=True, seed=123)
data_split

DatasetDict({
    train: Dataset({
        features: ['output', 'input', 'instruction', 'input_ids', 'attention_mask'],
        num_rows: 396
    })
    test: Dataset({
        features: ['output', 'input', 'instruction', 'input_ids', 'attention_mask'],
        num_rows: 132
    })
})

#Check base model results

In [13]:
def is_exact_match(a, b):
    return a.strip() == b.strip()

model.eval()

def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=100):
  # Tokenize
  input_ids = tokenizer.encode(
          text,
          return_tensors="pt",
          truncation=True,
          max_length=max_input_tokens
  )

  # Generate
  device = model.device
  generated_tokens_with_prompt = model.generate(
    input_ids=input_ids.to(device),
    max_length=max_output_tokens
  )

  # Decode
  generated_text_with_prompt = tokenizer.batch_decode(generated_tokens_with_prompt, skip_special_tokens=True)

  # Strip the prompt
  generated_text_answer = generated_text_with_prompt[0][len(text):]

  return generated_text_answer

In [14]:
# Retrieve a specific test question from the dataset:
test_question = data_split["test"]['instruction'][2]

# Generate an answer using the model and tokenizer:
generated_answer = inference(test_question, model, tokenizer)

print(test_question)
print(generated_answer)



Create a simple HTML webpage without using any external stylesheets.


Create a simple HTML webpage without using any external stylesheets. The webpage should have a header, a paragraph of text, and a link to another webpage.

Here is the HTML code for the webpage:
```
<!DOCTYPE html>
<html>
  <head>
    <title>My Simple HTML Page</title>
  </head>
  <body>
   


#Train the model

In [25]:
# Set padding token for consistent length handling:
tokenizer.pad_token = tokenizer.eos_token

# Create a Trainer instance for model training:
trainer = Trainer(
    model=model,                          # Specify the model to be trained
    train_dataset=data_split["train"],
    args=TrainingArguments(               # Configure training settings
        per_device_train_batch_size=2,    # Process 1 batch per device per gradient update.
        gradient_accumulation_steps=4,    # Accumulate gradients over 4 steps for effective batch size of 4.
        warmup_steps=2,                   # Gradually increase learning rate over 2 initial steps.
        max_steps=100,                     # Train for a maximum of 10 steps (adjust for actual training).
        learning_rate=2e-4,               # Set the learning rate
        fp16=True,                        # Enable mixed-precision training for potential speedup.
        logging_steps=3,                  # Log training progress every step.
        output_dir="outputs_dirc",        # Save model checkpoints and logs in the "outputs_dirc" directory
        optim="adamw_torch",              # PyTorch implementation
        ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # Prepare text inputs for language modeling
)

In [26]:
trainer.train()

Step,Training Loss
3,0.4398
6,0.5204
9,0.3959
12,0.4428
15,0.4438
18,0.3907
21,0.3994
24,0.527
27,0.4093
30,0.4247


TrainOutput(global_step=100, training_loss=0.3860903787612915, metrics={'train_runtime': 1076.0333, 'train_samples_per_second': 0.743, 'train_steps_per_second': 0.093, 'total_flos': 161505233436672.0, 'train_loss': 0.3860903787612915, 'epoch': 2.02})

In [27]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (rotary_emb): LlamaRotaryEmbedding()
              (k_proj): QuantLinear(
                (base_layer): QuantLinear()
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (quant_linear_module): QuantLinear()
              )
              (

#Save the model

In [28]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("output_dirc")

In [19]:
#from huggingface_hub import login
#login()

In [20]:
#model.push_to_hub("HTML-finetunined-WORK-A")

#Load the fine-tuned model from local

In [31]:
output_dir = "/content/output_dirc"

In [37]:
gptq_config = GPTQConfig(bits=4, use_exllama=False)

trained_model = AutoModelForCausalLM.from_pretrained(
output_dir, local_files_only=True,
quantization_config=gptq_config,
trust_remote_code=True, device_map="auto"
)

You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


#Evaluate the model

##Run model and compare to expected answer

In [38]:
test_question = data_split["test"]['instruction'][2]
generated_answer = inference(test_question, trained_model, tokenizer)
print(test_question)
print(generated_answer)

Create a simple HTML webpage without using any external stylesheets.


```
<!DOCTYPE html>
<html>
<head>
  <title>My Page</title>
</head>
<body>
  <h1>Hello World!</h1>
</body>
</html>
```

This is a basic HTML page without any external stylesheets. It contains a single heading element with the text "Hello World!


In [39]:
answer = data_split["test"]['output'][2]
print(answer)

<html>
<head>
  <title>My Web Page</title>
</head>
<body>
  <h1>Welcome to My Web Page</h1>
  <p>This is my first web page.</p>
</body>
</html>


In [40]:
exact_match = is_exact_match(generated_answer, answer)
print(exact_match)

False


##Run over entire dataset and compare

In [42]:
from tqdm import tqdm
import pandas as pd
import torch
import torch.nn.functional as F

In [43]:
#Initializing Variables:
n = 20
metrics = {'exact_matches': []}
predictions = []

#Iterating through Test Data

for i, item in tqdm(enumerate(data_split["test"])):
    print("i Evaluating: " + str(item))
    instruction = item['instruction']
    output = item['output']

 #Generating Predictions
    try:
      predicted_output = inference(instruction, trained_model, tokenizer)
    except:
      continue
    predictions.append([predicted_output, output])

  #Calculating Exact Match Metric
    #fixed: exact_match = is_exact_match(generated_output, output)
    exact_match = is_exact_match(predicted_output, output)
    metrics['exact_matches'].append(exact_match)

   #Terminating Early (Optional)
    if i > n and n != -1:
      break
print('Number of exact matches: ', sum(metrics['exact_matches']))

0it [00:00, ?it/s]

i Evaluating: {'output': '<html>\n <head>\n  <title>My Webpage</title>\n  <meta http-equiv="refresh" content="5">\n </head>\n <body>\n  Hello World!\n </body>\n</html>', 'input': '<html>\n <head>\n  <title>My Webpage</title>\n </head>\n <body>\n  Hello World!\n </body>\n</html>', 'instruction': 'Update the following HTML page so that it refreshes the page every 5 seconds.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6422, 278, 1494, 4544, 1813, 577, 393, 372, 11086, 267, 278, 1813, 1432, 29871, 29945, 6923, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 529, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 2563, 3488, 829, 3257, 29958, 13, 29871, 529, 7299, 1732, 29899, 9402, 543, 22379, 29908, 2793, 543, 29945, 1013, 13, 1533, 2813, 29958, 13, 529, 2587, 29958, 13, 29871, 15043, 2787, 29991, 13, 1533, 2587, 29958, 13, 8

1it [00:07,  7.86s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n  <title>My HTML Page</title>\n</head>\n<body>\n  <h1>My HTML Page</h1>\n  <p>This is a basic HTML page with a heading and a paragraph of text.</p> \n</body>\n</html>', 'input': '', 'instruction': 'Create a basic HTML page with a heading and a paragraph of text.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 6996, 4544, 1813, 411, 263, 28435, 322, 263, 14880, 310, 1426, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 4544, 9305, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 29871, 529, 29882, 29896, 29958, 3421, 4544, 9305, 829, 29882, 29896, 29958, 13, 29871, 529, 29886, 29958, 4013, 338, 263, 6996, 4544, 1813, 411, 263, 28435, 322, 263, 14

2it [00:14,  7.03s/it]

i Evaluating: {'output': '<html>\n<head>\n  <title>My Web Page</title>\n</head>\n<body>\n  <h1>Welcome to My Web Page</h1>\n  <p>This is my first web page.</p>\n</body>\n</html>', 'input': '', 'instruction': 'Create a simple HTML webpage without using any external stylesheets.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 2560, 4544, 24499, 1728, 773, 738, 7029, 11949, 354, 1691, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 29871, 529, 3257, 29958, 3421, 2563, 9305, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 29871, 529, 29882, 29896, 29958, 28862, 2763, 304, 1619, 2563, 9305, 829, 29882, 29896, 29958, 13, 29871, 529, 29886, 29958, 4013, 338, 590, 937, 1856, 1813, 21106, 29886, 29958, 13, 829, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1

3it [00:22,  7.44s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html lang="en">\n<head>\n <meta charset="UTF-8">\n <meta name="viewport" content="width=device-width, initial-scale=1.0">\n <title>Array values</title>\n</head>\n<body>\n <ul>\n   <% for (let i = 0; i < arr.length; i++) { %>\n        <li><%= arr[i] %></li>\n   <% } %>\n </ul>\n</body>\n</html>', 'input': 'arr = [1, 2, 3, 4, 5]', 'instruction': 'Using an array, create a web page that prints out all of the array elements.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 15156, 385, 1409, 29892, 1653, 263, 1856, 1813, 393, 14677, 714, 599, 310, 278, 1409, 3161, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 6361, 543, 264, 1013, 13, 29966, 2813, 29958, 13, 529, 7299, 17425, 543, 10496, 29899, 29947, 1013, 13, 529, 7299, 1024, 543, 1493, 637, 29908, 2793, 54

4it [00:29,  7.28s/it]

i Evaluating: {'output': '<html>\n<head>\n<style>\n.container {\n    width: 300px;\n    height: 200px;\n    background-color: blue;\n}\n\n.text {\n    color: white;\n    font-family: Arial;\n    font-size: 16px;\n    padding: 15px;\n    text-align: center;\n}\n</style>\t\n</head>\n\n<body>\n\n<div class="container">\n  <div class="text">Hello World!</div>\n</div>\n\n</body>\n</html>', 'input': 'CSS Classes:\n\n.container {\n    width: 300px;\n    height: 200px;\n    background-color: blue;\n}\n\n.text {\n    color: white;\n    font-family: Arial;\n    font-size: 16px;\n    padding: 15px;\n    text-align: center;\n}', 'instruction': 'Build a HTML page using the given CSS class', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 8893, 263, 4544, 1813, 773, 278, 2183, 6783, 770, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2813

5it [00:37,  7.62s/it]

i Evaluating: {'output': '<html>\n  <head>\n    <title>Test</title>\n  </head>\n  <body>\n    <h1>My Heading</h1>\n    <p>This is a test.\n  </body>\n</html>', 'input': '', 'instruction': 'Write a number of HTML tags such that their end tags are mismatched.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6113, 263, 1353, 310, 4544, 8282, 1316, 393, 1009, 1095, 8282, 526, 29635, 287, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 529, 3257, 29958, 3057, 829, 3257, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 29882, 29896, 29958, 3421, 940, 9382, 829, 29882, 29896, 29958, 13, 1678, 529, 29886, 29958, 4013, 338, 263, 1243, 29889, 13, 29871, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

6it [00:44,  7.50s/it]

i Evaluating: {'output': "<html>\n<body>\n   <script>\n\t  function onPageLoad() {\n\t\t  var name = prompt('Please enter your name: ');\n\t\t  alert('Welcome ' + name + '!');\n\t  }\n\t  onPageLoad();\n   </script>\n</body>\n</html>", 'input': '', 'instruction': 'Create a HTML page that takes your name as input and welcomes you on page load.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 4544, 1813, 393, 4893, 596, 1024, 408, 1881, 322, 5476, 26807, 366, 373, 1813, 2254, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2587, 29958, 13, 259, 529, 2154, 29958, 13, 12, 29871, 740, 373, 5074, 5896, 580, 426, 13, 12, 12, 29871, 722, 1024, 353, 9508, 877, 12148, 3896, 596, 1024, 29901, 525, 416, 13, 12, 12, 29871, 6655, 877, 28862, 2763, 525, 718, 1024, 718, 525, 29991, 2157, 13, 12, 29871, 500, 13, 12, 29871, 

7it [00:52,  7.45s/it]

i Evaluating: {'output': '<html>\n  <body>\n    <form>\n      <input type="checkbox" />\n    </form>\n  </body>\n</html>', 'input': '', 'instruction': 'Create an HTML page with a form containing a checkbox.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 385, 4544, 1813, 411, 263, 883, 6943, 263, 12527, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 689, 29958, 13, 418, 529, 2080, 1134, 543, 12348, 29908, 2900, 13, 1678, 1533, 689, 29958, 13, 29871, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


8it [01:00,  7.81s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n  <title>Hunted Maze</title>\n  <style>\n    * {\n      box-sizing: border-box;\n    }\n    body{\n      font-family: sans-serif;\n      background-color: #eee;\n      text-align: center;\n      padding: 20px;\n    }\n    h1 {\n      font-size: 2rem;\n      color: #444;\n    }\n    .maze {\n      position: relative;\n      width: 800px;\n      height: 600px;\n      background-color: #f0f0f0;\n      border-radius: 8px;\n    }\n    .box {\n      position: absolute;\n      width: 25px;\n      height: 25px;\n      background-color: #444;\n      border-radius: 4px;\n      top: 0;\n      left: 0;\n    }\n  </style>\n</head>\n<body>\n  <h1>Hunted Maze</h1>\n  <div class="maze">\n    <div class="box"></div>\n  </div>\n  <script>\n    // add logic for the game\n\n  </script>\n</body>\n</html>', 'input': '', 'instruction': 'Design an interactive game using HTML, CSS and JavaScript.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3

9it [01:08,  7.68s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n  <head>\n    <title>My Page</title>\n  </head>\n  <body>\n    <h1>My Page</h1>\n    <p>This is my first HTML page.</p>\n    <img src="sample-image.jpg" alt="sample image">\n  </body>\n</html>', 'input': '', 'instruction': 'Construct a basic HTML page that renders a heading, a description of the page, and a photo.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 1168, 4984, 263, 6996, 4544, 1813, 393, 7697, 414, 263, 28435, 29892, 263, 6139, 310, 278, 1813, 29892, 322, 263, 15373, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 529, 3257, 29958, 3421, 9305, 829, 3257, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 29882, 29896, 29958, 3421, 9305, 829, 29882, 29896, 2995

10it [01:15,  7.59s/it]

i Evaluating: {'output': '<html>\n  <head>\n    <!-- Include the script -->\n    <script src="alert.js"></script>\n  </head>\n  <body>\n    <button id="btnAlert" onclick="alertFunction()">Alert</button>\n  </body>\n</html>\n\n// alert.js\n\nfunction alertFunction() {\n  alert("This is an alert!");\n}', 'input': '', 'instruction': 'Write an HTML page that displays a Javascript alert when a button is clicked.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6113, 385, 4544, 1813, 393, 14423, 263, 12728, 6655, 746, 263, 2826, 338, 11484, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29871, 529, 2813, 29958, 13, 1678, 10341, 512, 2325, 278, 2471, 6660, 13, 1678, 529, 2154, 4765, 543, 12888, 29889, 1315, 5319, 2154, 29958, 13, 29871, 1533, 2813, 29958, 13, 29871, 529, 2587, 29958, 13, 1678, 529, 3092, 1178, 543, 7290, 16649, 29

11it [01:23,  7.61s/it]

i Evaluating: {'output': '<html>\n    <head>\n        <title>Page Title</title>\n    </head>\n    <body class="main">\n    </body>\n</html>', 'input': '<html>\n    <head>\n        <title>Page Title</title>\n    </head>\n</html>', 'instruction': "Edit the following HTML code snippet to give the <body> tag the class 'main'.", 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6103, 278, 1494, 4544, 775, 11534, 304, 2367, 278, 529, 2587, 29958, 4055, 278, 770, 525, 3396, 4286, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 1678, 529, 2813, 29958, 13, 4706, 529, 3257, 29958, 5074, 18527, 829, 3257, 29958, 13, 1678, 1533, 2813, 29958, 13, 1678, 529, 2587, 770, 543, 3396, 1013, 13, 1678, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

12it [01:30,  7.52s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n <title>My Web Page</title>\n</head>\n<body>\n <h1>This is my title</h1>\n <p>This is some content.</p>\n <button>Click Me!</button>\n</body>\n</html>', 'input': '', 'instruction': 'Design a web page in HTML5 with a heading, a paragraph and a button.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4002, 647, 263, 1856, 1813, 297, 4544, 29945, 411, 263, 28435, 29892, 263, 14880, 322, 263, 2826, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 529, 3257, 29958, 3421, 2563, 9305, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 529, 29882, 29896, 29958, 4013, 338, 590, 3611, 829, 29882, 29896, 29958, 13, 529, 29886, 29958, 4013, 338, 777, 2793, 21106, 29886, 29958, 13, 529, 3092, 29958, 416

13it [01:38,  7.61s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n   <head>\n      <title>RandomBackground</title>\n      <script>\n         window.addEventListener("load",function(){\n            var randomColor = "#"+((1<<24)*Math.random()|0).toString(16);\n            document.querySelector("body").style.background = randomColor;\n         });\n      </script>\n   </head>\n   <body>\n   </body>\n</html>', 'input': '', 'instruction': 'Create an HTML page where the main content area has a random background color when the page is loaded.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 385, 4544, 1813, 988, 278, 1667, 2793, 4038, 756, 263, 4036, 3239, 2927, 746, 278, 1813, 338, 7500, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 259, 529, 2813, 29958, 13, 418, 529, 3257, 29958, 17875, 10581, 82

14it [01:45,  7.38s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html lang="en">\n<head>\n <meta charset="UTF-8">\n <meta name="viewport" content="width=device-width, initial-scale=1.0">\n <title>Login Form</title>\n</head>\n<body>\n <form>\n  <label>Username:</label>\n  <input type="text" name="username" required>\n  <label>Password:</label>\n  <input type="password" name="password" required>\n  <input type="submit" value="Login">\n </form>\n</body>\n</html>', 'input': '', 'instruction': 'Design a user interface in HTML for a login form.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4002, 647, 263, 1404, 5067, 297, 4544, 363, 263, 6464, 883, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 6361, 543, 264, 1013, 13, 29966, 2813, 29958, 13, 529, 7299, 17425, 543, 10496, 29899, 29947, 1013, 13, 529, 7299, 1024, 543, 14

15it [01:53,  7.57s/it]

i Evaluating: {'output': '<html>\n    <head></head>\n    <body>\n        <input type="text" id="textbox">\n        <button onclick="sayHello()">Say Hello</button>\n        <p id="message">Hello World!</p>\n        \n        <script>\n            function sayHello() {\n                var text = document.getElementById(\'textbox\').value;\n                document.getElementById(\'message\').innerHTML = "Hello " + text + "!";\n            }\n        </script>\n    </body>\n</html>', 'input': '', 'instruction': 'Create a HTML page with a textbox, button and a message “Hello World!”', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 4544, 1813, 411, 263, 18932, 29892, 2826, 322, 263, 2643, 1346, 10994, 2787, 8530, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 1678, 529, 2813, 2565, 2813, 29958, 13, 1678, 529, 2587, 29958, 1

16it [02:02,  8.19s/it]

i Evaluating: {'output': '<html>\n <head>\n  <title>Styled Page</title>\n  <style type="text/css">\n    body {\n        background-color: white;\n    }\n    h1 {\n        font-family: \'Arial\', sans-serif;\n        font-size: 30px;\n        color: black;\n        text-align: center;\n    }\n  </style>\n </head>\n <body>\n  <h1>Styled Page</h1>\n </body>\n</html>', 'input': '<html>\n <head>\n  <title>Styled Page</title>\n </head>\n <body>\n  <h1>Styled Page</h1>\n </body>\n</html>', 'instruction': 'Modify the CSS in the given HTML code to style the web page', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 2111, 1598, 278, 6783, 297, 278, 2183, 4544, 775, 304, 3114, 278, 1856, 1813, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 529, 2813, 29958, 13, 29871, 529, 3257, 29958, 855, 29891, 839, 9305, 829, 3257, 29958, 13, 29871, 529, 

17it [02:15,  9.55s/it]

i Evaluating: {'output': '<html>\n    <head>\n        <title>Welcome to my website!</title>\n    </head>\n    <body>\n        <h1>Welcome to my website!</h1>\n    </body>\n</html>', 'input': '"Welcome to my website!"', 'instruction': 'Create a HTML page that prints the following message', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 4544, 1813, 393, 14677, 278, 1494, 2643, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 1678, 529, 2813, 29958, 13, 4706, 529, 3257, 29958, 28862, 2763, 304, 590, 4700, 29991, 829, 3257, 29958, 13, 1678, 1533, 2813, 29958, 13, 1678, 529, 2587, 29958, 13, 4706, 529, 29882, 29896, 29958, 28862, 2763, 304, 590, 4700, 29991, 829, 29882, 29896, 29958, 13, 1678, 1533, 2587, 29958, 13, 829, 1420, 29958], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

18it [02:23,  8.95s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html lang="en">\n<head>\n  <meta charset="UTF-8">\n  <meta name="viewport" content="width=device-width, initial-scale=1.0">\n  <title>Title</title>\n</head>\n<body>\n  <h1>Home</h1>\n  <h1>About</h1>\n</body>\n</html>', 'input': '', 'instruction': 'Write an HTML page with two headings "Home" and "About".', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 6113, 385, 4544, 1813, 411, 1023, 2343, 886, 376, 11184, 29908, 322, 376, 28173, 1642, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 6361, 543, 264, 1013, 13, 29966, 2813, 29958, 13, 29871, 529, 7299, 17425, 543, 10496, 29899, 29947, 1013, 13, 29871, 529, 7299, 1024, 543, 1493, 637, 29908, 2793, 543, 2103, 29922, 10141, 29899, 2103, 29892, 2847, 29899, 7052, 29922, 29896, 29889, 29900, 1013, 13, 29871, 529, 3257

19it [02:31,  8.94s/it]

i Evaluating: {'output': '<html>\n<head>\n    <title>My Details</title>\n</head>\n<body>\n    <table>\n        <tr>\n            <th>Name</th>\n            <th>Age</th>  \n            <th>Occupation</th>  \n        </tr>\n        <tr>\n            <td> Joe Doe</td>\n            <td> 25 </td>\n            <td> Web Developer </td>\n        </tr>\n    </table>\n</body>\n</html>', 'input': '', 'instruction': 'Create a HTML page displaying a simple table showing your name, age and occupation.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 4391, 263, 4544, 1813, 16384, 263, 2560, 1591, 6445, 596, 1024, 29892, 5046, 322, 26818, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 1678, 529, 3257, 29958, 3421, 25577, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 1678, 529, 2371, 29958, 13, 

20it [02:39,  8.52s/it]

i Evaluating: {'output': '<!DOCTYPE html>\n<html>\n<head>\n <title>Sample Form</title>\n</head>\n<body>\n  <form action="" method="post">\n   <input type="text" placeholder="Input..."/>\n   <input type="submit" value="Submit" />\n  </form>\n</body>\n</html>', 'input': '', 'instruction': 'Construct a HTML page that displays a form with a text field and a submit button.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 1168, 4984, 263, 4544, 1813, 393, 14423, 263, 883, 411, 263, 1426, 1746, 322, 263, 9752, 2826, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 29991, 21300, 3472, 29958, 13, 29966, 1420, 29958, 13, 29966, 2813, 29958, 13, 529, 3257, 29958, 17708, 3812, 829, 3257, 29958, 13, 829, 2813, 29958, 13, 29966, 2587, 29958, 13, 29871, 529, 689, 3158, 13776, 1158, 543, 2490, 1013, 13, 259, 529, 2080, 1134, 543, 726, 29908, 12983, 543, 4290

21it [02:47,  8.23s/it]

i Evaluating: {'output': '<html>\n    <head>\n    </head>\n    <body>\n        <form>\n            <label>Name: </label><input type="text" name="name"><br>\n            <label>Age: </label><input type="text" name="age"><br>\n            <input type="submit" value="Submit">\n        </form>\n    </body>\n</html>', 'input': '', 'instruction': 'Develop an HTML form which includes two input fields, an label and a submit button.', 'input_ids': [1, 13866, 338, 15278, 393, 16612, 263, 3414, 304, 775, 297, 4544, 29892, 5816, 338, 1962, 297, 4544, 29901, 29871, 13, 29871, 13, 29915, 2277, 29937, 2799, 4080, 29901, 29871, 13, 21956, 385, 4544, 883, 607, 7805, 1023, 1881, 4235, 29892, 385, 3858, 322, 263, 9752, 2826, 29889, 13, 835, 13291, 29901, 29871, 13, 29966, 1420, 29958, 13, 1678, 529, 2813, 29958, 13, 1678, 1533, 2813, 29958, 13, 1678, 529, 2587, 29958, 13, 4706, 529, 689, 29958, 13, 9651, 529, 1643, 29958, 1170, 29901, 1533, 1643, 5299, 2080, 1134, 543, 726, 29908, 1024, 543, 978, 3254, 1

21it [02:54,  8.33s/it]

Number of exact matches:  0





ZERO!! This metric for evaluation is not useful for this dataset

In [68]:
df = pd.DataFrame(predictions, columns=["predicted_answer", "target_answer"])
print(df)

                                     predicted_answer  \
0   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
1   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
2   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
3   \n\nExample:\n\nconst myArray = [1, 2, 3, 4, 5...   
4   \n\n```\n<div class="example">\n  <p>This is a...   
5   \n\nExample:\n<div>\n  <p>Hello World!</p>\n</...   
6   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
7   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
8   \n\nDesign an interactive game using HTML, CSS...   
9   \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
10  \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
11  \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
12  \n\nHere is the HTML code for the page:\n\n<!D...   
13  \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
14  \n\nPlease design a user interface in HTML for...   
15  \n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <t...   
16  .\n\n<html>\n  <head>\n    

In [66]:
type(df['predicted_answer'])

pandas.core.series.Series

In [69]:
type(df['target_answer'])

pandas.core.series.Series

##Evaluation with Metric: chr_f

ChrF is a evaluation metrics that use the F-score statistic for character n-gram matches. We use the implementation that is already present in sacrebleu

In [57]:
!pip install sacrebleu

Collecting sacrebleu
  Downloading sacrebleu-2.4.0-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.3/106.3 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.8.2 sacrebleu-2.4.0


In [58]:
from datasets import load_metric  # For sacrebleu CHRF
chrf = load_metric("chrf")

  chrf = load_metric("chrf")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/2.83k [00:00<?, ?B/s]

In [71]:
prediction = df['predicted_answer'].tolist()  # Convert Series to list
reference = df['target_answer'].tolist()  # Convert Series to list

# Create a list of lists for reference (if needed)
if not isinstance(reference[0], list):
    reference = [[ref] for ref in reference]

In [72]:
results = chrf.compute(predictions=prediction, references=reference)
print(results)

{'score': 40.14326320585296, 'char_order': 6, 'word_order': 0, 'beta': 2}


#API Development

In [77]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"
# Install required libraries
!pip install flask



In [None]:
from flask import Flask, request, jsonify
from threading import Thread
import atexit

output_dir = '/content/output_dirc'
gptq_config = GPTQConfig(bits=4, use_exllama= False)

trained_model = AutoModelForCausalLM.from_pretrained(
output_dir, local_files_only=True,
quantization_config=gptq_config,
trust_remote_code=True, device_map="auto"
)

app = Flask(__name__)

@app.route('/generate_html', methods=['POST'])
def generate_html():
    try:
        data = request.get_json()
        prompt = data['prompt']

        generated_html = inference(instruction, trained_model, tokenizer)

        return jsonify({'generated_html': generated_html})

    except Exception as e:
        return jsonify({'error': str(e)})

def run_flask_app():
    app.run(port=5000)

# Run Flask app in the background
flask_thread = Thread(target=run_flask_app)
flask_thread.start()

# Stop the server on exit
atexit.register(lambda: app.shutdown())

# The rest of your code for testing with requests can go here


Using `disable_exllama` is deprecated and will be removed in version 4.37. Use `use_exllama` instead and specify the version with `exllama_config`.The value of `use_exllama` will be overwritten by `disable_exllama` passed in `GPTQConfig` or stored in your config file.
You passed `quantization_config` to `from_pretrained` but the model you're loading already has a `quantization_config` attribute and has already quantized weights. However, loading attributes (e.g. ['use_cuda_fp16', 'use_exllama', 'max_input_length', 'exllama_config', 'disable_exllama']) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.


<function __main__.<lambda>()>

 * Serving Flask app '__main__'


In [None]:
import requests

# Replace with your actual prompt
prompt = "Generate HTML code for a simple webpage with a heading and a paragraph."

# Define the JSON payload
data = {"prompt": prompt}

# Send the POST request to the Flask API
response = requests.post("http://127.0.0.1:5000/generate_html", json=data)

# Print the response
print(response.json())


INFO:werkzeug:127.0.0.1 - - [24/Dec/2023 20:33:03] "POST /generate_html HTTP/1.1" 200 -


{'generated_html': '\n\n```\n<!DOCTYPE html>\n<html>\n<head>\n  <title>My Webpage</title>\n</head>\n<body class="main">\n  <!-- Your content here -->\n</body>\n</html>\n```\n\nAnswer: To give the \\begin{code}\n<body>\n\\end{code} tag the class'}
