In [1]:
import random
import re
import utils

def check_tasks(tasks):
  """Print tasks as a numbered, ``###``-separated prompt for manual inspection.

  Args:
    tasks: Iterable of task dicts. Every dict has an "instruction" key and
      carries its example either nested under ``task["instances"][0]`` or
      flat, as top-level "input" / "output" keys.

  Returns:
    None. The assembled prompt is written to stdout with ``print``.

  Raises:
    KeyError: If a task lacks "instruction", or its example lacks "input"
      or "output".
    IndexError: If a task has an empty "instances" list.
  """
  sections = []
  for number, task in enumerate(tasks, start=1):
    instruction = task["instruction"]
    # Nested layout keeps the example under "instances"; the flat layout keeps
    # it on the task dict itself.
    example = task["instances"][0] if 'instances' in task else task
    # Dedicated local names so the ``input`` builtin is not shadowed.
    task_input, task_output = example["input"], example["output"]

    # Collapse every whitespace run to one space, then drop trailing colons.
    instruction = re.sub(r"\s+", " ", instruction).strip().rstrip(":")

    # An empty input is shown as a placeholder; a None input is treated the
    # same way instead of raising AttributeError.
    if task_input is None or task_input == "":
      task_input = "<noinput>"

    sections.append(
      "###\n"
      f"{number}. Instruction: {instruction}\n"
      f"{number}. Input:\n{task_input}\n"
      f"{number}. Output:\n{task_output}\n"
    )
  # Join once at the end rather than growing a string inside the loop.
  print("".join(sections))

def get_tasks_outputs(tasks):
  """Collect the "output" value of every task, in order.

  A task that has an "instances" key contributes the "output" of its first
  instance; any other task contributes its own top-level "output".

  Args:
    tasks: Iterable of task dicts in either of the two layouts above.

  Returns:
    A list with one output per task.
  """
  collected = []
  for task in tasks:
    # Pick the dict that actually holds "output" for this task's layout.
    holder = task["instances"][0] if 'instances' in task else task
    collected.append(holder["output"])
  return collected

# Seed Tasks

In [4]:
# Load the seed tasks with the project's `utils.jload` helper (presumably it
# parses the JSON file into a list of task dicts -- TODO confirm) and print
# three randomly chosen ones. No RNG seed is set in the visible cells, so the
# sample changes from run to run.
seed_tasks = utils.jload('seed_tasks.json')
check_tasks(random.sample(seed_tasks, 3))

###
1. Instruction: Perform a code review on the provided HTML/CSS code file. Evaluate the code based on the following factors: readability, uniformity, understandability, correctness, performance, and security.
1. Input:
```
<!DOCTYPE html>
<html>
<head>
<title>My Website</title>
<style>
  body {
    font-family: Arial, sans-serif;
    font-size: 16px;
    margin: 0;
    padding: 0;
  }
  h1 {
    color: #ff0000;
    font-size: 24px;
    margin-bottom: 20px;
  }
  .container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
  }
  .text {
    font-weight: bold;
  }
  .btn {
    background-color: #007bff;
    color: #fff;
    padding: 10px 20px;
    text-decoration: none;
  }
  .btn:hover {
    background-color: #0056b3;
  }
</style>
</head>
<body>
  <div class="container">
    <h1>Welcome to My Website</h1>
    <p class="text">Lorem ipsum dolor sit amet, consectetur adipiscing elit.</p>
    <a class="btn" href="#">Learn More</a>
  </div>
</body>
</html>
```
1. Output:
- R

# Generated Tasks

In [9]:
# Load the generated tasks from 'regen.json' via `utils.jload` (presumably a
# list of task dicts -- TODO confirm) and report how many there are.
regen_tasks = utils.jload('regen.json')
print(f'Generated Tasks Length: {len(regen_tasks)}')

Generated Tasks Length: 100


## Check Tasks Output

In [10]:
def format_ouput(output):
    """Group a bulleted text into ``{top-level bullet: [sub-bullet lines]}``.

    The text is split on newlines and scanned once:

    * A line whose stripped form starts with ``-`` opens a new group keyed by
      that stripped line. Re-opening an existing key resets its list.
    * Lines immediately following a key that start with exactly two spaces
      and a dash (``'  -'``) are appended, unstripped, to that key's list.
    * Any other line ends the current group and is otherwise ignored.

    Args:
        output: The text to parse.

    Returns:
        A dict mapping each key line to the list of raw sub-bullet lines that
        directly followed it, in order of first appearance.
    """
    grouped = {}
    children = None  # list collecting sub-bullets of the open group, if any

    for raw_line in output.split('\n'):
        # Sub-bullets only count while a group is open and unbroken.
        if children is not None and raw_line.startswith('  -'):
            children.append(raw_line)
            continue

        stripped = raw_line.strip()
        if stripped.startswith('-'):
            # Open (or reset) the group for this bullet.
            children = grouped[stripped] = []
        else:
            # A non-bullet line closes the current group.
            children = None

    return grouped
    
# Pull the output text of every generated task, then print, per task, the
# names of the top-level bullets found in that output.
tasks_outputs = get_tasks_outputs(regen_tasks)

for idx, output in enumerate(tasks_outputs):
    print(f'Output #{idx+1}')
    print('avg_similarity_score\t>> ', regen_tasks[idx]["avg_similarity_score"]) # the score doubles as a makeshift unique ID for the task
    formatted_output = format_ouput(output)
    # Keys are the top-level bullet lines of the output.
    factors = list(formatted_output.keys())
    # Keep only the text before the first '(' and trim surrounding dashes/spaces.
    factors = [s.split('(')[0].strip('- ') for s in factors]
    print('factors\t\t\t>> ', factors)
    print()

Output #1
avg_similarity_score	>>  0.6498224159640215
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #2
avg_similarity_score	>>  0.6337972300592833
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #3
avg_similarity_score	>>  0.5606345009918464
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #4
avg_similarity_score	>>  0.5926186629462296
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #5
avg_similarity_score	>>  0.6784472824318495
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #6
avg_similarity_score	>>  0.5444074776153178
factors			>>  ['Readability', 'Uniformity', 'Understandability', 'Correctness', 'Performance', 'Security']

Output #7
avg_similarity_sco