In [None]:
# Import display helpers from the public IPython.display module; importing
# `display` from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
# Inject CSS that sets the height of scrolling output areas to 44em.
display(HTML("<style>div.output_scroll { height: 44em; }</style>"))

from typing import Text, Generator, Tuple, List, Optional, Dict, Set
import pandas as pd
import numpy as np
from ast import literal_eval
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
import json
import re
import uuid
from mpl_toolkits.axes_grid1 import ImageGrid
import math
sns.set_theme()

# Raise pandas' display limits so wide frames holding long HTML/prompt strings
# are shown without truncation.
for _option_name, _option_value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 5000),
    ('display.max_colwidth', 5000),
):
    pd.set_option(_option_name, _option_value)

# 1. OpenAI

In [2]:
from openai import OpenAI
# Shared API client used by generate_completion and generate_image below.
# NOTE(review): no credentials are passed explicitly; presumably the SDK picks
# them up from the environment — confirm before running on a fresh machine.
client = OpenAI()

In [3]:
def generate_completion(prompt, model='gpt-4o', temp=0.50, n=1, max_tokens=30000, logprobs=False, top_logprobs=5, return_obj=True, verbose=False, num_rows=-1):
    """Request a chat completion for a single user prompt.

    Args:
        prompt: Text sent as the only user message.
        model: Model name forwarded to the API.
        temp: Sampling temperature forwarded to the API.
        n: Number of choices requested.
        max_tokens: Accepted for backward compatibility; it is NOT sent with
            the request (the original code never forwarded it either).
        logprobs: When truthy, log probabilities are requested and no
            truncation recovery is attempted.
        top_logprobs: Number of top log probabilities per token; forwarded
            only when ``logprobs`` is truthy. Pass ``None`` to omit it.
        return_obj: If true, return the completion object; otherwise return a
            list with the text of every choice.
        verbose: Print a progress line before the request.
        num_rows: Row label shown in the progress line (-1 hides it).

    Returns:
        The completion object, or a list of strings when ``return_obj`` is
        false. For every choice that finished with ``finish_reason ==
        'length'`` (and ``logprobs`` is off) one follow-up request is issued
        and its text and token usage are merged into the returned object.
    """
    if verbose:
        if num_rows == -1:
            print('Generating completion...')
        else:
            print('[{}] - Generating completion...'.format(num_rows))

    # Build the request once instead of duplicating it per branch.
    request_options = {'model': model, 'temperature': temp, 'n': n}
    if logprobs:
        request_options['logprobs'] = logprobs
        if top_logprobs is not None:
            # Previously accepted by the signature but silently dropped.
            request_options['top_logprobs'] = top_logprobs

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        **request_options,
    )

    if not logprobs:
        continuation_prompt = prompt + '\n -------- \nYour last response was cut off. Continue exactly where you left.'
        # Check every choice, not just the first one, so n > 1 is handled too.
        for choice in chat_completion.choices:
            if choice.finish_reason != 'length':
                continue
            print("Response cut off. Starting a second request... Length: {}".format(chat_completion.usage.completion_tokens))
            partial_text = choice.message.content or ''
            follow_up = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "user", "content": continuation_prompt},
                    {"role": "assistant", "content": partial_text},
                ],
                temperature=temp,
                # Only one continuation is ever used per truncated choice.
                n=1,
            )
            choice.message.content = partial_text + (follow_up.choices[0].message.content or '')
            chat_completion.usage.completion_tokens += follow_up.usage.completion_tokens
            chat_completion.usage.prompt_tokens += follow_up.usage.prompt_tokens
            chat_completion.usage.total_tokens += follow_up.usage.total_tokens

    if return_obj:
        return chat_completion
    return [choice.message.content for choice in chat_completion.choices]

# 2. Load Dataset to Augment

In [4]:
def write_html_to_file_and_image(path, model_name, dataset):
    """Write each row's HTML to disk and try to render it to a JPG.

    Args:
        path: String prefix (directory path ending in a separator) that the
            file names are appended to.
        model_name: Name of the column in ``dataset`` holding the HTML string.
        dataset: DataFrame with a ``name`` column and the ``model_name`` column.

    For every row the HTML is written to ``<path><index>.html`` and rendering
    to ``<path><index>_<name>.jpg`` is attempted via ``imgkit``. Rendering is
    best-effort: a failure is reported on stdout and the loop continues.
    """
    for index, row in dataset.iterrows():
        html_path = path + str(index) + '.html'
        image_path = path + str(index) + '_' + str(row['name']) + '.jpg'
        with open(html_path, 'w', encoding='utf-8') as file:
            print(html_path)
            file.write(row[model_name])
        try:
            # NOTE(review): `imgkit` and `config` are not defined in any visible
            # cell; until they are, this raises NameError for every row.
            imgkit.from_string(row[model_name], image_path, config=config)
        except Exception as error:
            # Keep the best-effort behaviour, but surface the reason instead of
            # silently swallowing it (a bare except also hid KeyboardInterrupt).
            print('Could not render {}: {!r}'.format(image_path, error))

In [6]:
# Load the baseline CSV, keep only the description and generated HTML columns,
# and expose them under the 'name' / 'html' labels the later cells read.
df_html = (
    pd.read_csv('../data/generated/00_dataset_test/data/01_baselines/zs_cot.csv')
    .loc[:, ['description', 'generated_html']]
    .rename(columns={'description': 'name', 'generated_html': 'html'})
)

# 3. Text Content Generation Prompting

In [12]:
# Marker inside the prompt templates that is swapped for the GUI's HTML/CSS.
PLACEHOLDER_HTML = "placeholder_html"
# Marker inside the prompt templates that is swapped for the textual description.
PLACEHOLDER_DESCRIPTION = "placeholder_description"

In [14]:
# Prompt template for the content-enrichment step. It contains the literal
# markers "placeholder_description" and "placeholder_html", which the cell
# below replaces per row with the row's 'name' and 'html' values before the
# prompt is sent to the model. The text is sent verbatim, so any edit to the
# wording changes what the model is asked to do.
PROMPT_TEXT_CONTENT_GENERATION = '''
You are given a GUI in HTML and CSS. This GUI is a first draft for the requirement "placeholder_description". To make it look more realistic, 
your task is to recreate the HTML and CSS, but add concrete example data where it is necessary. For example, instead of having 
simple placeholder text such as "News 1" and "News headline 2", provide real example data. This also includes to, for example, 
add more list items to a list. For images, provide good descriptions or names in the "alt" tag, we will use these descriptions to 
generate fitting images later. However, do not use names of real persons or real companies in this description. Also, make sure that
the generated descriptions are not sensitive at all (avoiding all NSFW, violent and sexual content). If there is already sensitive content, change it to non-sensitive content.
To each of these images, you MUST add an "id" tag, we need this for later mapping back the generated images. To display icons properly, include popular 
icon frameworks and use them. If icons are represented as inside <img> tags, replace them with proper icons from an icon framework. Do everything, that makes the data used in the GUI look real. Note that the generated images are quite large
(1024 x 1024 px), therefore, if necessary, make sure to add proper sizing information to the CSS to avoid that the images are displayed
overly large. However, do not change anything else in the HTML/CSS which would lead to changes in the design, layout, feature set etc. 
Do not provide any explanation, as the output, provide the same HTML/CSS but with replaced and/or extended example data.
Again make sure to add an "id" tag to the images.

HTML/CSS:

placeholder_html
'''

In [15]:
# 1) Fill the template with each row's description and draft HTML.
df_html['html_added_content_prompt'] = df_html.apply(
    lambda record: PROMPT_TEXT_CONTENT_GENERATION
    .replace(PLACEHOLDER_DESCRIPTION, record['name'])
    .replace(PLACEHOLDER_HTML, record['html']),
    axis=1,
)
# 2) One model call per row; the row label is passed along for the progress line.
df_html['html_added_content_completion'] = df_html.apply(
    lambda record: generate_completion(
        record['html_added_content_prompt'],
        model='gpt-4o',
        temp=0.5,
        logprobs=False,
        return_obj=True,
        verbose=True,
        num_rows=record.name,
    ),
    axis=1,
)
# 3) Keep the text of the first choice as the enriched HTML.
df_html['html_added_content_completion_content'] = df_html.apply(
    lambda record: record['html_added_content_completion'].choices[0].message.content,
    axis=1,
)

# 4. Image Extraction Prompting

In [25]:
# Prompt template for the image-extraction step. The literal markers
# "placeholder_description" and "placeholder_html" are replaced per row in the
# cell below. The prompt asks the model to answer with a list of objects that
# have the keys "id", "alt" and "description"; later cells read 'id' and
# 'description' from those objects. The text is sent verbatim to the model.
PROMPT_IMAGE_EXTRACTION = '''
You are given a GUI in HTML and CSS for the description "placeholder_description". Your task is to extract a list of all the images contained in the HTML. 
For each image, you should extract the HTML "id" tag, the "alt" text and provide a more detailed brief description
of the image content. Do not extract images that represent icons, even though they might be represented with <img> tags. Do not provide any explanations, directly output  the list. Please provide the answer in the following format, provided as an example. In this example, the list contains
one example object. Make sure to only use " for the strings and inside strings use '. Example output format: [{"id": "id1", "alt": "alt_text", "description": "image_description"}]

HTML/CSS:

placeholder_html
'''

In [27]:
def _parse_image_list(raw_text):
    """Convert the model's textual image list into a list of dicts.

    Strips an optional Markdown code fence, then tries JSON first and a Python
    literal second. Returns an empty list (and reports it on stdout) when the
    text cannot be parsed into a list, so one bad answer does not stop the run.
    """
    cleaned = re.sub(r'^```[A-Za-z]*\s*|\s*```$', '', str(raw_text).strip())
    for parser in (json.loads, literal_eval):
        try:
            parsed = parser(cleaned)
        except (ValueError, SyntaxError, TypeError):
            continue
        if isinstance(parsed, list):
            return parsed
    print('Could not parse image list: {!r}'.format(cleaned[:200]))
    return []


# Build the extraction prompt from the enriched HTML of each row.
df_html['extract_images_prompt'] = df_html.apply(lambda row: PROMPT_IMAGE_EXTRACTION.replace(PLACEHOLDER_DESCRIPTION, row['name']).replace(PLACEHOLDER_HTML, row['html_added_content_completion_content']), axis=1)
# One model call per row.
df_html['extract_images_completion'] = df_html.apply(lambda row: generate_completion(row['extract_images_prompt'], model='gpt-4o', temp=0.5, logprobs=False, return_obj=True, verbose=True, num_rows=row.name), axis=1)
# The later cells iterate this column as a list of dicts (image['id'],
# image['description']), so the model's text answer must be parsed here;
# leaving it as a raw string makes those cells fail on a fresh run.
df_html['extract_images_completion_content'] = df_html['extract_images_completion'].map(
    lambda completion: _parse_image_list(completion.choices[0].message.content)
)

# 5. Generate Images and Upload to S3

In [40]:
# Marker string for an images list inside a prompt template.
# NOTE(review): no visible cell references this constant — confirm it is still needed.
PLACEHOLDER_IMAGES_LIST = "placeholder_images_list"

In [42]:
# Prompt template used by generate_image after a failed image request: the
# marker "placeholder_description" is replaced with the original image
# description and the model is asked for a less sensitive rewrite.
# The constant name is misspelled ("PROMTP"); it is kept as-is because
# generate_image refers to it by this exact name.
PROMTP_REWRITE_DESCRIPTION = '''You are given a short description for an image, that was used to generate an image. However,
due to the description being somewhat sensitive (e.g., NSFW, violent or sexual direction), please rewrite the description to
keep its general information but reduce the sensitivity. Do not provide any explanation, directly output the rewritten description.
Description: placeholder_description
'''

In [43]:
import base64
import json
import boto3
from PIL import Image
import io
import uuid

# Module-level S3 client used by upload_image_to_s3.
S3_ClIENT = boto3.client('s3')
# Bucket that generate_images_and_upload uploads the generated images into.
BUCKET_NAME = 'bucket_name'
# generate_images_and_upload builds each image URL with
# BASE_IMAGE_URL.replace(PLACEHOLDER_URL, image_id).
# NOTE(review): with the values below "base_url" does not contain
# "placeholder", so the replace is a no-op and every image URL would be
# "base_url". These look like redacted stand-ins — fill in the real bucket and
# URL pattern before running.
BASE_IMAGE_URL = "base_url"
PLACEHOLDER_URL = "placeholder"

def get_placeholder_image_bytes(path='../data/images/placeholder.png'):
    """Return the placeholder image as an in-memory binary stream.

    Args:
        path: Location of the placeholder image file. Defaults to the path the
            notebook has always used.

    Returns:
        io.BytesIO positioned at offset 0 and holding the file's raw bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'rb') as image_file:
        # The earlier base64 encode -> decode round trip reproduced the same
        # bytes, so the file content is wrapped directly.
        return io.BytesIO(image_file.read())

def _request_image_bytes(prompt_text, size, quality, style):
    """Request one 'dall-e-3' image and return (BytesIO of decoded bytes, raw response)."""
    image_completion = client.images.generate(
        model='dall-e-3',
        prompt=prompt_text,
        size=size,
        quality=quality,
        n=1,
        style=style,
        response_format='b64_json',
    )
    image_bytes = base64.b64decode(image_completion.data[0].b64_json)
    return io.BytesIO(image_bytes), image_completion


def generate_image(description, size="1024x1024", quality="hd", style="vivid"):
    """Generate an image for a description, with two fallbacks.

    1. Request an image for ``description``.
    2. If that raises, ask the chat model to rewrite the description
       (PROMTP_REWRITE_DESCRIPTION) and request an image for the rewrite.
    3. If that raises as well, fall back to the local placeholder image.

    Args:
        description: Text prompt for the image.
        size: Image size forwarded to the API (previously ignored in favour of
            a hard-coded "1024x1024").
        quality: Quality setting forwarded to the API.
        style: Style setting forwarded to the API.

    Returns:
        Tuple ``(image_bytes_io, image_completion)``. ``image_completion`` is
        the API response of the successful request, or ``None`` when the
        placeholder image had to be used.
    """
    # Pre-bind so the final return cannot hit an unbound name when every
    # request fails and the placeholder is used.
    image_completion = None
    try:
        image_bytes_io, image_completion = _request_image_bytes(description, size, quality, style)
    except Exception as e:
        print(f"An error occurred: {e}")
        print('Exception occurred for description: {}'.format(description))
        try:
            # Rewrite the description
            rewrite_prompt = PROMTP_REWRITE_DESCRIPTION.replace(PLACEHOLDER_DESCRIPTION, description)
            completion_rewrite = generate_completion(rewrite_prompt, model='gpt-4o', temp=0.50, n=1,
                                                     max_tokens=15500, logprobs=False, return_obj=True)
            rewritten_description = completion_rewrite.choices[0].message.content
            print('Rewritten description: {}'.format(rewritten_description))
            image_bytes_io, image_completion = _request_image_bytes(rewritten_description, size, quality, style)
        except Exception as e:
            print(f"An error occurred: {e}")
            print('Exception occurred again for description: {}'.format(description))
            image_bytes_io = get_placeholder_image_bytes()
    return (image_bytes_io, image_completion)

def upload_image_to_s3(image_bytes_io, bucket_name, object_name):
    """Upload a binary stream to S3 under ``object_name`` with an image/png content type.

    Args:
        image_bytes_io: File-like object holding the image bytes.
        bucket_name: Destination bucket.
        object_name: Key of the object to create.
    """
    png_metadata = {'ContentType': 'image/png'}
    S3_ClIENT.upload_fileobj(image_bytes_io, bucket_name, object_name, ExtraArgs=png_metadata)

def generate_image_string(images_list):
    """Render one "<img> id: ... | new image src: ..." line per image.

    Args:
        images_list: Iterable of dicts with the keys 'id' and 'image_url'.

    Returns:
        All lines concatenated, each terminated by a newline ('' for no images).
    """
    line_template = "<img> id: {} | new image src: {}"
    rendered_lines = [
        line_template.format(entry['id'], entry['image_url']) + '\n'
        for entry in images_list
    ]
    return ''.join(rendered_lines)

In [44]:
def generate_images_and_upload(images_list):
    """Generate and upload an image for every entry, annotating the entries in place.

    Args:
        images_list: List of dicts, each with a 'description' key.

    Returns:
        The same list object; every dict gains 'image_id' (a fresh UUID4
        string) and 'image_url' (BASE_IMAGE_URL with PLACEHOLDER_URL replaced
        by that id). Each URL is also printed.
    """
    for entry in images_list:
        # The raw API response returned alongside the bytes is not used here.
        image_stream, _api_response = generate_image(entry['description'])
        new_id = str(uuid.uuid4())
        object_key = new_id + '.png'
        upload_image_to_s3(image_stream, BUCKET_NAME, object_key)
        public_url = BASE_IMAGE_URL.replace(PLACEHOLDER_URL, new_id)
        print(public_url)
        entry.update(image_id=new_id, image_url=public_url)
    return images_list

In [None]:
# Generate and upload the images of every row. generate_images_and_upload
# annotates the dicts in 'extract_images_completion_content' in place, and the
# returned list replaces whatever 'extract_images_completion' held before.
df_html['extract_images_completion'] = df_html.apply(
    lambda record: generate_images_and_upload(record['extract_images_completion_content']),
    axis=1,
)

In [48]:
from bs4 import BeautifulSoup

def replace_img_urls(images, html_content):
    """Point <img> tags at their generated images.

    Args:
        images: Iterable of dicts with the keys 'id' and 'image_url'.
        html_content: HTML document as a string.

    Returns:
        The prettified HTML in which every <img> whose id matches an entry has
        its ``src`` set to that entry's 'image_url'. Entries without a
        matching tag are skipped.
    """
    document = BeautifulSoup(html_content, 'html.parser')
    for entry in images:
        target_tag = document.find('img', id=entry['id'])
        if not target_tag:
            continue
        target_tag['src'] = entry['image_url']
    return document.prettify()

In [49]:
# Swap the generated image URLs into each row's enriched HTML.
df_html['html_images_replaced_completion'] = df_html.apply(
    lambda record: replace_img_urls(
        record['extract_images_completion_content'],
        record['html_added_content_completion_content'],
    ),
    axis=1,
)
# Drop the Markdown code-fence markers the model may have wrapped its answer in.
df_html['html_images_replaced_completion'] = df_html['html_images_replaced_completion'].apply(
    lambda html_text: html_text.replace('```html', '').replace('```', '')
)

In [52]:
# Persist the final HTML of every row (and attempt the JPG rendering).
write_html_to_file_and_image(
    path='../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/',
    model_name='html_images_replaced_completion',
    dataset=df_html,
)

../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/0.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/1.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/2.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/3.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/4.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/5.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/6.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/7.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/8.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/9.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/10.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/11.html
../data/generated/00_dataset_test/guis/01_baselines/zs_cot_content/12.html
../data/generated/00_dataset_test/g

In [471]:
# Expose the final HTML under the 'generated_html' column label.
df_html = df_html.rename(
    {'html_images_replaced_completion': 'generated_html'},
    axis='columns',
)

In [53]:
# Save the augmented dataset without the index column.
df_html.to_csv(
    path_or_buf='../data/generated/00_dataset_test/data/01_baselines/zs_cot_content.csv',
    index=False,
)