In [6]:
import google.generativeai as genai
import ast
import os
import json
import PIL.Image
from browser import BrowserAutomation
from selenium.webdriver.common.by import By
from constants import  *
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process
# environment so the API key below does not have to be hardcoded.
load_dotenv()

# API key handed to genai.configure() in generate_content; os.environ.get
# returns None when the variable is not set.
GEMINI_API_KEY = os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY")

In [7]:
def generate_content(prompt, system_prompt, image=None):
    """Send a prompt (optionally with an image) to Gemini and return the reply text.

    Args:
        prompt: Text passed to the model as the request content.
        system_prompt: Passed to the model as its ``system_instruction``.
        image: Optional extra content part. When given, the request content is
            the list ``[prompt, image]`` instead of the bare prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    genai.configure(api_key=GEMINI_API_KEY)

    # Temperature 0 with an 8000-token cap on the generated output.
    decoding = genai.GenerationConfig(max_output_tokens=8000, temperature=0)
    gemini = genai.GenerativeModel(
        "gemini-1.5-pro-latest",
        generation_config=decoding,
        system_instruction=system_prompt,
    )

    # Text-only requests send the bare prompt; requests with an image send a list.
    contents = prompt if image is None else [prompt, image]
    reply = gemini.generate_content(contents, request_options={"timeout": 1000})
    return reply.text

In [10]:
# Starting page for the browser session and the user's natural-language request.
url = "https://vercel.com/geist/introduction"
prompt = "I want to see the docs"

# BrowserAutomation comes from the local `browser` module; its behavior is not
# visible in this notebook, so the notes below are inferred from how the
# results are used in later cells.
browser = BrowserAutomation()
browser.open_browser(url)
# Presumably the page's HTML as a string (it is embedded in the prompt later) — confirm.
html_string = browser.scrape()
# Presumably writes the screenshot to "website.png", since that file is opened
# right below; the value kept in `screenshot` is not used in the visible cells.
screenshot = browser.take_screenshot("website.png")

# Load the saved screenshot as a PIL image; it is passed to generate_content later.
img = PIL.Image.open('website.png')

In [11]:
def _extract_json_payload(text):
    """Return the JSON text inside a Markdown code fence, or ``text`` unchanged.

    A fence tagged ``json`` is preferred; if there is none, the first plain
    triple-backtick fence is used. When no fence is present the input is
    returned as-is so a bare JSON reply still parses.
    """
    for fence in ("```json", "```"):
        _, opened, remainder = text.partition(fence)
        if opened:
            # Keep everything up to the closing fence (or to the end if unclosed).
            return remainder.partition("```")[0]
    return text


# Overwrite the `url` set when the browser was opened with the address the
# browser reports now.
url = browser.get_url()

user_prompt = f"""
current_url: {url}
current_page: {html_string}
current_screenshot is attached
Output selectors for relevant elements (divs, inputs, and images) that are relevant to the user's request.

user: {prompt}
"""

response = generate_content(user_prompt, system_prompt_interpret, img)
# The model may wrap its JSON in a code fence, with or without a "json" tag;
# strip it before parsing so json.loads does not fail on the backticks.
response = _extract_json_payload(response)
obj = json.loads(response)
obj

{'type': 'selectors',
 'selectors': [{'type': 'xpath',
   'selector': "//a[@href='/geist/introduction']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/colors']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/icons']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/brands']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/avatar']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/badge']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/button']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/calendar']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/checkbox']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/choicebox']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/code-block']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/collapse']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/combobox']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/context-menu']"},
  {'type': 'xpath', 'selector': "//a[@href='/geist/desc

In [15]:
# NOTE(review): `design` is opened but never passed to generate_content below.
# If the screenshot is meant to guide generation, pass it as the `image` argument.
design = PIL.Image.open("website.png")

# The instruction suffix must be an f-string: without the prefix the model
# receives the literal text "{url}" instead of the page's base URL. The stray
# leading double quote from the original string is also removed.
response = generate_content(
    design_schema
    + "\n\n"
    + f"Only output div, button, input, img, and select elements. Do not use Tailwind Classes\nBase Url for images (if any): {url} \n\n",
    system_prompt_generate,
)

In [16]:
# Print the model's reply as plain text so the generated markup can be read as-is.
print(response)

```html
<div class='container mx-auto p-4'>
    <input class="border rounded-lg px-3 py-2 border-gray-300 focus:outline-none focus:ring focus:ring-indigo-500 focus:border-indigo-500" style="background-color: #FFF; color: #A5A5A5;" placeholder='Search...' special-id='//*[@id="twotabsearchtextbox"]'>
    <button class="bg-purple-700 hover:bg-purple-800 text-white font-medium py-2 px-4 rounded-lg" style="background-color: #9F03FE; color: #FFF" special-id='//*[@id="nav-search-submit-button"]'>
        Submit
    </button>
</div>
```
