## Get Available Windows Apps

In [None]:
#%pip install winapps
import winapps

# Print the name of every entry returned by winapps.list_installed() while
# keeping a 1-based running count; `cnt` stays 0 when nothing is returned.
cnt = 0
for cnt, item in enumerate(winapps.list_installed(), start=1):
    print(item.name)

print(cnt)

## Test GPT-4 Vision with screenshot

In [None]:
# System prompt that asks the model to rebuild a screenshotted web page as a
# single-page Bootstrap/HTML/JS app and to return only the full <html> code.
# NOTE(review): no cell in the visible part of this notebook reads this
# constant — confirm whether it is still needed.
BOOTSTRAP_SYSTEM_PROMPT = """
You are an expert Bootstrap developer
You take screenshots of a reference web page from the user, and then build single page apps 
using Bootstrap, HTML and JS.
You might also be given a screenshot(The second image) of a web page that you have already built, and asked to
update it to look more like the reference image(The first image).

- Make sure the app looks exactly like the screenshot.
- Pay close attention to background color, text color, font size, font family, 
padding, margin, border, etc. Match the colors and sizes exactly.
- Use the exact text from the screenshot.
- Do not add comments in the code such as "<!-- Add other navigation links as needed -->" and "<!-- ... other news items ... -->" in place of writing the full code. WRITE THE FULL CODE.
- Repeat elements as needed to match the screenshot. For example, if there are 15 items, the code should have 15 items. DO NOT LEAVE comments like "<!-- Repeat for each news item -->" or bad things will happen.
- For images, use placeholder images from https://placehold.co and include a detailed description of the image in the alt text so that an image generation AI can generate the image later.

In terms of libraries,

- Use this script to include Bootstrap: <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-T3c6CoIi6uLrA9TneNEoa7RxnatzjcDSCmG1MXxSR1GAsXEV/Dwwykc2MPK8M2HN" crossorigin="anonymous">
- You can use Google Fonts
- Font Awesome for icons: <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css"></link>

Return only the full code in <html></html> tags.
Do not include markdown "```" or "```html" at the start or end.
"""

In [None]:
import base64
import requests

# OpenAI API key. Read it from the OPENAI_API_KEY environment variable so the
# secret is never saved inside the notebook; falls back to the previous empty
# string when the variable is not set.
import os
api_key = os.environ.get("OPENAI_API_KEY", "")

# Function to encode the image
def encode_image(image_path):
  """Return the contents of the file at `image_path` as a base64 text string.

  The file is read in binary mode; the base64 bytes are decoded as UTF-8 so
  the result is a regular `str`.
  """
  with open(image_path, "rb") as image_file:
    raw_bytes = image_file.read()
  encoded = base64.b64encode(raw_bytes)
  return encoded.decode('utf-8')

# Path to your image
image_path = "protimemobile.png"

# Getting the base64 string
base64_image = encode_image(image_path)

# Declare the image's real media type in the data URL (the .png screenshot was
# previously always labelled image/jpeg). Falls back to the old value when the
# type cannot be guessed from the file extension.
import mimetypes
image_mime = mimetypes.guess_type(image_path)[0] or "image/jpeg"

headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {api_key}"
}

# One user message made of two parts: the instruction text and the screenshot
# embedded as a base64 data URL.
payload = {
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "You are an UI Expert which can easily extract single components from a webui such as buttons, input fields and other things. I will provide you with a screenshot of a mobile UI. Please describe me what website you see and provide me with a list of UI components as well as their corresponding actions."
        },
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:{image_mime};base64,{base64_image}"
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

# requests never times out by default; bound the wait so a stalled connection
# cannot hang the cell forever.
response = requests.post(
  "https://api.openai.com/v1/chat/completions",
  headers=headers,
  json=payload,
  timeout=120,
)

# Print the whole JSON body (this also shows the API's error object on failure).
print(response.json())

In [None]:
# Print only the message content of the first choice in the JSON response.
print(response.json()['choices'][0]['message']['content'])

## Generating the Logo

In [None]:
from openai import OpenAI
# Client for the OpenAI API; the key is left blank here.
client = OpenAI(api_key="")

# Ask DALL-E 3 for a single standard-quality 1024x1024 image of the logo.
response = client.images.generate(
  n=1,
  size="1024x1024",
  quality="standard",
  model="dall-e-3",
  prompt="Conjure an emblem symbolizing the audacity of a cyberpunk pirate navigating the uncharted territories of the latent space. Imagine the pirate standing tall on the deck of a futuristic vessel, their silhouette defined by both human and cyborg traits. Picture them with cybernetic enhancements, perhaps a bionic arm or eye augmentations, hinting at their resilience and adaptability in this digital age. The scene should exude a sense of intrepid exploration, with vibrant neon hues pulsating against the backdrop of the void. Ensure the emblem's shape is unconventional, lending it a distinctive presence. Maintain a transparent background for versatility in application.",
)

# URL of the first returned image; the bare name makes the notebook display it.
image_url = response.data[0].url
image_url

## Testing Selenium for scraping actions

In [None]:
#%pip install selenium
#%pip install webdriver_manager
import uuid

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By

import os
from selenium.common.exceptions import WebDriverException

# Setup chrome driver
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

try:
    # Navigate to the url
    driver.get('https://www.digitec.ch/')

    # Collect buttons AND links. The button list used to be overwritten by the
    # link list on the very next line, so buttons were never processed.
    my_elements = (
        driver.find_elements(By.TAG_NAME, 'button')
        + driver.find_elements(By.TAG_NAME, 'a')
    )

    # The target folder has to exist before screenshots can be written into it.
    os.makedirs("components", exist_ok=True)

    for element in my_elements:
        try:
            print(element.get_attribute("outerHTML"))
            element.screenshot(f"components/{str(uuid.uuid4())}.png")
        except WebDriverException:
            # Best effort: skip elements Selenium cannot read or capture
            # instead of swallowing every possible exception.
            continue
finally:
    # Always close the browser, even when navigation or a lookup fails.
    driver.quit()

In [None]:
#%pip install selenium
#%pip install webdriver_manager
import uuid

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By

from selenium.common.exceptions import WebDriverException

# Setup chrome driver
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

texts = []
try:
    # Navigate to the url
    driver.get('https://www.digitec.ch/')

    # Collect buttons AND links. The button list used to be overwritten by the
    # link list on the very next line, so button texts were never gathered.
    my_elements = (
        driver.find_elements(By.TAG_NAME, 'button')
        + driver.find_elements(By.TAG_NAME, 'a')
    )

    for element in my_elements:
        try:
            # Read the text once; each access asks the browser again.
            label = element.text
        except WebDriverException:
            # Best effort: skip elements Selenium can no longer read.
            continue
        if label:
            texts.append(label)
finally:
    # Always close the browser, even when navigation or a lookup fails.
    driver.quit()

In [None]:
# Display the list of non-empty element texts gathered by the previous cell.
texts

In [None]:
import pandas as pd
import PyPDF2
import requests
import io
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from urllib.request import urlopen
from bs4 import BeautifulSoup
from tqdm import tqdm
n = 40  # how many of the highest-scoring TF-IDF terms to print


# get all pages data
arxiv_links = pd.read_csv('phd-survey-computer-control.csv')
arxiv_links = arxiv_links['Url'].dropna()
arxiv_links = arxiv_links[arxiv_links.str.contains("arxiv")]

papers = []
for link in tqdm(arxiv_links):
    # Rewrite only the "/abs/" path segment; a bare replace('abs', 'pdf')
    # would also mangle any other "abs" substring in the URL.
    link = f"{link.replace('/abs/', '/pdf/')}.pdf"
    response = requests.get(link, timeout=60)
    # Fail with the HTTP error instead of handing an error page to the PDF parser.
    response.raise_for_status()
    f = io.BytesIO(response.content)
    reader = PyPDF2.PdfReader(f)
    pages = reader.pages
    # Guard against a page that yields no text.
    paper_text = "".join(page.extract_text() or "" for page in pages)
    # Keep everything before the first occurrence of "Reference".
    paper_text = paper_text.split("Reference")[0]

    # html = urlopen(link).read()
    # soup = BeautifulSoup(html, features="html.parser")
    # abstract = soup.find('blockquote', {'class': 'abstract'})
    papers.append(paper_text)

print(len(papers))
vectorizer = TfidfVectorizer(stop_words='english')
vectorized_data = vectorizer.fit_transform(papers)
feature_array = np.array(vectorizer.get_feature_names_out())
# Sum each term's column on the sparse matrix directly; converting to a dense
# array first needs documents x vocabulary floats for the same totals.
term_totals = np.asarray(vectorized_data.sum(axis=0)).ravel()
# argsort is ascending, so the last n indices are the n largest totals.
top_n_indices = np.argsort(term_totals)[-n:]
top_n = feature_array[top_n_indices]
print(top_n[0:n])


## Searching for terms in the papers

In [None]:
import pandas as pd
import PyPDF2
import requests
import io
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from urllib.request import urlopen
from bs4 import BeautifulSoup
from tqdm import tqdm
import time
n = 40


# get all pages data
arxiv_links = pd.read_csv('zoterophd.csv')
arxiv_links = arxiv_links['Url'].dropna()
arxiv_links = arxiv_links[arxiv_links.str.contains("arxiv")]

# `papers[i]` holds the text for `arxiv_links.iloc[i]`; any failure below
# aborts the loop rather than skipping an entry, so the two stay aligned.
papers = []
start = time.time()
for link in tqdm(arxiv_links):
    # Rewrite only the "/abs/" path segment; a bare replace('abs', 'pdf')
    # would also mangle any other "abs" substring in the URL.
    link = f"{link.replace('/abs/', '/pdf/')}.pdf"
    response = requests.get(link, timeout=60)
    # Fail with the HTTP error instead of handing an error page to the PDF parser.
    response.raise_for_status()
    f = io.BytesIO(response.content)
    reader = PyPDF2.PdfReader(f)
    pages = reader.pages
    # Guard against a page that yields no text.
    paper_text = "".join(page.extract_text() or "" for page in pages)
    # Keep everything before the first occurrence of "Reference".
    paper_text = paper_text.split("Reference")[0]
    # html = urlopen(link).read()
    # soup = BeautifulSoup(html, features="html.parser")
    # abstract = soup.find('blockquote', {'class': 'abstract'})
    papers.append(paper_text)


In [None]:
# Print the source link of every paper whose text mentions "dataset" or
# "benchmark". The old condition `"dataset" in p or "benchmark"` was always
# true (a non-empty string literal is truthy), so every link was printed.
for i, p in enumerate(papers):
    if "dataset" in p or "benchmark" in p:
        print(arxiv_links.iloc[i])


In [None]:
# Demonstrates the truncation used on the paper texts: split on "Reference"
# and keep only the part before its first occurrence.
"hello this is a Reference sheet for test".split("Reference")[0]

In [None]:

import torch
from screenai.main import ScreenAI

# Random stand-in inputs. The image tensor is drawn first, then the text
# tensor, so the random-number stream is consumed in the same order as before.
image = torch.rand(1, 3, 224, 224)
text = torch.randn(1, 1, 512)

# Build the ScreenAI model: sequence/vocabulary settings first, then the
# image settings, then the depth of each sub-network.
model = ScreenAI(
    num_tokens=20000,
    max_seq_len=1028,
    dim=512,
    heads=8,
    image_size=224,
    patch_size=16,
    depth=6,
    vit_depth=4,
    multi_modal_encoder_depth=4,
    llm_decoder_depth=4,
    mm_encoder_ff_mult=4,
)
print(model.num_tokens)

# Run the forward pass with the text and image tensors.
out = model(text, image)

# Print the output itself (not just its shape).
print(out)



In [None]:
# Print the version string exposed by the installed `zeta` package.
import zeta
print(zeta.__version__)

In [None]:
import pyautogui
# Click two screen positions, then type "Microsoft Teams" with 0.25 s between
# key presses.
# NOTE(review): the 1.4 multiplier looks like a display-scaling factor applied
# to the raw coordinates — confirm for the target machine.
pyautogui.click(50*1.4,750*1.4, interval=0.25)
pyautogui.click(50*1.4,800*1.4, interval=0.25)
pyautogui.typewrite("Microsoft Teams", interval=0.25) 


In [None]:
import json
# Load the prompt collection. The context manager closes the file handle that
# the bare open() call used to leak; JSON text is read as UTF-8.
with open('prompts.json', encoding='utf-8') as f:
    test = json.load(f)

# Displayed by the notebook: the entry stored under 'debate_prompts'.
test['debate_prompts']

In [None]:
from oxen.datasets import download
from oxen.auth import config_auth
import os

# Authenticate with the key from the OXENAI_API_KEY environment variable. An
# API key must not be committed inside the notebook; the key that used to be
# embedded here should be treated as leaked and rotated.
config_auth(os.environ["OXENAI_API_KEY"])
# Fetch prompts.json from the main revision of the hub repository.
f = download("Yingrjimsch/a2c2_prompts", "prompts.json", revision="main")

In [None]:
# Display the value returned by download() in the previous cell.
f

In [None]:
import os
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from PIL import Image
import time
from webdriver_manager.chrome import ChromeDriverManager
# Alias fixed: it was misspelled `ChromeServiceimport`, which left the name
# `ChromeService` undefined when this cell runs in a fresh kernel.
from selenium.webdriver.chrome.service import Service as ChromeService
import oxen
from oxen.auth import config_auth
import filecmp
import uuid

# Authenticate with the key from the OXENAI_API_KEY environment variable. An
# API key must not be committed inside the notebook; the key that used to be
# embedded here should be treated as leaked and rotated.
config_auth(os.environ["OXENAI_API_KEY"])

def capture_screenshot(driver, url, output_dir, idx, wait_seconds=2):
    """Open `url` in `driver` and save a screenshot as ``image_<idx>.png``.

    Args:
        driver: Object providing ``get(url)`` and ``save_screenshot(path)``.
        url: Address to navigate to.
        output_dir: Directory the screenshot file is written into.
        idx: Value interpolated into the file name.
        wait_seconds: Pause after navigation so the page can load. Defaults
            to the previously hard-coded 2 seconds.

    Returns:
        The path the screenshot was saved under.
    """
    # Navigate to the URL
    driver.get(url)

    # Allow the page to load
    time.sleep(wait_seconds)

    # Capture screenshot
    screenshot_path = os.path.join(output_dir, f'image_{idx}.png')
    driver.save_screenshot(screenshot_path)

    return screenshot_path

def get_element_bounding_boxes(driver):
    """Return position and size of every <input>, <a> and <button> element.

    Each result is a dict with the keys 'type' (the tag name), 'x', 'y',
    'width' and 'height', taken from the element's `location` and `size`.
    Results are grouped by tag in the order input, a, button.
    """
    # Look up all three tag groups first, then read their geometry.
    tag_names = ('input', 'a', 'button')
    found = {tag: driver.find_elements(By.TAG_NAME, tag) for tag in tag_names}

    boxes = []
    for tag, members in found.items():
        for member in members:
            position, dimensions = member.location, member.size
            boxes.append({
                'type': tag,
                'x': position['x'],
                'y': position['y'],
                'width': dimensions['width'],
                'height': dimensions['height'],
            })
    return boxes

def normalize_bounding_boxes(bounding_boxes, image_size):
    """Convert pixel boxes to centre/size values relative to the image size.

    Args:
        bounding_boxes: Iterable of dicts with 'type', 'x', 'y', 'width' and
            'height'.
        image_size: ``(width, height)`` pair used as the divisor.

    Returns:
        A list of dicts with 'type', 'x_center', 'y_center', 'width' and
        'height'. A box is left out when both centre coordinates are <= 0 or
        when either centre coordinate is > 1.
    """
    img_w, img_h = image_size
    result = []

    for item in bounding_boxes:
        rel_w = item['width'] / img_w
        rel_h = item['height'] / img_h
        cx = (item['x'] + item['width'] / 2) / img_w
        cy = (item['y'] + item['height'] / 2) / img_h

        at_or_before_origin = cx <= 0 and cy <= 0
        beyond_image = cx > 1 or cy > 1
        if at_or_before_origin or beyond_image:
            continue

        result.append({
            'type': item['type'],
            'x_center': cx,
            'y_center': cy,
            'width': rel_w,
            'height': rel_h,
        })

    return result

def save_yolo_annotations(normalized_boxes, annotation_path):
    """Write one line per box to `annotation_path`.

    Each line is ``<class_id> <x_center> <y_center> <width> <height>``, where
    the class id is 0 for 'input', 1 for 'a' and 2 for 'button'. A box with
    any other 'type' raises KeyError.
    """
    class_ids = {'input': 0, 'a': 1, 'button': 2}
    with open(annotation_path, 'w') as f:
        for box in normalized_boxes:
            class_id = class_ids[box['type']]
            line = '{} {} {} {} {}\n'.format(
                class_id,
                box["x_center"],
                box["y_center"],
                box["width"],
                box["height"],
            )
            f.write(line)

def process_urls(urls, output_dir):
    """Screenshot every URL and write a matching annotation file.

    For each URL a screenshot is saved to ``<output_dir>/images/image_<uuid>.png``
    and the normalized boxes of its input/a/button elements are written to
    ``<output_dir>/labels/image_<uuid>.txt``.

    Args:
        urls: Iterable of addresses to visit.
        output_dir: Root directory; 'images' and 'labels' are created in it.
    """
    # Ensure output directories exist
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Set up Selenium with ChromeDriver
    # options = webdriver.ChromeOptions()
    # options.add_argument('--headless')
    # `Service` is the name this cell really imports; `ChromeService` only
    # existed when an earlier notebook cell had been run first.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        for url in urls:
            # One random id names both the screenshot and its label file.
            sample_id = str(uuid.uuid4())
            screenshot_path = capture_screenshot(driver, url, images_dir, sample_id)
            bounding_boxes = get_element_bounding_boxes(driver)
            # Only the dimensions are needed, so close the image right away.
            with Image.open(screenshot_path) as image:
                image_size = image.size
            normalized_boxes = normalize_bounding_boxes(bounding_boxes, image_size)

            annotation_path = os.path.join(labels_dir, f'image_{sample_id}.txt')
            save_yolo_annotations(normalized_boxes, annotation_path)
    finally:
        # Close the browser even when a page fails part-way through.
        driver.quit()

# # Example usage
# urls = [
#     'https://google.ch',
#     'https://example.com',
#     'https://digitec.ch'
#     # Add more URLs as needed
# ]

def oxen_pull():
    """Clone "Yingrjimsch/a2c2_prompts" with `oxen.clone` and return the result."""
    cloned_repo = oxen.clone("Yingrjimsch/a2c2_prompts")
    return cloned_repo

def oxen_push(repo):
    """Stage everything in `repo`, commit it, set the "origin" remote and push.

    `repo` must provide `add`, `commit`, `set_remote` and `push`.
    """
    remote_name = "origin"
    remote_url = "https://hub.oxen.ai/Yingrjimsch/a2c2_prompts"
    commit_message = "Adding new data for training"

    # Stage and commit all changes.
    repo.add(".")
    repo.commit(commit_message)

    # Point the remote at the hub repository, then upload.
    repo.set_remote(remote_name, remote_url)
    repo.push()

def read_urls_from_file(filepath):
    """Return the set of non-blank lines in `filepath`, stripped of whitespace.

    Blank lines are skipped so they never show up as the URL ``''``;
    `append_list_to_file` writes a newline before every item, which can leave
    such empty lines in the list file.
    """
    with open(filepath, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return {line for line in stripped_lines if line}

def get_unique_urls(file1, file2):
    """Return, as a list, the URLs that appear in exactly one of the two files."""
    first = read_urls_from_file(file1)
    second = read_urls_from_file(file2)
    # Symmetric difference: entries present in one set but not in both.
    return list(first ^ second)

def append_list_to_file(file_path, items):
    """Append every item to `file_path`, each preceded by a newline."""
    with open(file_path, 'a') as file:
        file.writelines(f'\n{item}' for item in items)

# Clone the data repository, then work out which URLs appear in only one of
# the local list and the clone's list.
dir = oxen_pull()
urls = get_unique_urls('url_list.txt', 'a2c2_prompts/url_list.txt')

# Nothing to do when the two lists agree. Otherwise capture the pages, record
# the URLs in the clone's list and upload the result.
if urls:
    process_urls(urls, dir.path)
    append_list_to_file(os.path.join(dir.path, 'url_list.txt'), urls)
    oxen_push(dir)
    
# oxen_push(dir)



In [None]:

from oxen.auth import config_auth
import filecmp
import uuid
import os

# Pass the key itself, read from the OXENAI_API_KEY environment variable; the
# previous call handed the literal text "OXENAI_API_KEY" to config_auth.
config_auth(os.environ["OXENAI_API_KEY"])
# Upload the repository cloned into `dir` by the earlier cell.
oxen_push(dir)

In [None]:
import oxen
from oxen.auth import config_auth
import os

# Authenticate with the key from the OXENAI_API_KEY environment variable. An
# API key must not be committed inside the notebook; the key that used to be
# embedded here should be treated as leaked and rotated.
config_auth(os.environ["OXENAI_API_KEY"])
# Clone the repository
repo = oxen.clone("Yingrjimsch/a2c2_prompts")


In [None]:
# Display the `path` attribute of the repository cloned in the previous cell.
repo.path

In [None]:
import os
from oxen import LocalRepo

# Unused alternative, kept for reference: create and initialise a new
# LocalRepo instead of reusing the `repo` object from the clone cell.
# repo = LocalRepo(repo, mkdir=True)
# Initialize the repository
# repo.init()
# Write a small sample CSV into the repository directory
data_path = os.path.join(repo.path, "people.csv")
with open(data_path, "w") as f:
    f.write("name,age\nbob,12\njane,13")
# Stage the data for commit
repo.add(data_path)
# Commit the changes with a message
repo.commit("Adding my data")

# Set where to push the data to.
# NOTE(review): the URL still contains the literal "<repo_name>" template
# placeholder; it presumably has to be replaced with a real repository name
# before the push below can succeed — confirm.
repo.set_remote("origin", "https://hub.oxen.ai/Yingrjimsch/<repo_name>")

# Push the changes to the remote
repo.push()


In [None]:
import filecmp
# Compare the local URL list with the copy inside the cloned repository; the
# notebook displays the result of filecmp.cmp.
filecmp.cmp('url_list.txt', 'a2c2_prompts/url_list.txt')

In [None]:
from math import ceil

def resize(width, height):
    """Scale (width, height) down so that the longer side becomes 1024.

    Sizes with neither side above 1024 are returned unchanged. Otherwise the
    longer side is set to 1024 and the other side is scaled by the same
    factor and truncated with int(). Returns a ``(width, height)`` tuple.
    """
    limit = 1024
    if not (width > limit or height > limit):
        return width, height
    if width > height:
        return limit, int(height * limit / width)
    return int(width * limit / height), limit

def count_image_tokens(width: int, height: int):
    """Return 85 plus 170 for every 512x512 tile of the resized image.

    The size is first passed through `resize`; the tile count is the number
    of 512-pixel steps needed to cover each side, rounded up.
    NOTE(review): the constants resemble a published vision-token formula —
    confirm the resize step matches the intended model's rules.
    """
    scaled_w, scaled_h = resize(width, height)
    tiles = ceil(scaled_h / 512) * ceil(scaled_w / 512)
    return 85 + 170 * tiles

# Example: token estimate for a 1080x2400 screenshot.
count_image_tokens(1080, 2400)

In [None]:
from bs4 import BeautifulSoup
import os

def count_tags(html_content):
    """Return how many tags BeautifulSoup (lxml parser) finds in `html_content`."""
    parsed = BeautifulSoup(html_content, 'lxml')
    # find_all(True) matches every tag in the parsed document.
    return len(parsed.find_all(True))

# Build the absolute path to 'pt.html' in the user's Downloads folder.
home_dir = os.path.expanduser('~')
downloads_dir = os.path.join(home_dir, 'Downloads')
file_path = os.path.join(downloads_dir, 'pt.html')

# Report a missing file; otherwise read it and print its total tag count.
if not os.path.isfile(file_path):
    print(f'The file {file_path} does not exist.')
else:
    with open(file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()
        num_tags = count_tags(html_content)
        print(f'Total number of HTML tags: {num_tags}')


In [None]:
from bs4 import BeautifulSoup
import os

# Tag names that this notebook treats as interactive elements
interactive_tags = [
    'a', 'button', 'details', 'input', 'select', 'textarea', 'label', 'fieldset',
    'legend', 'option', 'audio', 'video', 'iframe', 'dialog', 'menu', 'menuitem',
    'embed', 'object', 'area', 'summary'
]

def count_and_extract_interactive_tags(html_content):
    """Find every element whose tag name is listed in `interactive_tags`.

    Returns a pair ``(tag_count, filtered_tags)``: a dict mapping each listed
    tag name to its number of matches, and a list of all matched elements
    grouped in the order of `interactive_tags`.
    """
    soup = BeautifulSoup(html_content, 'lxml')
    matches = [(name, soup.find_all(name)) for name in interactive_tags]
    tag_count = {name: len(found) for name, found in matches}
    filtered_tags = [element for _, found in matches for element in found]
    return tag_count, filtered_tags

def save_filtered_tags_to_html(filtered_tags, output_path):
    """Write the given tags, one per line, inside a minimal HTML document.

    Each tag is converted with str() and the file is written as UTF-8.
    """
    header = '<html><head><title>Filtered Interactive Tags</title></head><body>\n'
    footer = '</body></html>'
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(header)
        file.writelines(str(tag) + '\n' for tag in filtered_tags)
        file.write(footer)

# Build the absolute path to 'pt.html' in the user's Downloads folder.
home_dir = os.path.expanduser('~')
downloads_dir = os.path.join(home_dir, 'Downloads')
file_path = os.path.join(downloads_dir, 'pt.html')

# Report a missing file; otherwise count the interactive tags, print the
# totals and save the matched tags next to the input file.
if not os.path.isfile(file_path):
    print(f'The file {file_path} does not exist.')
else:
    with open(file_path, 'r', encoding='utf-8') as file:
        html_content = file.read()
        tag_count, filtered_tags = count_and_extract_interactive_tags(html_content)
        total_interactive_tags = sum(tag_count.values())

        print(f'Total number of interactive HTML tags: {total_interactive_tags}')
        print('Detailed count per tag:')
        for tag, count in tag_count.items():
            print(f'{tag}: {count}')

        # Output file for the filtered tags
        output_path = os.path.join(downloads_dir, 'filtered_tags.html')
        save_filtered_tags_to_html(filtered_tags, output_path)
        print(f'Filtered tags have been saved to: {output_path}')


In [None]:
import cv2
import os
# Get the path to the user's home directory
home_dir = os.path.expanduser('~')

# Construct the path to the Downloads folder
downloads_dir = os.path.join(home_dir, 'Downloads')

# Construct the absolute path to the screenshot 'pt.png'
file_path = os.path.join(downloads_dir, 'pt.png')
# Load image, grayscale, Gaussian blur, Otsu's threshold, dilate
image = cv2.imread(file_path)
# cv2.imread returns None for a missing or unreadable file instead of raising;
# stop here with a clear message rather than failing on image.copy() below.
if image is None:
    raise FileNotFoundError(f'Could not read image: {file_path}')
original = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,15))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find contours, obtain bounding box coordinates, and extract ROI
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns a 2-tuple or a 3-tuple depending on the OpenCV version;
# pick the contour list in either case.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
print("number of components:", len(cnts))
image_number = 0
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    print("Dimensions:", w,h)
    # Draw the box on the annotated copy and cut the region from the untouched one.
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
    ROI = original[y:y+h, x:x+w]
    # ROI crops are written to the current working directory.
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)
    image_number += 1

# Save the annotated image and the intermediate masks next to the input.
cv2.imwrite(os.path.join(downloads_dir, 'pt_bbox.png'), image)
cv2.imwrite(os.path.join(downloads_dir, 'pt_thresh.png'), thresh)
cv2.imwrite(os.path.join(downloads_dir, 'pt_dilate.png'), dilate)
cv2.waitKey()

In [None]:
import os
import math

def process_file(input_path, output_path):
    """Halve (rounding up) every space-delimited integer in a text file.

    Only digit runs that have a space directly before AND after them are
    changed; everything else is copied verbatim.

    Args:
        input_path: File to read.
        output_path: File the modified text is written to.
    """
    import re

    # Open the input file and read its content
    with open(input_path, 'r') as file:
        content = file.read()

    # Function to process each match (number in the text)
    def process_match(match):
        number = int(match.group(0))
        # Integer ceil-division stays exact for arbitrarily large values;
        # math.ceil(number / 2) went through a float and lost precision
        # above 2**53.
        return str((number + 1) // 2)

    # Replace numbers in the content; the lookarounds require a space on both
    # sides without consuming it, so neighbouring numbers are all matched.
    modified_content = re.sub(r'(?<= )\d+(?= )', process_match, content)

    # Write the modified content to the output file
    with open(output_path, 'w') as file:
        file.write(modified_content)

if __name__ == '__main__':
    # Resolve the Downloads folder through expanduser('~'), as the other cells
    # do; os.environ['USERPROFILE'] is only defined on Windows.
    downloads_dir = os.path.join(os.path.expanduser('~'), 'Downloads')

    # Define the input and output paths
    input_path = os.path.join(downloads_dir, 'pt.txt')
    output_path = os.path.join(downloads_dir, 'pt_edited.txt')

    # Process the file
    process_file(input_path, output_path)
    print(f"Processed file saved as {output_path}")


In [None]:
from PIL import Image

def change_colored_pixels_to_black(image_path, output_path):
    """Paint every pixel with non-zero alpha black and save the result.

    The image is converted to RGBA; pixels whose alpha is 0 are left as they
    are, all others become (0, 0, 0) with their alpha kept.
    """
    img = Image.open(image_path).convert("RGBA")
    pixels = img.load()  # pixel access object, indexed as [x, y]
    width, height = img.size

    for row in range(height):
        for col in range(width):
            alpha = pixels[col, row][3]
            if alpha == 0:
                # Fully transparent pixel: nothing to change.
                continue
            pixels[col, row] = (0, 0, 0, alpha)

    img.save(output_path)

# Example usage: blacken 'vlm-copy.png' and save the result under a new name
# in the same folder.
input_image_path = "paper_example_screenshots/vlm-copy.png"
output_image_path = "paper_example_screenshots/vlm_copy_copy.png"
change_colored_pixels_to_black(input_image_path, output_image_path)


In [None]:
import langchain
import os
import openai
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import OpenAI, VectorDBQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
import nltk
import requests
# Download the NLTK "punkt" resource when the cell runs.
# NOTE(review): presumably needed by the unstructured file loader used below
# — confirm.
nltk.download("punkt")

def download_pdf(pdf_url, output_path):
    """Download `pdf_url` and store the response body at `output_path`.

    An existing file is overwritten, but only after the download succeeded.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(pdf_url, timeout=60)
    # Do not save an HTTP error page under a .pdf name.
    response.raise_for_status()
    # 'wb' truncates an existing file, so no separate delete is required.
    with open(output_path, 'wb') as f:
        f.write(response.content)

# Keep a key that is already exported. The unconditional assignment used to
# replace it with the placeholder text; the placeholder is now only the
# fallback when the variable is unset.
os.environ.setdefault("OPENAI_API_KEY", "OPENAI_API_KEY")


def query(pdf_url, output_path):
    """Download a PDF and ask an LLM which datasets and benchmarks it uses.

    The PDF is saved to `output_path`, loaded, split into 800-character
    chunks without overlap, embedded into a Chroma store and queried through
    a "stuff" VectorDBQA chain.

    Args:
        pdf_url: Address of the PDF to download.
        output_path: Local path the PDF is stored under.

    Returns:
        Whatever `chain.run` returns for the question.
    """
    download_pdf(pdf_url, output_path)
    # To load the file as a list of elements instead, pass mode='elements'.
    # (The old second loader for 'SamplePDF.pdf' was created but never used.)
    loader = UnstructuredFileLoader(output_path)
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
    doc_search = Chroma.from_documents(texts, embeddings)
    chain = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=doc_search)

    # Named `question` so it no longer shadows this function's own name.
    question = "Provide me with a list of all datasets and benchmarks used on in this paper."
    return chain.run(question)

# Run the query against one arXiv paper; the notebook displays the answer.
query("https://arxiv.org/pdf/2302.04761", "paper.pdf")