# Playwright tests

In [None]:
from playwright.sync_api import sync_playwright
# NOT WORKING WITH CRON
def syncshot():
    """Capture http://whatsmyuseragent.org/ with WebKit (sync Playwright API).

    The image is written to ``../my-data/cron/example.png``, relative to the
    current working directory.
    """
    with sync_playwright() as pw:
        webkit_browser = pw.webkit.launch()
        tab = webkit_browser.new_page()
        tab.goto("http://whatsmyuseragent.org/")
        tab.screenshot(path="../my-data/cron/example.png")
        webkit_browser.close()


In [None]:
# read config from the JSON file my-config.json (path is relative to the current working directory)
async def json_screenshot ():
    import json
    config = {}
    with open('my-config.json') as f:
        config = json.load(f)

    # get sites in config if present
    sites = config['sites'] if 'sites' in config else []
    print(sites)

    # get random site
    import random
    site = random.choice(sites) if len(sites) > 0 else 'https://studio.applh.com'
    url = site['url']
    name = site['name']
    print(url)

    # FIXME: KO
    # await screenshot(url, site)
    # syncshot()

    # check if .cache folder exists
    !ls -la /var/www 

    await screenshot(url, site)


In [None]:
async def screenshot(url, site):
    """Save a full-page Chromium screenshot of ``url``.

    Args:
        url: Address to load.  A falsy value makes the call a no-op.
        site: Dict describing the capture.  Optional keys:
            ``name`` (default ``'unknown'``), ``path_data`` (default ``'.'``)
            and ``target_file`` (default
            ``{path_data}/screenshot-{name}.png``).

    Returns:
        None.  Any error (Playwright missing, navigation failure, timeout,
        ...) is printed rather than raised, so a failing site does not stop
        the caller.
    """
    # if url is not set, return
    if not url:
        return

    name = site.get('name', 'unknown')
    path_data = site.get('path_data', '.')
    target_file = site.get('target_file', f"{path_data}/screenshot-{name}.png")

    try:
        # imported here so that a missing Playwright install is reported by
        # the handler below instead of breaking the whole notebook cell
        from playwright.async_api import async_playwright

        # The context manager stops the Playwright driver and the finally
        # block closes the browser even when navigation or the capture
        # raises; otherwise the browser process would be left running.
        async with async_playwright() as playwright:
            browser = await playwright.chromium.launch()
            try:
                page = await browser.new_page()
                await page.goto(url)

                # wait for page to load
                await page.wait_for_load_state('networkidle')

                await page.screenshot(path=target_file, full_page=True)
            finally:
                await browser.close()
    except Exception as e:
        # deliberate best effort: report and carry on
        print(e)

    print(site)
    

    

In [None]:
def get_path_data(file, default='.', search_dirs=None):
    """Return the first search directory that contains ``file``.

    Args:
        file: File name (or relative path) to look for inside each directory.
        default: Value returned when no directory contains the file.
        search_dirs: Directories to probe, in priority order.  Defaults to
            ``['../my-data', '../..']`` (relative to the current working
            directory).

    Returns:
        The matching entry of ``search_dirs`` exactly as given, or
        ``default``.  Only regular files count as a match; a directory named
        ``file`` does not.
    """
    import os.path

    if search_dirs is None:
        search_dirs = ['../my-data', '../..']

    # search in search_dirs, first hit wins
    for search_dir in search_dirs:
        if os.path.isfile(os.path.join(search_dir, file)):
            return search_dir

    return default

In [None]:
def zoom5_sync():
    """Reconcile the ``geocms`` table with the files under ``media/zoom5``.

    The database is ``db-zoom5.sqlite`` in the directory returned by
    ``get_path_data`` (``../my-data``, ``../..`` or ``.``).  For every row
    whose ``x`` is not NULL:

    * if ``media/zoom5/screenshot-zoom5-{id}.png`` is missing, ``x`` is reset
      to NULL so the row matches the ``x IS NULL`` query used by
      ``zoom5_screenshot`` again;
    * if ``media/zoom5/code-zoom5-{id}.md`` is missing, it is created with
      the row's title, url and id.

    All updates are committed once at the end; if anything raises, nothing is
    committed and the connection is still closed.
    """
    # sqlite db is ../../db-zoom5.sqlite or ../my-data/db-zoom5.sqlite
    import os
    import sqlite3
    from contextlib import closing

    path_data = get_path_data('db-zoom5.sqlite')
    path_db = f"{path_data}/db-zoom5.sqlite"
    media_dir = f"{path_data}/media/zoom5"

    print(path_db)
    # closing() guarantees conn.close() even when a query or a file write fails
    with closing(sqlite3.connect(path_db)) as conn:
        c = conn.cursor()
        # get rows with x not null
        c.execute("SELECT * FROM geocms WHERE x IS NOT NULL ORDER BY z DESC, id DESC")
        rows = c.fetchall()

        # column name -> position in each row tuple
        map_col_index = {col[0]: i for i, col in enumerate(c.description)}
        print(map_col_index)

        # the markdown files below cannot be created without their folder
        os.makedirs(media_dir, exist_ok=True)

        for row in rows:
            # FIXME: make more flexible
            row_id = row[map_col_index['id']]
            url = row[map_col_index['url']]
            title = row[map_col_index['title']]

            name = f"zoom5-{row_id}"
            zoom5_file = f"{media_dir}/screenshot-{name}.png"
            if not os.path.isfile(zoom5_file):
                # screenshot is gone: put the row back in the pending state
                c.execute("UPDATE geocms SET x = NULL WHERE id = ?", (row_id,))
                print(row)

            zoom5_md = f"{media_dir}/code-{name}.md"
            if not os.path.isfile(zoom5_md):
                # explicit encoding: titles may hold non-ASCII text and the
                # locale default is not reliable (e.g. a bare "C" locale)
                with open(zoom5_md, 'w', encoding='utf-8') as f:
                    f.write(f"# {title}\n\n{url}\n\n`id={row_id}`\n")

        conn.commit()

# zoom5_sync()


In [None]:
async def zoom5_screenshot():
    """Screenshot the next pending ``geocms`` row and file the image.

    Picks the row with the highest ``id`` whose ``x`` is NULL, marks it with
    ``x = 1`` *before* the capture, then calls ``screenshot``.  When the image
    file was produced it is displayed in the notebook and moved to
    ``{path_data}/media/zoom5/``.

    Returns:
        The selected row tuple, or None when no row has ``x`` NULL.
    """
    # sqlite db is ../../db-zoom5.sqlite or ../my-data/db-zoom5.sqlite
    import os
    import shutil
    import sqlite3
    from contextlib import closing

    path_data = get_path_data('db-zoom5.sqlite')
    path_db = f"{path_data}/db-zoom5.sqlite"

    print(path_db)
    # closing() releases the connection on every exit path, including the
    # early "nothing to do" return
    with closing(sqlite3.connect(path_db)) as conn:
        c = conn.cursor()
        # get the pending row (x null) with the highest id
        c.execute("SELECT * FROM geocms WHERE x IS NULL ORDER BY id DESC LIMIT 1")
        row = c.fetchone()

        if row is None:
            print('no more row to process')
            return None

        print(row)
        # NOTE(review): positional access assumes column 0 is `id` and
        # column 4 is `url` in geocms -- confirm against the schema
        url = row[4]
        row_id = row[0]

        name = f"zoom5-{row_id}"
        target_file = f"{path_data}/screenshot-{name}.png"

        # site
        site = {
            'name': name,
            'url': url,
            'path_data': path_data,
            'target_file': target_file
        }
        print(site)

        # FIXME: don't block if there's a problem
        # BUT some request can timeout so it must retry later
        try:
            c.execute("UPDATE geocms SET x = 1 WHERE id = ?", (row_id,))
            conn.commit()
        except sqlite3.Error as e:
            print(e)

    # FIXME: what happens if too slow or timeout ?
    # take the screenshot (the DB connection is already closed at this point)
    await screenshot(url, site)

    if os.path.isfile(target_file):
        # if file exists, display it
        from IPython.display import Image, display
        display(Image(filename=target_file))

        # move file to {path_data}/media/zoom5/ (replaces an older capture)
        media_dir = f"{path_data}/media/zoom5"
        os.makedirs(media_dir, exist_ok=True)
        shutil.move(target_file, os.path.join(media_dir, os.path.basename(target_file)))

    return row

In [None]:
async def loop_screenshot(count=5):
    """Run ``zoom5_sync`` once, then process up to ``count`` pending rows.

    Args:
        count: Maximum number of ``zoom5_screenshot`` calls in this run.

    The loop stops early as soon as ``zoom5_screenshot`` returns None, i.e.
    when there is nothing left to process.
    """
    zoom5_sync()
    # some websites are protected against flood requests, hence the small
    # per-run batch
    for i in range(count):
        print(i)
        row = await zoom5_screenshot()
        # if nothing to process, break
        if row is None:
            break

In [None]:
# run one batch with the default count (top-level await: valid in an IPython/Jupyter cell, not in a plain script)
await loop_screenshot()

In [None]:
# list entries matching ./tmp* in the current dir (long format, human-readable sizes, oldest first)
!ls -lhtra ./tmp*