
# Webdriver

Selenium webdriver notebooks to control web pages using code.




## Client



### Initialization


#### the code

webdriver client?

selenium client?

Create a generic client to be used by other notebook cells.


In [None]:
const { updateOrAddSession } = importer.import('selenium sessions')
const { Builder, Browser, By, Key, until } = require('selenium-webdriver')
const chrome = require('selenium-webdriver/chrome');
const os = require('os')
const path = require('path')
const fs = require('fs')

// Per-user application-data root. Explicit overrides win (APP_SUPPORT_DIR, then
// LOCALAPPDATA); otherwise the root is derived from HOME, and finally from USERPROFILE.
const APP_SUPPORT = (() => {
  const { APP_SUPPORT_DIR, LOCALAPPDATA, HOME, USERPROFILE } = process.env
  if (APP_SUPPORT_DIR) {
    return APP_SUPPORT_DIR
  }
  if (LOCALAPPDATA) {
    return LOCALAPPDATA
  }
  if (HOME) {
    const subdir = os.platform() == 'darwin' ? 'Library/Application Support' : '.config'
    return path.join(HOME, subdir)
  }
  return path.join(USERPROFILE, 'AppData/Local')
})()

// Candidate Chrome default-profile directories, one per platform.
const WIN_PROFILE = path.join(APP_SUPPORT, 'Google/Chrome/User Data/Default')
const DARWIN_PROFILE = path.join(APP_SUPPORT, 'Google/Chrome/Default')
const LINUX_PROFILE = path.join(APP_SUPPORT, 'google-chrome/default')

// Profile directory for the current platform; anything that is neither win32 nor
// darwin falls back to the Linux layout.
const CHROME_PROFILE = ({ win32: WIN_PROFILE, darwin: DARWIN_PROFILE })[os.platform()] || LINUX_PROFILE

/**
 * Build a Chrome webdriver with a fixed user-data directory and a set of
 * prompt-suppressing flags, record its session id via updateOrAddSession(),
 * and return the driver.
 *
 * @returns {Promise<object>} the driver produced by Builder#build()
 */
async function getClient() {
  const builder = await new Builder().forBrowser(Browser.CHROME)
  const options = new chrome.Options()
  // NOTE(review): plain property assignment; whether selenium reads `port` from the
  // options object is not visible here — confirm.
  options.port = 4444

  // Pin the profile directory so it stays the same between runs; by default a new
  // random profile is generated every time.
  const profileDir = fs.existsSync(CHROME_PROFILE)
    ? path.join(path.dirname(CHROME_PROFILE), 'not-Default')
    : '/tmp/profile-1'
  console.log('starting client', profileDir)
  options.addArguments('--user-data-dir=' + profileDir)

  const flags = [
    '--no-sandbox',
    '--disable-session-crashed-bubble',
    '--disable-infobars',
    '--new-window',
    '--disable-geolocation',
    '--disable-notifications',
    '--show-saved-copy',
    '--silent-debugger-extension-api',
  ]
  for (const flag of flags) {
    options.addArguments(flag)
  }

  options.setUserPreferences({
    'profile.default_content_setting_values.notifications': 2,
    'exited_cleanly': true,
    'exit_type': 'None'
  })

  builder.setChromeOptions(options)
  const driver = builder.build()

  // Keep track of the session in case it can be reattached later.
  const started = await driver.getSession()
  await updateOrAddSession(started.getId())

  // Re-read the session and write its own id back onto it.
  const session = await driver.getSession()
  session.id_ = session.getId()
  return driver
}

module.exports = getClient



### Session


#### the code

selenium session?


In [None]:

const fs = require('fs');
const path = require('path');

// Home directory, taken from the first of HOME, HOMEPATH or USERPROFILE that is set.
const PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
// Directory that holds the session list.
const TOKEN_DIR = path.join(PROFILE_PATH, '.credentials');
// JSON file containing the list of known sessions.
const SESSIONS_PATH = path.join(TOKEN_DIR, 'sessions.json');

// In-memory copy of the session list, refreshed by readSessions().
let sessions = [];
// mtime (ms) of SESSIONS_PATH at the last refresh; readSessions() re-reads only when the file is newer.
let sessionModified = 0;

/**
 * Return the cached session list, refreshing it from SESSIONS_PATH when the file's
 * modification time is newer than the last successful read.
 *
 * Any error while checking, reading or parsing the file resets the cache to an empty
 * list. The modification-time watermark is only advanced after a successful parse, so
 * a failed read (for example a partially written file) is retried on the next call
 * instead of leaving the cache empty until the file changes again.
 *
 * @returns {Array} the session entries; updateOrAddSession() writes them as
 *   [timestampMs, sessionId] pairs
 */
function readSessions() {
    try {
        if (fs.existsSync(SESSIONS_PATH)) {
            const modified = fs.statSync(SESSIONS_PATH).mtime.getTime();
            if (modified > sessionModified) {
                const parsed = JSON.parse(fs.readFileSync(SESSIONS_PATH).toString());
                // Callers filter/iterate the result, so anything that is not a list
                // is treated as "no sessions".
                sessions = Array.isArray(parsed) ? parsed : [];
                sessionModified = modified;
            }
        }
    } catch (e) {
        sessions = [];
    }
    return sessions;
}


/**
 * Refresh the timestamp of a known session, or append a new [timestampMs, sessionId]
 * entry, then write the whole list back to SESSIONS_PATH.
 *
 * @param {string} currentSession - session id to record; when falsy nothing is written
 * @returns {Array} the session list (the same array readSessions() returned)
 */
function updateOrAddSession(currentSession) {
    const sessions = readSessions();

    if (!currentSession) {
        return sessions;
    }

    const now = (new Date()).getTime();
    const existing = sessions.find(s => s[1] === currentSession);
    if (typeof existing !== 'undefined') {
        console.log('update ' + currentSession);
        existing[0] = now;
    } else {
        console.log('insert ' + currentSession);
        sessions.push([now, currentSession]);
    }

    // The credentials directory may not exist yet; without this the
    // write below fails with ENOENT.
    fs.mkdirSync(TOKEN_DIR, { recursive: true });
    console.log('writing ' + SESSIONS_PATH)
    fs.writeFileSync(
        SESSIONS_PATH,
        JSON.stringify(sessions, null, 4));
    return sessions;
}


module.exports = {
    updateOrAddSession,
    readSessions,
}



### Test

Test the driver.




#### the code

webdriver test?


In [None]:

const getClient = importer.import('webdriver client')

/**
 * Smoke test: open a client, load a page, wait four seconds, then shut the
 * driver down. Navigation errors are logged rather than thrown.
 *
 * @returns {Promise<void>} resolves once the driver has finished quitting
 */
async function testDriver() {
  const driver = await getClient()
  try {
    const facebook = 'https://www.facebook.com'

    await driver.get(facebook)

    await new Promise(resolve => setTimeout(resolve, 4000))
  } catch (e) {
    console.log(e)
  } finally {
    // Await the shutdown so callers do not continue while the browser is still
    // closing, and so a failed quit surfaces here instead of as an unhandled rejection.
    await driver.quit()
  }
}

module.exports = testDriver



### Close All



#### the code

close all windows?


In [None]:
const { WebDriver, Capabilities, Session } = require('selenium-webdriver')
const createExecutor = importer.import('selenium executor')

const LOCAL_URL = 'http://localhost:4444/wd/hub';

/**
 * Attach to an existing webdriver session and close every window except `keep`.
 * Errors are logged, not thrown.
 *
 * @param {object} driver - unused; kept for call-site compatibility
 * @param {string|Array} sessionId - either the bare session id or a
 *   [timestamp, sessionId] entry as stored in the sessions file
 * @param {string} [keep] - window handle to leave open
 * @returns {Promise<void>}
 */
async function closeAllWindows(driver, sessionId, keep) {
  // The original passed `sessionId` whole to Session but logged `sessionId[1]`;
  // normalise so both call styles attach to the right session and log the real id.
  const id = Array.isArray(sessionId) ? sessionId[1] : sessionId
  const driver2 = new WebDriver(
    new Session(id, Capabilities.chrome()), createExecutor(Promise.resolve(LOCAL_URL)))

  try {
    const windows = await driver2.getAllWindowHandles()
    console.log('closing session ' + id + ' windows ' + windows)
    for (const handle of windows) {
      if (handle !== keep) {
        await driver2.switchTo().window(handle)
        await driver2.close()
      }
    }
  } catch (e) {
    console.log(e)
  }
}

module.exports = closeAllWindows



### Executor

Override the built-in executor because it no longer lets me change session IDs manually.




#### the code

selenium executor?


In [None]:
const _http = require('selenium-webdriver/http');

// Names of the Chromium-specific commands that configureExecutor() registers.
const Command = {
  LAUNCH_APP: 'launchApp',
  GET_NETWORK_CONDITIONS: 'getNetworkConditions',
  SET_NETWORK_CONDITIONS: 'setNetworkConditions',
  DELETE_NETWORK_CONDITIONS: 'deleteNetworkConditions',
  SEND_DEVTOOLS_COMMAND: 'sendDevToolsCommand',
  SEND_AND_GET_DEVTOOLS_COMMAND: 'sendAndGetDevToolsCommand',
  SET_PERMISSION: 'setPermission',
  GET_CAST_SINKS: 'getCastSinks',
  SET_CAST_SINK_TO_USE: 'setCastSinkToUse',
  START_CAST_DESKTOP_MIRRORING: 'startDesktopMirroring',
  START_CAST_TAB_MIRRORING: 'setCastTabMirroring',
  GET_CAST_ISSUE_MESSAGE: 'getCastIssueMessage',
  STOP_CASTING: 'stopCasting',
}

/**
 * Register every command in `Command` on the executor, in declaration order.
 *
 * @param {object} executor - object exposing defineCommand(name, method, path)
 * @param {string} vendorPrefix - path segment interpolated into the cast routes
 */
function configureExecutor(executor, vendorPrefix) {
  const chromium = '/session/:sessionId/chromium'
  const cast = `/session/:sessionId/${vendorPrefix}/cast`

  // [command name, HTTP method, route]
  const routes = [
    [Command.LAUNCH_APP, 'POST', `${chromium}/launch_app`],
    [Command.GET_NETWORK_CONDITIONS, 'GET', `${chromium}/network_conditions`],
    [Command.SET_NETWORK_CONDITIONS, 'POST', `${chromium}/network_conditions`],
    [Command.DELETE_NETWORK_CONDITIONS, 'DELETE', `${chromium}/network_conditions`],
    [Command.SEND_DEVTOOLS_COMMAND, 'POST', `${chromium}/send_command`],
    [Command.SEND_AND_GET_DEVTOOLS_COMMAND, 'POST', `${chromium}/send_command_and_get_result`],
    [Command.SET_PERMISSION, 'POST', '/session/:sessionId/permissions'],
    [Command.GET_CAST_SINKS, 'GET', `${cast}/get_sinks`],
    [Command.SET_CAST_SINK_TO_USE, 'POST', `${cast}/set_sink_to_use`],
    [Command.START_CAST_DESKTOP_MIRRORING, 'POST', `${cast}/start_desktop_mirroring`],
    [Command.START_CAST_TAB_MIRRORING, 'POST', `${cast}/start_tab_mirroring`],
    [Command.GET_CAST_ISSUE_MESSAGE, 'GET', `${cast}/get_issue_message`],
    [Command.STOP_CASTING, 'POST', `${cast}/stop_casting`],
  ]

  for (const [name, method, route] of routes) {
    executor.defineCommand(name, method, route)
  }
}

/**
 * Build an HTTP command executor for a webdriver server and register the
 * Chromium commands on it.
 *
 * @param {Promise<string>|string} url - server URL, or a promise for it
 * @param {string} [vendorPrefix='goog'] - vendor segment for the cast routes.
 *   Callers in this notebook omit it, which previously produced routes containing
 *   the literal text "undefined"; it now defaults to Chrome's prefix.
 * @returns {object} the configured executor
 */
function createExecutor(url, vendorPrefix = 'goog') {
  const agent = new _http.Agent({ keepAlive: true })
  // Promise.resolve() lets callers pass a plain string as well as a promise.
  const client = Promise.resolve(url).then((resolvedUrl) => new _http.HttpClient(resolvedUrl, agent))
  const executor = new _http.Executor(client)
  configureExecutor(executor, vendorPrefix)
  return executor
}

module.exports = createExecutor



### List Sessions



#### the code

list sessions?


In [None]:
const {readSessions} = importer.import('selenium session')
const { WebDriver, Capabilities, Session } = require('selenium-webdriver')
const chrome = require('selenium-webdriver/chrome');
const createExecutor = importer.import('selenium executor')
const getClient = importer.import('webdriver client')

const LOCAL_URL = 'http://localhost:4444/wd/hub';

/**
 * Check whether a stored session still answers, by attaching a throwaway Chrome
 * driver to it and asking for its window handles.
 *
 * @param {object} driver - unused by this function
 * @param {Array} sessionId - stored entry; index 1 holds the session id
 * @returns {Promise<string|undefined>} the session id when the request succeeds,
 *   undefined (after logging the error) when it fails
 */
async function verifySession(driver, sessionId) {
  const attached = new Session(sessionId[1], Capabilities.chrome())
  const executor = createExecutor(Promise.resolve(LOCAL_URL))
  const probe = new chrome.Driver(attached, executor)

  try {
    const handles = await probe.getAllWindowHandles()
    console.log('windows ', handles)
    return sessionId[1]
  } catch (err) {
    console.log(err)
  }
  return undefined
}

/**
 * List the stored sessions that still respond.
 *
 * Entries without a non-empty session id are skipped; each remaining entry is
 * probed with verifySession(), one at a time, and kept when the probe returns a value.
 *
 * @param {object} [driver] - passed through to verifySession(); when falsy a new
 *   client is created with getClient() first
 * @param {boolean} [inactive=false] - accepted for compatibility; it does not
 *   affect the result
 * @returns {Promise<Array>} the stored entries that verified, without duplicates
 */
async function getSessions(driver, inactive = false) {
  const stored = readSessions()
  const activeDriver = driver ? driver : await getClient()

  const hasSessionId = (entry) => typeof entry[1] !== 'undefined'
    && entry[1] !== null && entry[1].length > 0
  const candidates = [].concat(stored).filter(hasSessionId)

  const available = []
  for (const candidate of candidates) {
    const verified = await verifySession(activeDriver, candidate)
    if (typeof verified !== 'undefined') {
      available.push(candidate)
    }
  }

  return available
    .filter(entry => typeof entry !== 'undefined' && entry !== null)
    .filter((entry, position, all) => all.indexOf(entry) === position)
}


module.exports = getSessions



## Selenium Dom



### Select Dom

Performs the same job as the notebook's selectDom() function in Core/patterns.ipynb, which is used for selecting elements from code conversions.

This performs the selection on the client and relies on webdriver communication to keep track of the referenced elements. This is a natural use case for web driver, this just makes the interface easy to select from.



#### the code

selenium select?

TODO: include JSDOM webpacked for javascript web-worker service evaluations where document DOM is not available.


In [None]:

const {selectDom, evaluateDom} = importer.import('select tree')
const {walkTree} = importer.import('walk tree')

/**
 * Run selectDom() inside the page controlled by `driver` and return its result.
 *
 * The in-page function receives the three helpers as its first arguments, installs
 * any that are not already present on `window` by eval-ing them, and then calls
 * selectDom(select, ctx || document).
 *
 * @param {object} driver - object exposing executeScript(script, ...args)
 * @param {*} select - selector passed straight to selectDom()
 * @param {*} [ctx] - context passed to selectDom(); the page's document when falsy
 * @returns {Promise<*>} whatever executeScript() resolves with
 */
async function getAllXPath(driver, select, ctx) {
  // NOTE(review): the parameter and local names of this function are visible to the
  // eval'd helpers, so they are deliberately left untouched.
  const inPage = function main(evaluateDomString, walkTreeString, selectDomString, select, ctx) {
    if(!window.evaluateDom) {
      window.evaluateDom = eval('(' + evaluateDomString + ')')
    }
    if(!window.walkTree) {
      window.walkTree = eval('(' + walkTreeString + ')')
    }
    if(!window.selectDom) {
      window.selectDom = eval('(' + selectDomString + ')')
    }
    let result = selectDom(select, ctx || document)
    return result;
  }

  return driver.executeScript(inPage, evaluateDom, walkTree, selectDom, select, ctx)
}

module.exports = getAllXPath


## old way

How to start a webdriver client in Node:



In [None]:
var importer = require('../Core');
var {remote} = require('webdriverio');
var {
    getSessions,
    onlyOneWindow,
    getAllSessionUrls,
} = importer.import([
    'only one window',
    'get all session urls',
    'manage webdriver sessions'
])
var MAX_SESSIONS = 4;
//var MAX_SESSIONS = 36;

/**
 * Start a webdriverio client against a selenium/chromedriver server, attach it to
 * a stored session, and run the window/session housekeeping steps.
 *
 * @param {string} [host='localhost'] - webdriver server host
 * @param {number} [port=4444] - webdriver server port
 * @returns {Promise<object>} the client returned by remote(), after
 *   connectSession, getSessions, onlyOneWindow and getAllSessionUrls have run
 * @throws {Error} 'there is an error with the client ...' when any step fails
 */
function createWebdriverClient(host, port) {
    const webdriverServer = {
        services: ['selenium-standalone', 'chromedriver'],
        sync: false,
        debug: false,
        host: host || 'localhost',
        port: port || 4444,
        logLevel: 'silent',
        baseUrl: 'https://webdriver.io',
        pageLoadStrategy: 'eager',
        connectionRetryTimeout: 1000,
        capabilities: {
            browserName: 'chrome',
            'goog:chromeOptions': {
                prefs: {
                    'download.default_directory': '/data/downloads',
                    'profile.default_content_setting_values.notifications': 2,
                    'exited_cleanly': true,
                    'exit_type': 'None'
                },
                args: [
                    // No user-data-dir here: the session is injected through the API below.
                    // TODO: https://superuser.com/questions/461035/disable-google-chrome-session-restore-functionality
                    'no-sandbox',
                    'disable-session-crashed-bubble',
                    'disable-infobars',
                    'new-window',
                    'disable-geolocation',
                    'disable-notifications',
                    'show-saved-copy',
                    'silent-debugger-extension-api'
                ]
            }
        },
    };

    const promise = remote(webdriverServer);
    let client;
    const connectSession = importer.import('connect webdriver session');
    return promise
        .then(r => client = r)
        .then(() => connectSession(client))
        .then(() => getSessions(client))
        .then(() => onlyOneWindow(client))
        .then(() => getAllSessionUrls(client))
        .catch(e => {
            console.log(e);
            // The original also assigned the error to an undeclared `isError`, which
            // leaks a global (or throws a ReferenceError in strict mode and hides the
            // real failure). Nothing reads it, so it has been removed.
            throw new Error('there is an error with the client ' + e, { cause: e });
        })
        .then(() => client);
}

module.exports = createWebdriverClient;


connect to webdriver session

find webdriver sessions


In [None]:
var importer = require('../Core');
var readSessions = importer.import('load webdriver sessions');
var {
    getSessions,
    lockPromise,
    updateOrAddSession
} = importer.import('manage webdriver sessions');

var TIMEOUT = 10000;
var MAX_SESSIONS = 4;

/**
 * Attach `client` to a reusable stored session, or leave it on a new one, while
 * holding the init lock.
 *
 * The lock is always released, whether or not the attempt failed. On failure the
 * error is logged, client.sessionId is set to null, and the error is re-thrown
 * after the unlock.
 *
 * @param {object} client - webdriver client; its options.connectionRetryTimeout
 *   and sessionId are modified
 * @returns {Promise<string>} the client's session id
 * @throws {Error} 'Already running max sessions N' when the chosen slot index is
 *   at or beyond MAX_SESSIONS, or whatever error a step produced
 */
async function connectSession(client) {
    let failure = false;
    try {
        await lockPromise(true, true);
        const validSessions = await getSessions(client, true);
        failure = false;

        // Slot of the reusable session; when none is found the next slot is the
        // end of the stored list.
        const stored = readSessions();
        let index = stored.map(s => s[1]).indexOf(validSessions[0] || 0);
        if(index === -1) {
            console.log('session not found ' + validSessions[0]);
            index = stored.length;
        }
        if(index >= MAX_SESSIONS) {
            throw new Error('Already running max sessions ' + MAX_SESSIONS);
        }

        client.options.connectionRetryTimeout = TIMEOUT;
        // TODO: fix this, doesn't work on second init, keeps opening new windows if
        // the chrome profile path is already open for read/write
        if(typeof validSessions[0] === 'undefined') {
            console.log('new session ' + index);
        } else {
            console.log('using existing session ' + index + ' - ' + validSessions[0]);
            client.sessionId = validSessions[0];
        }

        await client.status();
        await updateOrAddSession(client.sessionId);
    } catch (e) {
        console.log(e);
        client.sessionId = null;
        failure = e;
    }

    await lockPromise(false, true);
    if(failure) {
        throw failure;
    }
    return client.sessionId;
}
module.exports = connectSession;


Load webdriver sessions



In [None]:
var fs = require('fs');
var path = require('path');

// Directory that holds the session list, under the first of HOME, HOMEPATH or USERPROFILE that is set.
var TOKEN_DIR = path.join(process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE, '.credentials');
// JSON file containing the list of known sessions.
var SESSIONS_PATH = path.join(TOKEN_DIR, 'sessions.json');

// In-memory copy of the session list, refreshed by readSessions().
var sessions = [];
// mtime (ms) of SESSIONS_PATH at the last refresh; readSessions() re-reads only when the file is newer.
var sessionModified = 0;

/**
 * Return the cached session list, refreshing it from SESSIONS_PATH when the file's
 * modification time is newer than the last successful read.
 *
 * Any error while checking, reading or parsing the file resets the cache to an empty
 * list. The modification-time watermark is only advanced after a successful parse, so
 * a failed read (for example a partially written file) is retried on the next call
 * instead of leaving the cache empty until the file changes again.
 *
 * @returns {Array} the session entries
 */
function readSessions() {
    try {
        if(fs.existsSync(SESSIONS_PATH)) {
            var modified = fs.statSync(SESSIONS_PATH).mtime.getTime();
            if(modified > sessionModified) {
                var parsed = JSON.parse(fs.readFileSync(SESSIONS_PATH).toString());
                // Callers filter/iterate the result, so anything that is not a list
                // is treated as "no sessions".
                sessions = Array.isArray(parsed) ? parsed : [];
                sessionModified = modified;
            }
        }
    } catch (e) {
        sessions = [];
    }
    return sessions;
}
module.exports = readSessions;


update session


In [None]:
var lockFile = require('lockfile');
var fs = require('fs');
var path = require('path');
var importer = require('../Core');
var readSessions = importer.import('load webdriver sessions');

// Directory that holds the session list, under the first of HOME, HOMEPATH or USERPROFILE that is set.
var TOKEN_DIR = path.join(process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE, '.credentials');
// JSON file containing the list of known sessions; the lock file names are derived from it.
var SESSIONS_PATH = path.join(TOKEN_DIR, 'sessions.json');
// Passed as both `stale` and `wait` when taking the "init" lock.
var INIT_WAIT = 60000; // 36 * session test time * number of simultaneous sessions
// Passed as both `stale` and `wait` when taking the "update" lock.
var UPDATE_WAIT = 1000;

// lock / unlock
// insert - posibility of a session being reused, but sych session starts
/**
 * Promise wrapper around lockFile.lock / lockFile.unlock for the sessions file.
 *
 * Two independent locks exist, "<SESSIONS_PATH>.init.lock" and
 * "<SESSIONS_PATH>.update.lock". When locking, the matching wait constant
 * (INIT_WAIT or UPDATE_WAIT) is passed as both the `stale` and `wait` option.
 *
 * @param {boolean} [lock=true] - true to acquire, false to release
 * @param {boolean} [init=false] - true for the init lock, false for the update lock
 * @returns {Promise<void>} resolves when lockFile reports success, rejects with its error
 */
function lockPromise(lock = true, init = false) {
    const phase = init ? 'init' : 'update';
    console.log(phase + ' - ' + (lock ? 'locking' : 'unlocking'));
    return new Promise((resolve, reject) => {
        const lockPath = SESSIONS_PATH + '.' + phase + '.lock';
        const onDone = (err) => {
            if(err) {
                return reject(err);
            }
            console.log(phase + ' - ' + (lock ? 'lock' : 'unlock'));
            resolve();
        };
        if(!lock) {
            return lockFile.unlock(lockPath, onDone);
        }
        const limit = init ? INIT_WAIT : UPDATE_WAIT;
        return lockFile.lock(lockPath, { stale: limit, wait: limit }, onDone);
    });
}

/**
 * Refresh the timestamp of a known session, or append a new [timestampMs, sessionId]
 * entry, then write the whole list back to SESSIONS_PATH.
 *
 * @param {string} currentSession - session id to record; when falsy nothing is written
 * @returns {Array} the session list (the same array readSessions() returned)
 */
function updateOrAddSession(currentSession) {
    const sessions = readSessions();
    if(!currentSession) {
        return sessions;
    }

    const now = (new Date()).getTime();
    const existing = sessions.find(s => s[1] === currentSession);
    if(typeof existing !== 'undefined') {
        console.log('update ' + currentSession);
        existing[0] = now;
    } else {
        console.log('insert ' + currentSession);
        sessions.push([now, currentSession]);
    }

    // The credentials directory may not exist yet; without this the
    // write below fails with ENOENT.
    fs.mkdirSync(TOKEN_DIR, { recursive: true });
    fs.writeFileSync(
        SESSIONS_PATH,
        JSON.stringify(sessions, null, 4));
    return sessions;
}
module.exports = {
    updateOrAddSession,
    lockPromise
};


Manage webdriver sessions



In [None]:
var importer = require('../Core');
var readSessions = importer.import('load webdriver sessions');
var {
    verifySession,
    lockPromise,
    updateOrAddSession
} = importer.import('verify session');

var TIMEOUT = 10000;

/**
 * Probe the stored sessions with verifySession() and return the ids that answered.
 *
 * @param {object} client - webdriver client; its sessionId is restored to the
 *   original value once the probes have finished
 * @param {boolean} [inactive=false] - when true, only entries whose timestamp is
 *   more than TIMEOUT ms old are probed, and probing stops at the first usable one
 * @returns {Promise<Array>} the distinct values returned by the successful probes
 */
function getSessions(client, inactive = false) {
    const stored = readSessions();
    const original = client.sessionId;

    const hasSessionId = (entry) => typeof entry[1] !== 'undefined'
        && entry[1] !== null && entry[1].length > 0;
    const isIdle = (entry) => (new Date()).getTime() - entry[0] > TIMEOUT;

    let candidates = [].concat(stored).filter(hasSessionId);
    if(inactive) {
        candidates = candidates.filter(isIdle);
    }

    let found = false;
    const probes = candidates.map((entry) => (resolve) => {
        if(found) {
            return resolve();
        }
        console.log(entry);
        return verifySession(client, entry)
            .catch((e) => console.log(e))
            .then((id) => {
                // only one decent session is needed in "inactive" mode
                if(inactive && typeof id !== 'undefined') {
                    found = true;
                }
                return resolve(id);
            });
    });

    return importer.runAllPromises(probes).then((results) => {
        client.sessionId = original;
        const usable = results.filter((id) => typeof id !== 'undefined' && id !== null);
        return usable.filter((id, position, all) => all.indexOf(id) === position);
    });
}

module.exports = {
    getSessions,
    lockPromise,
    updateOrAddSession
};



verify session



In [None]:
var importer = require('../Core');
var {
    updateOrAddSession,
    lockPromise
} = importer.import('update session');

// Milliseconds; the 'result' handler skips the write while the stored timestamp is at most TIMEOUT / 2 old.
var TIMEOUT = 10000;
// True while verifySession() is probing; the 'result' handler does nothing while it is set.
var scanning = false;

// Session list as last returned by updateOrAddSession() inside the 'result' handler.
var sessions = [];

// Set once addPlugins() has registered its 'result' handler, so it registers only once.
var first = false;
/**
 * Register, on the first call only, a 'result' listener that records the client's
 * current session in the sessions file.
 *
 * The listener does nothing while a scan is running, and skips the write while the
 * session's stored timestamp is at most TIMEOUT / 2 old. Otherwise it takes the
 * update lock, calls updateOrAddSession(), keeps the returned list, and releases
 * the lock; errors are logged.
 *
 * @param {object} client - webdriver client exposing on(event, handler) and sessionId
 */
function addPlugins(client) {
    if(first) {
        return;
    }
    first = true;

    client.on('result', (result) => {
        if(scanning) {
            return;
        }
        const currentSession = client.sessionId;
        const known = sessions.filter(s => s[1] === currentSession)[0];

        // only update the session often enough that it isn't reused by another process
        const isFresh = typeof known !== 'undefined'
            && (new Date()).getTime() - known[0] <= TIMEOUT / 2;
        if(isFresh) {
            return;
        }

        return lockPromise(true)
            .then(() => updateOrAddSession(currentSession))
            .then(updated => (sessions = updated))
            .then(() => lockPromise(false))
            .catch(e => console.log(e));
    });
}

/**
 * Point `client` at a stored session and check that it still responds.
 *
 * While the probe runs the module-level `scanning` flag is set; afterwards it is
 * put back to the value it had on entry.
 *
 * @param {object} client - webdriver client; its sessionId is overwritten with session[1]
 * @param {Array} session - stored entry; index 1 holds the session id and is blanked
 *   to '' when the session turns out to be unusable
 * @returns {Promise<string|undefined>} the session id when it responds, undefined when
 *   the session is unusable (socket timeout, "no such session", "chrome not reachable")
 * @throws any other error from the client, after logging it
 */
function verifySession(client, session) {
    client.sessionId = session[1];
    addPlugins(client);

    const wasScanning = scanning;
    scanning = true;
    const restoreScanning = () => {
        scanning = false || wasScanning;
    };

    const isUnusable = (e) => e.message === 'ESOCKETTIMEDOUT'
        || e.message.includes('no such session')
        || e.message.includes('chrome not reachable');

    return client.getWindowHandle()
        .then(handle => client.switchToWindow(handle))
        .then(() => client.status())
        .then(() => session[1])
        .catch(e => {
            restoreScanning();
            if(!isUnusable(e)) {
                console.log('error ' + session[1]);
                console.log(e)
                throw e;
            }
            console.log('unusable session ' + session);
            session[1] = '';
        })
        .then(outcome => {
            restoreScanning();
            return outcome;
        })
}

module.exports = {
    lockPromise, verifySession, updateOrAddSession, scanning
};



In [None]:
// Notebook cell: creates a client on localhost:4444 and reports the outcome through
// the notebook's $$ object.
// NOTE(review): the guard requires `client` to be defined already, yet `client` is
// declared inside this very block — confirm whether `=== 'undefined'` was intended or
// the guard is a deliberate way to keep the cell from running on import.
if(typeof client !== 'undefined' && typeof $$ !== 'undefined') {
    $$.async();
    var client = createWebdriverClient('localhost', 4444)
        .then(r => $$.sendResult(r))
        .catch(e => $$.sendError(e));
}


In [None]:
// Notebook cell: when a client and the notebook's $$ object are both defined, fetch
// the client's window handles and report them (or the error) through $$.
if(typeof client !== 'undefined' && typeof $$ !== 'undefined') {
    $$.async();
    client.windowHandles()
        .then(r => $$.sendResult(r))
        .catch(e => $$.sendError(e));
}


How to end the webdriver service


In [None]:
// Notebook cell: when a client is defined, call its endAll() method.
if(typeof client !== 'undefined') {
    client.endAll();
}



TODO: add decorated logging with screenshots of buttons results can be used: https://github.com/megamindbrian/bots/blob/master/bots/server.js

TODO: transfer state and cache to client

