Pull some HTML from wayback machine and store it


In [None]:
var importer = require('../Core');
var util = require('util');
var request = util.promisify(require('request'));
var {JSDOM} = require('jsdom');
var fs = require('fs');
var path = require('path');

var PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
var project = PROFILE_PATH + '/Collections/flagstaff-happy';


var locations, getAllXPath;
function getLocations() {
    if(typeof locations !== 'undefined') {
        return Promise.resolve(locations);
    }
    return request('https://web.archive.org/web/20160322001433/http://keepflaghappy.com/happy-hours/friday/')
        .then(r => {
            const getAllXPath = importer.import('all elements xpath from string')(r.body);
            return getAllXPath([
                '//a[contains(@href, "by-location")]/@href',
            ]);
        })
        .then(r => {
            locations = r
                .filter(l => l.match(/by-location\/[^#]+/ig))
                .map(l => l.includes('archive.org') ? l : 'https://web.archive.org' + l);
            return locations;
        });
}

function getLocation(l) {
    console.log('Downloading ' + l);
    return request(l)
        .then(r => {
            const getAllXPath = importer.import('all elements xpath from string')(r.body);
            return getAllXPath([
                '//h3[contains(., "Mon") or contains(., "Tue") or contains(., "Wed") or contains(., "Thu") or contains(., "Fri") or contains(., "Sat") or contains(., "Sun")]',
                {
                    dotw: './text()',
                    time: './following-sibling::p[count(./preceding-sibling::h3)=$i+1]//text()',
                    deals: './following-sibling::ul[count(./preceding-sibling::h3)=$i+1]//text()'
                }
            ]);
        })
        .then(r => {
            const happy = r.map(l => {
                return {
                    dotw: l.dotw,
                    time: l.time,
                    deals: l.deals.join('\n').trim().split(/\s*\n+\s*/ig)
                };
            });
            const name = l.trim().replace(/\/$/ig, '').split('/').pop().replace(/[^a-z0-9-_]/ig, '_');
            fs.writeFileSync(path.join(project, name + '.json'), JSON.stringify(happy, null, 4));
            return happy;
        })
}

$$.async();
getLocations()
    .then(r => {
        return importer.runAllPromises(r.map(l => resolve => {
            return getLocation(l)
                .then(r => setTimeout(() => resolve(r), 100))
        }));
    })
    .then(r => $$.mime({'text/plain': JSON.stringify(r, null, 4)}))
    .catch(e => $$.sendError(e))



Gather resturant data from google maps?

google maps data list?


In [None]:
var importer = require('../Core');
var runSeleniumCell = importer.import('selenium cell');
var fs = require('fs');
var path = require('path');

var PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
var project = PROFILE_PATH + '/Collections/flagstaff-happy';

function getResultsPage() {
    return client
        .then(() => getAllXPath([
            '//*[contains(@class, "section-result-text-content")]',
            {
                name: './/h3[contains(@class, "section-result-title")]//text()',
                description: './/span[contains(@class, "section-result-details")]//text()|.//span[contains(@class, "section-result-location")]//text()'
            }
        ]))
        .then(r => r.map(l => ({
            name: typeof l.name === 'string' ? l.name : l.name.join('\n').trim(),
            description: typeof l.description === 'string'
                ? l.description
                : l.description.join('\n').trim().split(/\s*\n\s*/ig),
        })));
}

function getAllResults() {
    var locations = [];
    return getResultsPage()
        .then(newLocs => {
            locations = newLocs;
            return client.isExisting('//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]');
        })
        .then(is => {
            if(is) {
                return client.click('//*[contains(@class, "section-pagination-right")]//button[contains(@aria-label, "Next page") and not(@disabled)]')
                    .pause(3000)
                    .then(() => getAllResults())
                    .then(newLocs => locations.concat(newLocs))
            } else {
                return locations;
            }
        });
}

function getNearbyJSON(place) {
    if(typeof place === 'undefined') {
        place = 'bars+near+Flagstaff,+AZ';
    }
    return client.url('https://www.google.com/maps/search/' + place)
        .then(() => getAllResults())
        .then(r => {
            const day = new Date();
            const date = day.getFullYear() + '-' + (day.getMonth() + 1) + '-' + day.getDate();
            fs.writeFileSync(path.join(project, 'locations-' + date + '.json'), JSON.stringify(r, null, 4));
            return r;
        })
}
module.exports = getNearbyJSON;

if(typeof $$ !== 'undefined') {
    $$.async();
    runSeleniumCell('google maps data list', false)
        .then(func => func())
        .then(r => $$.sendResult(r))
        .catch(e => $$.sendError(e))
}


convert location names to google maps locations with happy hour data


In [18]:
var importer = require('../Core');
var fs = require('fs');
var path = require('path');
var glob = require('glob');
var {
    placesNearby,
    placeDetails,
    runSeleniumCell,
    levDist,
    levSort
} = importer.import([
    'use places nearby',
    'place details google maps',
    'run selenium cell',
    'find levenshtien distance',
    'sort levenshtien'
]);

var PROFILE_PATH = process.env.HOME || process.env.HOMEPATH || process.env.USERPROFILE;
var project = PROFILE_PATH + '/Collections/flagstaff-happy';

function loadLocations() {
    const day = new Date();
    const date = day.getFullYear() + '-' + (day.getMonth() + 1) + '-' + day.getDate();
    const filename = path.join(project, 'locations-' + date + '.json');
    if(fs.existsSync(filename)) {
        const locations = JSON.parse(fs.readFileSync(locations).toString());
        return Promise.resolve(locations);
    }
    return runSeleniumCell('google maps data list', false)
        .then(func => func());
}

function getAllLocationsData() {
    const day = new Date();
    const date = day.getFullYear() + '-' + (day.getMonth() + 1) + '-' + day.getDate();
    const filename = path.join(project, 'locations-' + date + '-full.json');
    if(fs.existsSync(filename)) {
        return Promise.resolve(JSON.parse(fs.readFileSync(filename).toString()));
    }
    var locations;    
    return loadLocations()
        .then(r => locations = r)
        .then(() => importer.runAllPromises(locations.map(l => resolve => {
            var result;
            placesNearby(l.name + ' near ' + l.description.pop() + ', Flagstaff')
                .then(r => {
                    result = r[0];
                    return placeDetails(result.place_id);
                })
                .then(r => resolve(Object.assign(l, result, r)))
        })))
        .then(r => {
            fs.writeFileSync(filename, JSON.stringify(r, null, 4));
            return r;
        })
}

if(typeof $$ !== 'undefined') {
    $$.async();
    getAllLocationsData()
        .then(r => {
            const hasSites = r.filter(l => l.website);
            console.log('websites: ' + hasSites.length + '/' + r.length + ' - ' + Math.round(1.0 * hasSites.length / r.length * 100) + '%')
            //console.log(r.filter(l => l.opening_hours).map(l => l));
            const existing = glob.sync('**/!(locations)*', {cwd: project});
            const existingMatch = existing
                .map(l => l.split(/-|\.json/ig).join(' ').trim())
                .map(l => {
                    const levMatch = levSort(r, l, r => r.name)[0].name;
                    if(levDist(l, levMatch) < l.length / 2) {
                        return l + ' - ' + levMatch;
                    }
                    return l + ' - no match';
                })
                .filter(l => !l.includes('no match'))
            console.log('existing: ' + existingMatch.length + '/' + existing.length + ' - ' + Math.round(1.0 * existingMatch.length / existing.length * 100) + '%')
            console.log('variance: ');
            const variations = existing.map(l => path.join(project, l))
                .reduce((acc, l) => {
                    const loaded = JSON.parse(fs.readFileSync(l));
                    if(typeof acc.dotw === 'undefined') {
                        acc.dotw = {};
                    }
                    loaded.forEach(d => {
                        if(d.dotw.match(/Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday/ig)) {
                            if(typeof acc['dotw']['matches'] === 'undefined') {
                                acc['dotw']['matches'] = 0;
                            }
                            acc['dotw']['matches']++;
                        } else {
                            if(typeof acc['dotw']['unmatched'] === 'undefined') {
                                acc['dotw']['unmatched'] = [];
                            }
                            acc['dotw']['unmatched'].push(l + d.dotw);
                        }
                    });
                    if(typeof acc.time === 'undefined') {
                        acc.time = {};
                    }
                    loaded.forEach(d => {
                        console.log(l + ' - ' + d.time)
                        if(d.time.match(/^([0-9]+(p|a)\.m\.\s+-\s+([0-9]+(p|a)|close)|all day)\s*$/ig)) {
                            if(typeof acc['time']['matches'] === 'undefined') {
                                acc['time']['matches'] = 0;
                            }
                            acc['time']['matches']++;
                        } else {
                            if(typeof acc['time']['unmatched'] === 'undefined') {
                                acc['time']['unmatched'] = [];
                            }
                            acc['time']['unmatched'].push(l + d.dotw);
                        }
                    });
                    return acc;
                }, {});
            console.log(variations)
            return r;
        })
        .then(r => $$.sendResult(r))
        .catch(e => $$.sendError(e))
}


websites: 51/59 - 86%
existing: 17/49 - 35%
variance: 
/Users/briancullinan/Collections/flagstaff-happy/altitudes-bar-and-grill.json - 3 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/altitudes-bar-and-grill.json - 3 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/altitudes-bar-and-grill.json - All day
/Users/briancullinan/Collections/flagstaff-happy/altitudes-bar-and-grill.json - 3 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/altitudes-bar-and-grill.json - 3 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/arrows-sports-bar-twin-arrows-casino.json - 4 p.m.-7 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/arrows-sports-bar-twin-arrows-casino.json - 4 p.m.-7 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/arrows-sports-bar-twin-arrows-casino.json - 4 p.m.-7 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/arrows-sports-bar-twin-arrows-casino.json - 4 p.m.-7 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/

/Users/briancullinan/Collections/flagstaff-happy/el-capitan.json - 11 a.m.–4 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/el-capitan.json - 9 a.m.–4 p.m. 
/Users/briancullinan/Collections/flagstaff-happy/el-capitan.json - 9 a.m.–4 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - all day
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - All day
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - all day
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-happy/fat-olives.json - 4 p.m.-6 p.m.
/Users/briancullinan/Collections/flagstaff-

TypeError: d.time.match is not a function