In [139]:
import datetime
import html
import json
import os.path
import re
import time
import urllib.parse

import pandas as pd
import pytz
import qgrid
import requests
from IPython.display import clear_output
from pytz import timezone

# --- Configuration ---------------------------------------------------------
BASE_URL = 'https://www.gencon.com'
BASE_PATH = 'cache/'  # prefix prepended to every cache file name
# Safety measure: never request more than this many pages, whatever the API says.
MAX_PAGE_NUM = 500

# --- Crawl state -----------------------------------------------------------
events = pd.DataFrame()
records = []
page_num = 1
# First page of the event search; later pages come from the API's next_path.
url = (
    BASE_URL
    + '/api/event_search'
    + '?ag[]=eo&ag[]=tn&ag[]=ma&ag[]=tw&page=1'
)

def cache_file_name(url):
    """Map *url* to the path of its cache file.

    Every ``:``, ``/``, ``?``, ``&``, ``[`` and ``]`` in the URL is replaced
    with an underscore, the result is appended to ``BASE_PATH``, and the
    combined path is normalised with ``os.path.normpath``.
    """
    flattened = re.sub(r"[:\/\?\&\[\]]", '_', url)
    return os.path.normpath(BASE_PATH + flattened)
    
def fetch_body(url):
    """Return the response body for *url*, using an on-disk cache.

    If a cache file for the URL already exists, its contents are returned
    without touching the network. Otherwise the URL is fetched with
    ``requests.get`` after a 2 second pause, and the body is written to the
    cache only when the response status is OK.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The body text. For a non-OK response this is still the response
        text; an error line is printed and nothing is cached.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    file_path = cache_file_name(url)
    if os.path.isfile(file_path):
        print('getting ' + url + ' from file')
        # Explicit encoding so cache files read back the same on every platform.
        with open(file_path, encoding='utf-8') as f:
            body = f.read()
    else:
        time.sleep(2)  # pause before every network request
        print('getting ' + url)
        # A timeout keeps a stalled connection from hanging the crawl forever.
        res = requests.get(url, timeout=30)
        body = res.text

        if res.status_code == requests.codes.ok:
            # Create the cache directory on first use instead of failing
            # with FileNotFoundError when it does not exist yet.
            cache_dir = os.path.dirname(file_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(body)
        else:
            print('ERROR: ' + str(res.status_code) + ' from ' + url)

    clear_output(wait=True)

    return body

# Walk the paginated search results, following the server-supplied next_path
# until the API reports no more pages or MAX_PAGE_NUM pages have been read.
records = []
while page_num <= MAX_PAGE_NUM:
    body = fetch_body(url)
    events_json = json.loads(body)

    # Collect plain dicts; the DataFrame is built once after the loop.
    records.extend(record['_source'] for record in events_json['records'])

    if not events_json['has_more']:
        break
    page_num += 1
    url = BASE_URL + events_json['next_path']

# DataFrame.append was removed in pandas 2.0 and copied the whole frame for
# every appended row; constructing from the list of dicts is linear.
events = pd.DataFrame(records)

def _title_link(row):
    """Return an anchor linking the event title to its gencon.com page.

    The title text comes from the API, so it is HTML-escaped before being
    placed in the attribute and in the link text; otherwise a title that
    contains a double quote or '<' would break the generated markup.
    """
    title = row['title']
    # sort_key carries the bare title to preserve the ability to alpha sort.
    return (
        '<a sort_key="' + html.escape(title.lstrip(), quote=True)
        + '" href="https://www.gencon.com/events/' + str(row['id'])
        + '" target="_blank">' + html.escape(title) + '</a>'
    )


def _game_system_link(name):
    """Return an anchor that searches Google (boardgamegeek.com) for *name*.

    Values that are not strings (for example a missing value) are returned
    unchanged instead of raising on ``.lstrip()``.
    """
    if not isinstance(name, str):
        return name
    stripped = name.lstrip()
    return (
        '<a href="https://www.google.com/search?q='
        + urllib.parse.quote_plus(stripped)
        + '+site%3Aboardgamegeek.com" target="_blank">'
        + html.escape(stripped) + '</a>'
    )


# Make titles link to the gencon page.
events['title'] = events.apply(_title_link, axis=1)

# Make game system names link to a google search.
events['game_system'] = events['game_system'].apply(_game_system_link)

# Make a more readable start time column.
eastern = timezone('US/Eastern')
# Convert the timestamp straight into Eastern time rather than going through
# a naive datetime in the machine's local zone first.
events['event_start'] = events['date_with_time'].apply(
    lambda ts: datetime.datetime.fromtimestamp(ts, tz=eastern).strftime('%m-%d %I:%M %p')
)

getting https://www.gencon.com/api/event_search?ag[]=eo&ag[]=tn&ag[]=ma&ag[]=tw&page=401


In [138]:
# Columns shown in the grid, in display order.
grid_columns = [
    'title',
    'short_description',
    'event_start',
    'event_duration',
    'game_system',
    'rules_edition',
    'tickets_available',
    'materials_provided',
    'printable_event_type',
]
widget = qgrid.show_grid(
    events[grid_columns],
    grid_options={'forceFitColumns': False, 'fullWidthRows': True},
    show_toolbar=True,
)
# Bare expression as the last statement so the notebook renders the widget.
widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…