In [1]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s <%(name)s:%(module)s[%(lineno)d]> %(levelname)s: %(message)s',
                              '%a %b %d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)

In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup
import os
from selenium.webdriver.firefox.options import Options as FirefoxOptions
import itertools
import time
import re
import json
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

In [3]:
from golfgenius.parser import GGParser

In [4]:
parser = GGParser(headless=False)

In [5]:
import re
logger.info("Loading landing page {}".format(parser.landing_page))
parser.driver.get(parser.landing_page)
logger.debug("Locating rounds...")
rounds = {}
links = {}
for o in parser.soup.find(id='round').find_all('option'):
    round_name = o.text.strip()
    if isinstance(filter, re.Pattern):
        if filter.match(round_name) is None:
            logger.info("Skipping round %s, does not match pattern %s" % (round_name, filter.pattern))
            continue
    if round_name in parser._captured_rounds:
        logger.info("Skipping round %s, already captured" % round_name)
        continue
    rounds[round_name] = {
        "name": round_name,
        "results": {
            "teams": [],
            "scores": {}
        }
    }
    logger.info("Locating round %s.." % round_name)
    links[round_name] = {}
    parser._get_element(o).click()
    WebDriverWait(parser.driver, 15).until(
        EC.visibility_of_element_located(
            (By.XPATH, "//a[@class='expand-tournament']")))

    for anchor in parser.soup.find_all('a', {"class": "expand-tournament", "data-tournament-spec-id": True,
                                           "data-tournament-event-id": True}):
        eid = anchor.attrs["data-tournament-event-id"]
        sid = anchor.attrs["data-tournament-spec-id"]
        # Using href from option will not work
        href = "https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=%s" % eid
        text = anchor.text.strip()
        links[round_name][text] = {
            "event_id": eid, "spec_id": sid, "href": href, "text": text
        }

Thu Mar 24 12:52:15 <root:<ipython-input-5-1fd0650446e5>[2]> INFO: Loading landing page https://www.golfgenius.com/leagues/8102529389645833196/widgets/tournament_results
Thu Mar 24 12:52:18 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 17 (Fri, March 18)..
Thu Mar 24 12:52:19 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round March 11 (Fri, March 11)..
Thu Mar 24 12:52:21 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 15 (Fri, March  4)..
Thu Mar 24 12:52:22 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 14 (Wed, March  2)..
Thu Mar 24 12:52:23 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 13 (Fri, February 25)..
Thu Mar 24 12:52:23 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 12 (Wed, February 23)..
Thu Mar 24 12:52:24 <root:<ipython-input-5-1fd0650446e5>[23]> INFO: Locating round Round 10 (Thu, February 17)..
Thu Mar 24 12:52:24 <root:<ipython-input-5-1fd06504

In [10]:
list(rounds.items())[0]

('Round 17 (Fri, March 18)',
 {'name': 'Round 17 (Fri, March 18)', 'results': {'teams': [], 'scores': {}}})

In [11]:
list(links.items())[0]

('Round 17 (Fri, March 18)',
 {'2BB FRONT': {'event_id': '8308573300959781500',
   'spec_id': '8308573195196211714',
   'href': 'https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=8308573300959781500',
   'text': '2BB FRONT'},
  '2BB BACK': {'event_id': '8308573293712024187',
   'spec_id': '8308573202645295619',
   'href': 'https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=8308573293712024187',
   'text': '2BB BACK'},
  '2BB OVERALL': {'event_id': '8308573307771331197',
   'spec_id': '8308573211000349188',
   'href': 'https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=8308573307771331197',
   'text': '2BB OVERALL'},
  'SKINS': {'event_id': '8308573316562592382',
   'spec_id': '8308573219959382533',
   'href': 'https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=8308573316562592382',
   'text': 'SKINS'}})

In [15]:
parser.driver.get('https://www.golfgenius.com/tournaments2/details?adjusting=false&event_id=8308573307771331197')
WebDriverWait(parser.driver, 15).until(
                        EC.visibility_of_element_located(
                            (By.XPATH, "//table[@class='scorecard']")))

<selenium.webdriver.firefox.webelement.FirefoxWebElement (session="58a1bbc6-5cd6-b14d-9950-a6fb0d27b22e", element="94c5c3e9-1bd4-e644-99bd-2d391a3730a6")>

In [18]:
table = parser.soup.find('table', {"class": "scorecard"})
teams = [[x.strip() for x in tr.attrs["data-aggregate-name"].split("+")] for tr in table.find_all(
                        "tr", {"class": "aggregate_score", "data-aggregate-name": True})]

In [23]:
table

<table class="scorecard" name="Regular">
<tbody><tr class="header_row" style="">
<th colspan="100">
Strokes
</th>
</tr>
<tr class="header_row tee_header_row" style="">
<td style=""></td>
<td class=""> 1</td>
<td class=""> 2</td>
<td class=""> 3</td>
<td class=""> 4</td>
<td class=""> 5</td>
<td class=""> 6</td>
<td class=""> 7</td>
<td class=""> 8</td>
<td class=""> 9</td>
<td style="">Out</td>
<td class="" style="text-align: center;">
10
</td>
<td class="" style="text-align: center;">
11
</td>
<td class="" style="text-align: center;">
12
</td>
<td class="" style="text-align: center;">
13
</td>
<td class="" style="text-align: center;">
14
</td>
<td class="" style="text-align: center;">
15
</td>
<td class="" style="text-align: center;">
16
</td>
<td class="" style="text-align: center;">
17
</td>
<td class="" style="text-align: center;">
18
</td>
<td style="">In</td>
<td class="" style="">
Total
</td>
</tr>
<tr class="net-line" data-net-name="Akins, Kyle">
<td class="name left_aligned">


In [29]:
score_types = {
    "double_circle": "eagle",
    "simple_circle": "birdie",
    None: "par",
    "simple_square": "plus1",
    "double_square": "plus2"
}
for player_row in [tr for tr in table.find_all('tr', {"class": "net-line"}) if tr.attrs.get("data-net-name") is not None]:
    player_name = player_row.attrs["data-net-name"].strip()
    for score in player_row.find_all('td', {'class': 'score'}):
        print("=========")
        print("score:", score)
        print("=========")
        hole, value_int, score_type = None, None, None
        hole_list = [a for a in score.attrs["class"] if a.startswith('hole')]
        if len(hole_list) == 1:
            hole = hole_list[0].replace("hole", "")
        print("=========")
        print("hole:", hole)
        print("=========")
        value = score.find('div', {"class": "single-score"}).text.strip()
        if value.isdigit():
            value_int = int(value)
        print("=========")
        print("value_int:", value_int)
        print("=========")
        type_list = [a for a in score.attrs["class"] if a in score_types]
        print("=========")
        print("type_list:", type_list)
        print("=========")
        if len(type_list) != 1:
            gg_type = None
        else:
            gg_type = type_list[0]
        score_type = score_types[gg_type]
        print("=========")
        print("score_type:", score_type)
        print("=========")
        if hole is not None and value_int is not None and score_type is not None:
            print("=========")
            print("value_int, score_type:", value_int, score_type)
            print("=========")

score: <td class="hole1 score simple_circle" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 1
value_int: 4
type_list: ['simple_circle']
score_type: birdie
value_int, score_type: 4 birdie
score: <td class="hole2 score simple_circle" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 2
value_int: 3
type_list: ['simple_circle']
score_type: birdie
value_int, score_type: 3 birdie
score: <td class="hole3 score simple_circle" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
2
</span>
</div>
</td>
hole: 3
value_int: 2
type_list: ['simple_circle']
score_type: birdie
value_int, score_type: 2 birdie
score: <td class="hole4 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 4
value_int: 4
type_list: []
score_type: par


value_int, score_type: 5 plus1
score: <td class="hole3 score simple_square" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 3
value_int: 4
type_list: ['simple_square']
score_type: plus1
value_int, score_type: 4 plus1
score: <td class="hole4 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 4
value_int: 4
type_list: []
score_type: par
value_int, score_type: 4 par
score: <td class="hole5 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
5
</span>
</div>
</td>
hole: 5
value_int: 5
type_list: []
score_type: par
value_int, score_type: 5 par
score: <td class="double_square hole6 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
6
</span>
</div>
</td>
hole: 6
value_int: 6
type_list: ['double_square']
score_type: plus2
value_int,

score_type: plus1
value_int, score_type: 4 plus1
score: <td class="hole9 score simple_circle" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 9
value_int: 3
type_list: ['simple_circle']
score_type: birdie
value_int, score_type: 3 birdie
score: <td class="hole10 score simple_square" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
6
</span>
</div>
</td>
hole: 10
value_int: 6
type_list: ['simple_square']
score_type: plus1
value_int, score_type: 6 plus1
score: <td class="hole11 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 11
value_int: 3
type_list: []
score_type: par
value_int, score_type: 3 par
score: <td class="hole12 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 12
value_int: 4
type_list: [

score_type: plus1
value_int, score_type: 6 plus1
score: <td class="hole15 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 15
value_int: 3
type_list: []
score_type: par
value_int, score_type: 3 par
score: <td class="hole16 score simple_square" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
5
</span>
</div>
</td>
hole: 16
value_int: 5
type_list: ['simple_square']
score_type: plus1
value_int, score_type: 5 plus1
score: <td class="hole17 score simple_circle" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 17
value_int: 3
type_list: ['simple_circle']
score_type: birdie
value_int, score_type: 3 birdie
score: <td class="hole18 score simple_square" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
5
</span>
</div>
</td>
hole: 18
value_int

value_int: 4
type_list: []
score_type: par
value_int, score_type: 4 par
score: <td class="hole5 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
5
</span>
</div>
</td>
hole: 5
value_int: 5
type_list: []
score_type: par
value_int, score_type: 5 par
score: <td class="hole6 score simple_square" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
5
</span>
</div>
</td>
hole: 6
value_int: 5
type_list: ['simple_square']
score_type: plus1
value_int, score_type: 5 plus1
score: <td class="hole7 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
4
</span>
</div>
</td>
hole: 7
value_int: 4
type_list: []
score_type: par
value_int, score_type: 4 par
score: <td class="hole8 score" style="">
<div class="handicap-dots">
</div>
<div class="single-score">
<span class="score_box">
3
</span>
</div>
</td>
hole: 8
value_int: 3
type_list: []
score_type: par


In [4]:
#data = list(parser.iter_rounds('kdjfek'))
#data

Thu Mar 24 12:44:08 <golfgenius.parser:parser[199]> INFO: Loading landing page https://www.golfgenius.com/leagues/8102529389645833196/widgets/tournament_results
Thu Mar 24 12:44:12 <golfgenius.parser:parser[220]> INFO: Locating round Round 17 (Fri, March 18)..
Thu Mar 24 12:44:13 <golfgenius.parser:parser[220]> INFO: Locating round March 11 (Fri, March 11)..
Thu Mar 24 12:44:14 <golfgenius.parser:parser[220]> INFO: Locating round Round 15 (Fri, March  4)..
Thu Mar 24 12:44:15 <golfgenius.parser:parser[220]> INFO: Locating round Round 14 (Wed, March  2)..
Thu Mar 24 12:44:16 <golfgenius.parser:parser[220]> INFO: Locating round Round 13 (Fri, February 25)..
Thu Mar 24 12:44:16 <golfgenius.parser:parser[220]> INFO: Locating round Round 12 (Wed, February 23)..
Thu Mar 24 12:44:17 <golfgenius.parser:parser[220]> INFO: Locating round Round 10 (Thu, February 17)..
Thu Mar 24 12:44:18 <golfgenius.parser:parser[220]> INFO: Locating round Round 9 (Wed, February 16)..
Thu Mar 24 12:44:19 <golfgen

In [6]:
parser.close()