In [1]:
# default_exp core.scraping.linescore

# Linescore

> Scrapes linescore (end-by-end score) information from CurlingZone (CZ).

In [2]:
#hide
from nbdev.showdoc import *

In [3]:
#export

from czapi.core.scraping.base import make_soup
from czapi.core.scraping.constants import BOXSCORE_KWARGS, LINESCORE_SOUP_TYPE, BOXSCORE_SOUP_TYPE
from czapi.core.scraping.event import get_event_name,get_event_date, get_url, _get_event_name, _get_event_date
from bs4 import BeautifulSoup, Tag
from collections import defaultdict
from typing import List, Union, Optional
from hashlib import sha256
import re

In [4]:
# exporti
def generate_dict_from_table(

    table : Tag

)->Union[dict,defaultdict]:
    """Helper function for returning the curling boxscore from a bs4 Tag object.

    Walks every ``td`` in ``table`` in document order and groups the cells
    under the most recently seen team cell:

    * ``linescoreteam``   -> starts a new team entry keyed by the anchor text
      and stores the anchor's ``href``.
    * ``linescorehammer`` -> ``'hammer'`` is True when the cell has no string.
    * ``linescoreend``    -> the stripped cell text is appended to ``'score'``.
    * ``linescorefinal``  -> the stripped text of the cell's ``<b>`` child is
      stored as ``'finalscore'``.

    A cell whose ``.string`` is None (bs4 reports None when a tag does not
    have exactly one string child, e.g. an empty ``<td>``) contributes an
    empty string instead of raising AttributeError.

    Raises:
        ValueError: if ``table`` is None.
    """
    if table is None:
        raise ValueError('Table tag is NoneType.')

    d = defaultdict(list)
    team = None

    # loop through tags in table
    for tag in table.find_all('td'):
        css_class = tag.attrs.get('class')

        if css_class == ['linescoreteam']:
            team = tag.a.string
            d[team] = defaultdict(list)
            d[team]['href'] = tag.a['href']
        elif css_class == ['linescorehammer']:
            d[team]['hammer'] = not bool(tag.string) # opposite for some reason
        elif css_class == ['linescoreend']:
            # `.string` may be None for a blank end cell; treat it as ''.
            d[team]['score'].append((tag.string or '').strip())
        elif css_class == ['linescorefinal']:
            d[team]['finalscore'] = (tag.b.string or '').strip()

    return d

In [5]:
#exporti
def _get_draw_from_boxscore_page(
     soup : BeautifulSoup
)->str:
    return 'Not supported for boxscore page.'
    
def _get_draw_from_linesore_page(
    soup : BeautifulSoup

)->str:
    
    return soup.find(name='option',attrs={'selected':'selected'}).string

def _get_draw(soup: BeautifulSoup, soup_type: str, **kwargs) -> str:
    """Dispatch to the draw extractor that matches ``soup_type``.

    ``soup_type`` is lower-cased before being compared with
    ``LINESCORE_SOUP_TYPE`` and ``BOXSCORE_SOUP_TYPE``; extra keyword
    arguments are forwarded to the chosen extractor.

    Raises:
        NotImplementedError: if ``soup_type`` matches neither constant.
    """
    page_kind = soup_type.lower()

    if page_kind == LINESCORE_SOUP_TYPE:
        extractor = _get_draw_from_linesore_page
    elif page_kind == BOXSCORE_SOUP_TYPE:
        extractor = _get_draw_from_boxscore_page
    else:
        raise NotImplementedError("%s soup type is not implemented."%page_kind)

    return extractor(soup=soup, **kwargs)
    
    

In [6]:
# exporti

def get_boxscore_from_table(table: Tag) -> Union[dict, defaultdict]:
    """Parse ``table`` with ``generate_dict_from_table``, mapping ValueError to ``{}``.

    Any other exception raised by the parser propagates unchanged.
    """
    try:
        boxscore = generate_dict_from_table(table = table)
    except ValueError:
        # TODO : change return value based on what makes sense for the API
        return {}
    return boxscore

def get_boxscore_from_game_id(cz_game_id: str, **request_kwargs) -> Union[dict, defaultdict]:
    """Returns a curling boxscore (dict) based on the cz_game_id.

    Builds the CurlingZone ``game.php`` URL for ``cz_game_id``, hands it to
    ``make_soup`` together with ``request_kwargs``, and parses the result.
    """
    game_url = 'https://www.curlingzone.com/game.php?1=1&showgameid=%s#1'%cz_game_id
    game_page = make_soup(url=game_url, **request_kwargs)
    return _get_boxscore_from_game_id(soup=game_page)
    
def _get_boxscore_from_game_id(soup: BeautifulSoup) -> Union[dict, defaultdict]:
    """Find the first element matching ``BOXSCORE_KWARGS`` in ``soup`` and parse it.

    Returns ``{}`` if parsing raises ValueError.
    """
    boxscore_table = soup.find(**BOXSCORE_KWARGS)

    try:
        parsed = get_boxscore_from_table(table=boxscore_table)
    except ValueError:
        parsed = {}

    return parsed


In [7]:
# hide
# Boxscore expected for CurlingZone game 271145, keyed by team name.
expected_dict = {
    'Wayne Tuck Jr.': {
        'href': 'event.php?view=Team&eventid=6400&teamid=144353&profileid=12486#1',
        'hammer': True,
        'score': ['0', '2', '0', '0', '0', '0', '1', '1', '1', '0'],
        'finalscore': '5',
    },
    'Matthew Hall': {
        'href': 'event.php?view=Team&eventid=6400&teamid=144347&profileid=12435#1',
        'hammer': False,
        'score': ['0', '0', '4', '0', '0', '1', '0', '0', '0', '2'],
        'finalscore': '7',
    },
}

# Looks the game up by id; this goes through make_soup with a curlingzone.com URL.
actual_dict = get_boxscore_from_game_id(cz_game_id=271145)

assert actual_dict == expected_dict

In [8]:
# exporti

def get_table_from_index(tables: List[Tag], game_number: int) -> Tag:
    """Return the ``game_number``-th entry of ``tables``, counting from 1.

    Raises:
        ValueError: if ``game_number`` is less than 1.
        IndexError: if ``game_number`` is past the end of ``tables``; the
            original message is kept and a hint is appended.
    """
    # TODO confirm this is the kind of error handling we want
    if game_number < 1:
        raise ValueError('Table number must be greater than 0.')

    try:
        selected = tables[game_number - 1]
    except IndexError as err:
        hint = "Are you sure that game number is valid?"
        raise IndexError("%s. %s" % (str(err), hint))

    return selected
    
def get_boxscore_from_event_draw_game_number(
    cz_event_id: Union[str, int],
    cz_draw_id: int,
    game_number: int,
    **request_kwargs
) -> Union[dict, defaultdict]:
    """Returns a curling boxscore (dict) based on the cz_event_id, cz_draw_id and game_number.

    The event/draw pair selects the scores page URL passed to ``make_soup``;
    ``game_number`` then selects one game on that page.
    """
    ids = (cz_event_id, cz_draw_id)
    scores_url = 'https://curlingzone.com/event.php?eventid=%s&view=Scores&showdrawid=%s#1' % ids
    scores_page = make_soup(url=scores_url, **request_kwargs)

    return _get_boxscore_from_event_draw_game_number(soup=scores_page, game_number=game_number)
    
def get_boxscore_tables_from_event_draw_game_number(soup: BeautifulSoup) -> List[Tag]:
    """Return every element of ``soup`` that matches ``BOXSCORE_KWARGS``."""
    matching_tables = soup.find_all(**BOXSCORE_KWARGS)
    return matching_tables
    
def get_linescore_page_game_num(soup: BeautifulSoup) -> int:
    """Return how many boxscore tables ``soup`` contains."""
    boxscore_tables = get_boxscore_tables_from_event_draw_game_number(soup=soup)
    return len(boxscore_tables)

def _get_boxscore_from_tables(tables: List[Tag], game_number: int) -> Union[dict, defaultdict]:
    """Parse the ``game_number``-th (1-based) table of ``tables`` into a boxscore.

    Returns ``{}`` when selecting or parsing the table raises IndexError,
    ValueError or TypeError; all three are handled identically.
    """
    try:
        chosen_table = get_table_from_index(tables = tables, game_number = game_number)
        return get_boxscore_from_table(table = chosen_table)
    except (IndexError, ValueError, TypeError):
        return {}
    
def _get_boxscore_from_event_draw_game_number(soup: BeautifulSoup, game_number: int) -> Union[dict, defaultdict]:
    """Collect the boxscore tables in ``soup`` and parse the one at ``game_number``."""
    return _get_boxscore_from_tables(
        tables=get_boxscore_tables_from_event_draw_game_number(soup=soup),
        game_number=game_number,
    )
    

    

In [9]:
# hide
# Addressing the game by event id, draw id and 1-based game number must yield expected_dict too.
actual_dict = get_boxscore_from_event_draw_game_number(cz_event_id=6400, cz_draw_id=2, game_number=1)

assert actual_dict == expected_dict

In [10]:
# exporti
def _get_boxscore(soup: BeautifulSoup, soup_type: str, **game_kwargs) -> Union[dict, defaultdict]:
    """Dispatch to the boxscore parser that matches ``soup_type``.

    ``soup_type`` is lower-cased before being compared with
    ``LINESCORE_SOUP_TYPE`` and ``BOXSCORE_SOUP_TYPE``; ``game_kwargs`` are
    forwarded to the chosen parser.

    Raises:
        NotImplementedError: if ``soup_type`` matches neither constant.
    """
    page_kind = soup_type.lower()

    if page_kind == LINESCORE_SOUP_TYPE:
        parser = _get_boxscore_from_event_draw_game_number
    elif page_kind == BOXSCORE_SOUP_TYPE:
        parser = _get_boxscore_from_game_id
    else:
        raise NotImplementedError("%s soup type is not implemented."%page_kind)

    return parser(soup=soup, **game_kwargs)

In [11]:
#exporti
def hash_obj(obj, hash_type='sha256', encoding='utf-8'):
    """Return the lowercase hex digest of ``str(obj)`` encoded with ``encoding``.

    ``hash_type`` and ``encoding`` are both lower-cased first. Only
    ``'sha256'`` is accepted as ``hash_type``.

    Raises:
        NotImplementedError: for any other ``hash_type``.
    """
    algorithm = hash_type.lower()
    codec = encoding.lower()

    if algorithm != 'sha256':
        raise NotImplementedError("Hash function %s not supported."%algorithm)

    payload = str(obj).encode(codec)
    digest = sha256(payload).hexdigest()
    return digest.lower()
        

def get_page_soup(soup_type, url_kwargs, **request_kwargs):
    """Build the page URL with ``get_url`` and return whatever ``make_soup`` gives for it.

    ``url_kwargs`` is unpacked into ``get_url`` alongside ``soup_type``;
    ``request_kwargs`` is forwarded to ``make_soup``.
    """
    page_url = get_url(soup_type = soup_type, **url_kwargs)
    page = make_soup(url=page_url, **request_kwargs)
    return page
    


def request_type(

     cz_event_id : Optional[Union[str,int]] = None
    ,cz_draw_id : Optional[int] = None
    ,game_number : Optional[int] = None
    ,cz_game_id : Optional[Union[str,int]] = None

):
    """Work out which kind of page a set of identifiers refers to.

    Exactly one of the following must be supplied (i.e. be non-None):

    * ``cz_event_id`` together with ``cz_draw_id`` (``game_number`` is
      optional and passed through) -> linescore page.
    * ``cz_game_id`` -> boxscore page.

    Presence is decided with ``is not None`` rather than truthiness, so an
    id of ``0`` counts as supplied, in line with the type hints and the
    error message.

    Returns:
        A 4-tuple ``(soup_type, url_kwargs, game_kwargs, draw_kwargs)``;
        ``draw_kwargs`` is always an empty dict.

    Raises:
        ValueError: if both combinations or neither combination is supplied.
    """
    has_event_and_draw = cz_event_id is not None and cz_draw_id is not None
    has_game_id = cz_game_id is not None

    # Both present or both absent: the request is ambiguous or empty.
    if has_event_and_draw == has_game_id:
        raise ValueError("One combination of cz_event_id, cz_draw_id and game_number or cz_game_id must be non NoneType.")

    draw_kwargs = {}

    if has_event_and_draw:
        soup_type = LINESCORE_SOUP_TYPE
        url_kwargs = {
             'cz_event_id' : cz_event_id
            ,'cz_draw_id' : cz_draw_id
        }
        game_kwargs = {
            'game_number' : game_number
        }
    else:
        soup_type = BOXSCORE_SOUP_TYPE
        url_kwargs = {
            'cz_game_id' : cz_game_id
        }
        game_kwargs = {}

    return soup_type,url_kwargs,game_kwargs,draw_kwargs

In [12]:
#hide
# hash_obj must agree with hashing the str() form of the object directly.
for sample in (1, 12321312512312312321):
    assert sha256(str(sample).encode('utf-8')).hexdigest().lower() == hash_obj(sample)

# Event + draw (with or without a game number) resolves to a linescore request.
assert request_type(cz_event_id=6900, cz_draw_id=1) == (
    LINESCORE_SOUP_TYPE,
    {'cz_event_id': 6900, 'cz_draw_id': 1},
    {'game_number': None},
    {},
)
assert request_type(cz_event_id=6900, cz_draw_id=1, game_number=1) == (
    LINESCORE_SOUP_TYPE,
    {'cz_event_id': 6900, 'cz_draw_id': 1},
    {'game_number': 1},
    {},
)

# A game id on its own resolves to a boxscore request.
assert request_type(cz_game_id=27834787) == (BOXSCORE_SOUP_TYPE, {'cz_game_id': 27834787}, {}, {})

In [13]:
#export

def get_boxscore_without_hash(
    cz_event_id: Optional[Union[str, int]] = None,
    cz_draw_id: Optional[int] = None,
    game_number: Optional[int] = None,
    cz_game_id: Optional[Union[str, int]] = None,
    **request_kwargs
):
    """Return a boxscore enriched with date, event and draw, but without a hash.

    The identifiers are resolved by ``request_type``; ``request_kwargs`` are
    forwarded to ``get_page_soup``.
    """
    resolved = request_type(
        cz_event_id=cz_event_id,
        cz_draw_id=cz_draw_id,
        game_number=game_number,
        cz_game_id=cz_game_id,
    )
    soup_type, url_kwargs, game_kwargs, draw_kwargs = resolved
    page = get_page_soup(soup_type=soup_type, url_kwargs=url_kwargs, **request_kwargs)

    return _get_boxscore_without_hash(
        soup=page,
        soup_type=soup_type,
        game_kwargs=game_kwargs,
        draw_kwargs=draw_kwargs,
    )


def _get_boxscore_without_hash(soup, soup_type, game_kwargs, draw_kwargs):
    """Add ``'date'``, ``'event'`` and ``'draw'`` to every team entry of the boxscore.

    The four values come from ``get_game_info``. Each team's entry is copied
    into a new plain dict, and team order follows the parsed boxscore.
    """
    event, date, boxscore, draw = get_game_info(
        soup=soup,
        soup_type=soup_type,
        game_kwargs=game_kwargs,
        draw_kwargs=draw_kwargs,
    )

    enriched = {}
    for team, team_stats in boxscore.items():
        enriched[team] = {**team_stats, 'date': date, 'event': event, 'draw': draw}
    return enriched

In [14]:
#export
def get_full_boxscore(
    cz_event_id: Optional[Union[str, int]] = None,
    cz_draw_id: Optional[int] = None,
    game_number: Optional[int] = None,
    cz_game_id: Optional[Union[str, int]] = None,
    **request_kwargs
) -> dict:
    """
        Returns a curling boxscore (dict) with data hash based on the cz_event_id, cz_draw_id and game_number or the cz_game_id.
        The page is requested once via get_page_soup and all information is read from that single soup.
    """
    resolved = request_type(
        cz_event_id=cz_event_id,
        cz_draw_id=cz_draw_id,
        game_number=game_number,
        cz_game_id=cz_game_id,
    )
    soup_type, url_kwargs, game_kwargs, draw_kwargs = resolved
    page = get_page_soup(soup_type=soup_type, url_kwargs=url_kwargs, **request_kwargs)

    return get_boxscore_with_all_info(
        soup=page,
        soup_type=soup_type,
        game_kwargs=game_kwargs,
        draw_kwargs=draw_kwargs,
    )

def get_boxscore_with_all_info(soup, soup_type, game_kwargs, draw_kwargs):
    """Return the enriched boxscore with a ``'hash'`` entry added to every team.

    The hash is ``hash_obj`` applied to the whole hash-less boxscore dict, so
    every team of one game carries the same value.
    """
    unhashed = _get_boxscore_without_hash(
        soup=soup,
        soup_type=soup_type,
        game_kwargs=game_kwargs,
        draw_kwargs=draw_kwargs,
    )

    # will utf-8 always work?
    digest = hash_obj(obj = unhashed)

    hashed = {}
    for team, team_info in unhashed.items():
        hashed[team] = {**team_info, 'hash': digest}
    return hashed


def get_game_info(soup, soup_type, game_kwargs, draw_kwargs):
    """Read event name, event date, boxscore and draw label from one parsed page.

    ``game_kwargs`` is forwarded to ``_get_boxscore`` and ``draw_kwargs`` to
    ``_get_draw``.

    Returns:
        The tuple ``(event, date, boxscore, draw)``.
    """
    page = {'soup': soup, 'soup_type': soup_type}

    event = _get_event_name(**page)
    date = _get_event_date(**page)
    boxscore = _get_boxscore(**page, **game_kwargs)
    draw = _get_draw(**page, **draw_kwargs)

    return event, date, boxscore, draw

In [15]:
#export
    
def get_all_boxscores_from_linescore_page(
    cz_event_id: Optional[Union[str, int]] = None,
    cz_draw_id: Optional[int] = None,
    **request_kwargs
) -> List[Union[dict, defaultdict]]:
    """Return the full (hashed) boxscore of every game on one event/draw linescore page.

    The page is requested once; each game is then parsed from that same soup,
    using game numbers 1 up to the number of boxscore tables found.
    """
    soup_type, url_kwargs, game_kwargs, draw_kwargs = request_type(
        cz_event_id=cz_event_id,
        cz_draw_id=cz_draw_id,
    )
    page = get_page_soup(soup_type=soup_type, url_kwargs=url_kwargs, **request_kwargs)
    game_count = get_linescore_page_game_num(page)

    boxscores = []
    for number in range(1, game_count + 1):
        boxscores.append(
            get_boxscore_with_all_info(
                soup=page,
                soup_type=soup_type,
                draw_kwargs=draw_kwargs,
                game_kwargs={**game_kwargs, 'game_number': number},
            )
        )
    return boxscores
    

    

In [16]:
# Every game of event 6900, draw 2, parsed from a single page request.
get_all_boxscores_from_linescore_page(cz_event_id=6900, cz_draw_id=2)

[{'ON (Duncan)': {'href': 'event.php?view=Team&eventid=6900&teamid=158679&profileid=29756#1',
   'hammer': True,
   'score': ['2', '0', '0', '0', '2', '0', '1', '0', 'X', ''],
   'finalscore': '5',
   'date': 'Jan 28 - Feb 6, 2022',
   'event': 'Scotties Tournament of Hearts',
   'draw': 'Draw: 2',
   'hash': 'b5315ef80e3cb99d36b31ae745170421f16867da1e701322da5fa34ebbdf81f7'},
  'CA (Einarson)': {'href': 'event.php?view=Team&eventid=6900&teamid=158669&profileid=28375#1',
   'hammer': False,
   'score': ['0', '1', '1', '3', '0', '4', '0', '3', 'X', ''],
   'finalscore': '12',
   'date': 'Jan 28 - Feb 6, 2022',
   'event': 'Scotties Tournament of Hearts',
   'draw': 'Draw: 2',
   'hash': 'b5315ef80e3cb99d36b31ae745170421f16867da1e701322da5fa34ebbdf81f7'}},
 {'NS (Black)': {'href': 'event.php?view=Team&eventid=6900&teamid=158677&profileid=29750#1',
   'hammer': True,
   'score': ['0', '1', '0', '2', '0', '2', '1', '0', '1', 'X'],
   'finalscore': '7',
   'date': 'Jan 28 - Feb 6, 2022',
  

In [17]:
# Full boxscore looked up directly by its CurlingZone game id.
get_full_boxscore(cz_game_id=271145)

{'Wayne Tuck Jr.': {'href': 'event.php?view=Team&eventid=6400&teamid=144353&profileid=12486#1',
  'hammer': True,
  'score': ['0', '2', '0', '0', '0', '0', '1', '1', '1', '0'],
  'finalscore': '5',
  'date': 'Jan 17 - 19, 2020',
  'event': 'Ontario Tankard - Open Qualifier',
  'draw': 'Not supported for boxscore page.',
  'hash': '944b4b512a0b43e5d8dc569a6483863108d16dcc659eec5d73c58a252eed5507'},
 'Matthew Hall': {'href': 'event.php?view=Team&eventid=6400&teamid=144347&profileid=12435#1',
  'hammer': False,
  'score': ['0', '0', '4', '0', '0', '1', '0', '0', '0', '2'],
  'finalscore': '7',
  'date': 'Jan 17 - 19, 2020',
  'event': 'Ontario Tankard - Open Qualifier',
  'draw': 'Not supported for boxscore page.',
  'hash': '944b4b512a0b43e5d8dc569a6483863108d16dcc659eec5d73c58a252eed5507'}}

In [18]:
# Full boxscore looked up by event id, draw id and 1-based game number.
get_full_boxscore(cz_event_id=6900, cz_draw_id=1, game_number=1)

{'NU (MacPhail)': {'href': 'event.php?view=Team&eventid=6900&teamid=158678&profileid=30419#1',
  'hammer': False,
  'score': ['0', '0', '0', '0', '0', '2', '0', '0', 'X', ''],
  'finalscore': '2',
  'date': 'Jan 28 - Feb 6, 2022',
  'event': 'Scotties Tournament of Hearts',
  'draw': 'Draw: 1',
  'hash': 'bbc05838e014c15d853013b524994252dfb6f46dee034cfa5a830eb8428e92ff'},
 'PEI (Birt)': {'href': 'event.php?view=Team&eventid=6900&teamid=158680&profileid=29919#1',
  'hammer': True,
  'score': ['1', '2', '1', '0', '2', '0', '3', '3', 'X', ''],
  'finalscore': '12',
  'date': 'Jan 28 - Feb 6, 2022',
  'event': 'Scotties Tournament of Hearts',
  'draw': 'Draw: 1',
  'hash': 'bbc05838e014c15d853013b524994252dfb6f46dee034cfa5a830eb8428e92ff'}}

In [19]:
#hide
# TODO : tests

> The picture below highlights where the cz_game_id is found on the CurlingZone [page](https://curlingzone.com/game.php?1=1&showgameid=271145#1). 

![game by gameid](./imgs/game_by_game_id.png)

> The picture below highlights where the cz_event_id and cz_draw_id are found and how the games are numbered (game_number) on the CurlingZone [page](https://curlingzone.com/event.php?eventid=6400&view=Scores&showdrawid=2#1). 

![game by event id, draw and game number](./imgs/game_by_event_draw_game_number.png)