# Imports

In [1]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup

# Web Requests

In [2]:
def simple_get(url):
    """
    Fetch `url` with an HTTP GET and return the raw response body.

    The body (`resp.content`, i.e. bytes) is returned only when
    `is_good_response` accepts the response; otherwise the result is None.
    A `RequestException` raised while requesting is reported through
    `log_error` and also yields None.
    """
    try:
        # closing() guarantees the streamed connection is released on exit.
        with closing(get(url, stream=True)) as response:
            return response.content if is_good_response(response) else None
    except RequestException as err:
        log_error('Error during requests to {0} : {1}'.format(url, str(err)))
        return None


def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its
    Content-Type header contains 'html' (case-insensitive). A response
    with no Content-Type header at all is treated as not HTML instead of
    raising KeyError.
    """
    # .get() yields None for a missing header, so the None check below is
    # meaningful and must run before the value is lower-cased.
    content_type = resp.headers.get('Content-Type')
    return (resp.status_code == 200
            and content_type is not None
            and 'html' in content_type.lower())


def log_error(e):
    """
    Report an error by printing it to standard output.

    This is the single place errors are reported from, so it can be
    swapped for something else (a file, a logger, ...) later on.
    """
    message = str(e)
    print(message)

# Get a user's list of games

In [3]:
def getGamesPlayedForUser(userPageURL):
    """
    Collect the game ids linked from a player's game-list page.

    Input (string): URL of the player's game-list page.
    Output (list): one string per link whose href contains
    '/jsp/game/game', taken from the link text with its first character
    dropped (presumably a leading '#' -- verify against the page markup).
    Returns an empty list when the page could not be retrieved.
    """
    page = simple_get(userPageURL)
    if page is None:
        # simple_get returns None on a request error or a non-HTML
        # response; BeautifulSoup cannot parse None, so stop here.
        return []
    html_res = BeautifulSoup(page, 'html.parser')
    return [
        link.text[1:]
        for link in html_res.select('a')
        # Anchors without an href are skipped via the '' default.
        if '/jsp/game/game' in link.get('href', '')
    ]

In [4]:
# Scrape the game ids from the game-list page requested with gtid=hex and plid=6398.
listOfGameIds = getGamesPlayedForUser('http://littlegolem.net/jsp/info/player_game_list.jsp?gtid=hex&plid=6398')

In [5]:
# Number of game ids that were scraped.
len(listOfGameIds)

699

In [6]:
# Show the scraped game ids.
print(listOfGameIds)

['1939599', '1938419', '1938296', '1938295', '1938291', '1909790', '1891227', '1877598', '1877596', '1877594', '1877591', '1877587', '1877582', '1877576', '1872780', '1872777', '1861986', '1861985', '1861983', '1861980', '1857774', '1857587', '1855032', '1853841', '1848510', '1845725', '1845724', '1845722', '1845719', '1845715', '1845710', '1818421', '1818420', '1818417', '1818414', '1818410', '1818405', '1818399', '1818392', '1818384', '1789641', '1789639', '1789637', '1789634', '1789630', '1789625', '1789619', '1789612', '1789604', '1789595', '1779168', '1779167', '1779165', '1779162', '1779158', '1779153', '1779147', '1779140', '1771502', '1771501', '1771499', '1771496', '1771492', '1766157', '1766156', '1766153', '1766150', '1766146', '1766141', '1766135', '1766128', '1766120', '1685379', '1676448', '1653132', '1653131', '1651327', '1651326', '1651325', '1651324', '1651323', '1651322', '1651321', '1651320', '1629121', '1629120', '1629119', '1629118', '1621951', '1619428', '1612715'

# Get Information For A Single Game ID

In [28]:
def getGameInfo(gameID):
    '''
    Scrape one littlegolem.net game page into a flat record.

    Input (string): game id ex 2002849
    Output (list): gid,black,white,black_rating,white_rating,move_list,winner

    Returns an empty list when the page cannot be retrieved, when any
    <h3> heading is not the expected "Hex-Size 13" title for this game
    id, or when the page has fewer than four 'portlet-body' sections
    (so no move list can be located).
    '''
    page = simple_get('http://littlegolem.net/jsp/game/game.jsp?gid=' + gameID)
    if page is None:
        # Request failed or the response was not HTML; nothing to parse.
        return []
    html_res = BeautifulSoup(page, 'html.parser')

    # Only size-13 Hex games are accepted: every <h3> must start with
    # exactly this title text (whitespace included).
    expectedTitle = ('\r\n\t\t\tHex-Size 13\r\n\t\t\t:: #' + str(gameID)
                     + '\r\n\t\t\t')
    for heading in html_res.select('h3'):
        children = list(heading)
        # An empty heading cannot be the expected title either.
        if not children or children[0] != expectedTitle:
            return []

    allDivs = html_res.select('div')

    # The move list is read from the last 'portlet-body' div, provided it
    # is at least the fourth one on the page. Divs without a class
    # attribute are skipped via the [] default.
    portletBodies = [div for div in allDivs
                     if 'portlet-body' in div.get('class', [])]
    if len(portletBodies) <= 3:
        return []
    moveList = portletBodies[-1].text.split()

    # Walk the moves, building the move string and counting plies so the
    # winner can be derived from parity.
    moves = []
    count = 0
    for move in moveList:
        count += 1
        # Keep only the part after the last '.' of each token.
        moveText = move.split('.')[-1]
        if moveText == 'swap':
            moveText = '*'
        if moveText == 'resign':
            # The extra increment flips the parity so the resigning
            # side's opponent is reported as the winner.
            count += 1
            break
        moves.append(moveText)
    moveString = ''.join(moves)
    # Decide which is winner based on number of moves
    winner = 'white' if count % 2 == 0 else 'black'

    # Names are appended as they are found; each rank is held back one
    # step so that, with two players, both names come before both ranks.
    returnArray = [gameID]
    pastRank = ''
    for div in allDivs:
        if 'col-xs-6' in div.get('class', []):
            playerName = div.text.split('\n')[1]
            playerRank = div.text.split()[-1]
            returnArray.append(playerName)
            if pastRank != '':
                returnArray.append(pastRank)
            pastRank = playerRank

    returnArray.append(pastRank)
    returnArray.append(moveString)
    returnArray.append(winner)
    return returnArray

In [29]:
# Example: output for a fully completed game.
getGameInfo('1939599')

['1939599',
 'fastplayer',
 'Maciej Celuch',
 '1806',
 '2688',
 'g3*e9i10f7d12b12c11d11c12f11g12e12g5j4h7c9h3f5f6h6g7g6e10b11c10b10d8d7c8f9j2e7e5b7c6d5e4k2j3k3i5l5j5k5j6l8k9l9k8k7h10h9i8j8i9h5i4g4g3f3h4d6d4j11l7a5c5a9b6m6m7e6f4j9j10a3a7i7j7',
 'white']

In [31]:
# Example: output for a game that was started and then resigned.
getGameInfo('1994280')

['1994280',
 'gzero_bot',
 'fastplayer',
 '2067',
 '1806',
 'f3*d10c12c10b12e11d13d12c13f12e10d11f11e12f8e7d9j6i9k7j11e9e8k11g9j9j2c9d8i5k10i11i4l2i10j10j5d7j4k5l3k3k4c8g3d5f2c3d6e5d4c5c4g2e6b6e2f3f4g5h3i3h4e4b7f5f6g6f7g8h6g7f9j8h12h8i12l10g11h11g12g10f10l9j13l12k9l8k8l7k12i13j12l11l13k13g1h1',
 'black']

In [33]:
# Example: output for a game in which no moves were played.
getGameInfo('1877576')

['1877576', 'Marcin Pindral', 'Maciej Celuch', '1713', '2688', '', 'white']

In [35]:
# Example: output for a game that is not on a size-13 board (empty list).
getGameInfo('1891227')

[]