# Imports

In [2]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import csv
import pandas as pd

# Web Requests

In [None]:
def simple_get(url, timeout=30):
    """
    Fetch `url` with an HTTP GET request and return the raw response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the scrape forever.

    Returns:
        The response body as bytes (`resp.content`) when
        `is_good_response` accepts the response, otherwise None. None is
        also returned, after logging, when the request itself fails.
    """
    try:
        # closing() guarantees the streamed connection is released even if
        # the body is never read.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None


def is_good_response(resp):
    """
    Return True if the response looks like a successful HTML page.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (case-insensitive). A response without a
    Content-Type header is rejected instead of raising KeyError.
    """
    content_type = resp.headers.get('Content-Type')
    if content_type is None:
        return False
    return resp.status_code == 200 and 'html' in content_type.lower()


def log_error(e):
    """
    Report an error.

    The value is simply printed to standard output; swap the body for a
    real logger if something more durable is needed.
    """
    message = e
    print(message)

# Get a user's list of games

In [3]:
def getGamesPlayedForUser(userPageURL):
    """
    Collect the game ids linked from a player's game-list page.

    Args:
        userPageURL: URL of the player's game list.

    Returns:
        A list of strings, one per link whose href contains
        '/jsp/game/game'. Each entry is the link text with its first
        character removed (presumably a leading '#' -- confirm against the
        page markup). An empty list is returned when the page could not be
        fetched.
    """
    page = simple_get(userPageURL)
    if page is None:
        # simple_get returns None on request errors and non-HTML responses;
        # BeautifulSoup cannot parse None, so bail out early.
        log_error('No HTML returned for {0}'.format(userPageURL))
        return []

    html_res = BeautifulSoup(page, 'html.parser')
    return [
        link.text[1:]
        for link in html_res.select('a')
        if '/jsp/game/game' in link.get('href', '')
    ]

# Get Information For A Single Game ID

In [4]:
def getGameInfo(gameID):
    """
    Scrape one Little Golem game page into a flat record.

    Args:
        gameID (str): Game id, e.g. '2002849'.

    Returns:
        list: On the expected page layout (two player panels),
        [gid, black, white, black_rating, white_rating, move_list, winner].
        Which player is black is taken from the order of the panels on the
        page -- NOTE(review): confirm against the site.
        An empty list is returned when the page could not be fetched or
        when an <h3> header is not the 'Hex-Size 13' header for this game.
        Pages with an unexpected layout can yield a list of a different
        length, so callers should check the length.
    """
    page = simple_get('http://littlegolem.net/jsp/game/game.jsp?gid=' + gameID)
    if page is None:
        log_error('No HTML returned for game {0}'.format(gameID))
        return []
    html_res = BeautifulSoup(page, 'html.parser')

    # Only 13x13 Hex games are wanted; every <h3> must be exactly the
    # header the site renders for such a game.
    expected_header = ('\r\n\t\t\tHex-Size 13\r\n\t\t\t:: #'
                       + str(gameID) + '\r\n\t\t\t')
    for header in html_res.select('h3'):
        children = list(header)
        if not children or children[0] != expected_header:
            return []

    divs = html_res.select('div')

    # The move list is read from the 'portlet-body' divs after the third
    # one; if several qualify, the last one wins. Divs without a class
    # attribute are skipped instead of raising KeyError.
    moveList = []
    portlet_count = 0
    for div in divs:
        if 'portlet-body' in div.get('class', []):
            portlet_count += 1
            if portlet_count > 3:
                moveList = div.text.split()

    # Build the compact move string and count plies to decide the winner.
    count = 0
    moves = []
    for move in moveList:
        count += 1
        moveText = move.split('.')[-1]  # drop the 'N.' move-number prefix
        if moveText == "swap":
            moveText = '*'
        if moveText == 'resign':
            # A resignation takes the place of the resigning player's move,
            # so the extra increment flips the parity to the opponent.
            count += 1
            break
        moves.append(moveText)
    moveString = ''.join(moves)
    winner = 'white' if count % 2 == 0 else 'black'

    # Player panels: names are appended as they are found, while each rank
    # is held back one step so both names precede both ranks.
    returnArray = [gameID]
    pastRank = ''
    for div in divs:
        if 'col-xs-6' in div.get('class', []):
            playerName = div.text.split('\n')[1]
            playerRank = div.text.split()[-1]
            returnArray.append(playerName)
            if pastRank != '':
                returnArray.append(pastRank)
            pastRank = playerRank

    returnArray.append(pastRank)
    returnArray.append(moveString)
    returnArray.append(winner)
    return returnArray

# Go through all Hex-13 Users on Little Golem

In [None]:
# Names of the per-page CSV files written below, in page order.
finalcsvlist = []

# Walk player-list pages 1-23, writing one CSV of game records per page.
for i in range(1, 24):
    playerids = []
    listofgameplays = []

    # Fetch one page of the Hex player list.
    currenthtml = "http://littlegolem.net/jsp/info/player_list.jsp?gtvar=hex_DEFAULT&filter=&countryid=&page="+str(i)
    page = simple_get(currenthtml)
    if page is None:
        # BeautifulSoup cannot parse None; skip pages that failed to load.
        log_error('Skipping player list page {0}: no HTML returned'.format(i))
        continue
    html_res = BeautifulSoup(page, 'html.parser')

    tdvals = list(html_res.select('td'))

    # Player links are read from every third <td> starting at index 4, at
    # most 20 per page. The upper bound is clamped so a short page (fewer
    # players) does not raise IndexError.
    for j in range(4, min(62, len(tdvals)), 3):
        pidstart = str(tdvals[j])
        head, sep, tail = pidstart.partition('plid=')
        head, sep, tail = tail.partition('"')
        if head:  # empty when the cell holds no 'plid=' link
            playerids.append(head)

    # Collect every qualifying game of every player on this page.
    for k in playerids:
        print(k)
        listOfGameIds = getGamesPlayedForUser('http://littlegolem.net/jsp/info/player_game_list.jsp?gtid=hex&plid='+k)

        for gid in listOfGameIds:
            pinfo = getGameInfo(gid)
            # Keep only complete 7-field records with a non-empty move list.
            if len(pinfo) == 7 and pinfo[5] != "":
                listofgameplays.append(pinfo)

    title = "output" + str(i) + ".csv"

    # Write this page's game records and remember the file for merging.
    with open(title, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["gid","black","white","black_rating","white_rating","move_list","winner"])
        writer.writerows(listofgameplays)
        finalcsvlist.append(title)

In [None]:
import pandas as pd

# Merge the per-page CSV files into a single de-duplicated data set.
data = []
# The loop variable is deliberately not called `csv`: that would rebind the
# imported csv module to a filename and break csv.writer on a later re-run
# of the scraping cell.
for csv_path in finalcsvlist:
    frame = pd.read_csv(csv_path)
    # A file with more than the 7 expected columns carries an extra leading
    # column; drop it so every frame has the same layout.
    if frame.shape[1] > 7:
        frame = frame.drop(frame.columns[[0]], axis=1)
    data.append(frame)

bigframe = pd.concat(data, ignore_index=True)
# The same game can appear in several files; keep its first occurrence.
bigframe = bigframe.drop_duplicates(subset='gid', keep='first')
bigframe.to_csv("bigboi.csv")