In [1]:
import requests
import pandas as pd
import numpy as np
import json
from bs4 import BeautifulSoup

In [2]:
# format for player page
#   http://www.koldb.com/player.php?name=Phillammon

# standard 2019 leaderboard
#   http://www.koldb.com/searchresults.php?type=hardcore&path=standard&sortby=dt&timetype=ralph&evdir=after&event=custom&s1month=1&s1day=1&s1year=2019&unique=on&lim=500

# Exploathing 2019 leaderboard
#   NaN (no URL recorded)

In [61]:
# Raw HTML of a sample player page (player "Lyft"), fetched once from KoLDB.
# The timeout keeps this cell from hanging indefinitely if the site is unreachable.
sampleData = requests.get('http://www.koldb.com/player.php?name=Lyft', timeout=30).text

In [35]:
# Inclusion / exclusion rules used when summarising a player's runs.

# Run types that are counted; for now only Hardcore and Bad Moon.
allTypes = [
    'Hardcore',
    'Bad Moon',
]

# Paths that are removed from the final totals.
exclPath = [
    'Community Service',
    'Teetotaler',
    'Boozetafarian',
    'No path',
]

In [88]:
# Exceptions raised by a missing tag, missing attribute, short row or non-numeric cell.
_ROW_PARSE_ERRORS = (AttributeError, IndexError, KeyError, TypeError, ValueError)


def _parseSummaryTable(pTable):
    ''' Parse the best-run summary table into {runType: {runPath: run stats}}. '''
    pDict = {'Normal':{},'Hardcore':{},'Bad Moon':{}, 'Casual':{}}

    # The first row is the table header
    for row in pTable.find_all("tr")[1:]:
        cells = row.find_all("td")

        # Run type and path are read from the alt text of the icons
        runType = cells[0].find('img', alt=True)['alt']
        runPath = cells[1].find('img', alt=True)['alt']

        # Add a Bad Moon indicator to the path, for later aggregation
        if runType == 'Bad Moon':
            runPath = 'BM-' + runPath

        links   = cells[2].find_all('a', string=True)
        bestRun = links[0].text

        # Rows without a rank link (seen for Exploathing) get a last-place placeholder
        runRank = links[1].text if len(links) > 1 else '#1000 of 1000'

        if bestRun == '# of ':
            # Exploathing again: substitute a placeholder best run
            bestRun = '100 days, 10000 turns'

        # bestRun looks like '2 days, 406 turns' (or '1 day, ...')
        days  = int(bestRun.split(' day')[0])
        turns = int(bestRun.split(' day')[1].replace(' turns','').replace('s, ','').replace(', ',''))

        # runRank looks like '#2 of 2139'; 1.0 would be the top of the board
        rankParts = runRank.split(' of ')
        percentileRank = 1.0 - float(rankParts[0].replace('#','')) / float(rankParts[1])

        # setdefault so an unexpected run type is stored rather than raising KeyError
        pDict.setdefault(runType, {})[runPath] = {
            'days':days,
            'tc':turns,
            'summ':'{}-{}'.format(days,turns),
            'rankText':runRank,
            'pctRank':percentileRank
        }

    return pDict


def _buildSummary(pDict):
    ''' Combine the included run types into one frame, best percentile first, minus excluded paths. '''
    summaryCols = ['summ','days','tc','pctRank','rankText']

    # Collect one frame per included run type and concatenate once. DataFrame.append
    # was removed in pandas 2.0, and the old bare except around it silently replaced
    # the accumulated frame with the most recent run type.
    frames = [
        pd.DataFrame(pDict[runT]).T.loc[:, summaryCols]
        for runT in allTypes
        if pDict.get(runT)
    ]

    if frames:
        pSummary = pd.concat(frames).sort_values('pctRank', ascending=False)
    else:
        # No included runs at all: keep the expected columns so callers can still index them
        pSummary = pd.DataFrame(columns=summaryCols)

    # Exclude the excluded paths
    return pSummary.loc[~pSummary.index.isin(exclPath), :]


def _parseHistoryTable(pTable):
    ''' Parse the full ascension history table; returns (history dict, ns11count). '''
    pHistory  = {}
    ns11count = 0

    for row in pTable.find_all("tr")[1:]:
        cells = row.find_all("td")

        runNum = cells[0].find('a')['name']

        try:
            runClass = cells[1].find('img', alt=True)['alt']
            runDate  = cells[3].find('a').text
            runDays  = int(cells[4].text)
            runTurns = int(cells[5].text)
            runType  = cells[6].find('img', alt=True)['alt']
            runPath  = cells[7].find('img', alt=True)['alt']
        except _ROW_PARSE_ERRORS:
            # Rows that do not parse as a finished run are recorded as a dropped Casual run
            runClass = 'NaN'
            runDate  = cells[3].text
            runDays  = 100
            runTurns = 10000
            runType  = 'Casual'
            runPath  = 'Dropped'

        try:
            runFam    = cells[9].find('img', alt=True)['alt']
            runFamPct = cells[9].text
        except _ROW_PARSE_ERRORS:
            runFam    = 'NaN'
            runFamPct = 'NaN'

        # The first listed run number tells us how many earlier (NS11) ascensions are missing
        if not pHistory:
            ns11count = int(runNum) - 1

        pHistory[runNum] = {
            'class':runClass,
            'date':runDate,
            'days':runDays,
            'turns':runTurns,
            'type':runType,
            'path':runPath,
            'fam':runFam,
            'fam%':runFamPct.strip()
        }

    return pHistory, ns11count


def _printMetrics(pName, pSummary, pHistoryDF, ns11count, metricYear):
    ''' Print the one-line metric summary for a player. '''
    if pSummary.empty:
        # No eligible runs: report placeholders instead of crashing on iloc[0]
        m1 = 'n/a'
        m2 = m3 = float('nan')
    else:
        # METRIC #1 -- Best Percentile Standing (HC Run)
        bRun = pSummary.iloc[0, :]
        # Scale before rounding so the printed value has no float artefacts
        m1 = '{}; {} {}/{}'.format(round(bRun['pctRank'] * 100, 1), bRun.name, bRun['days'], bRun['tc'])

        # METRIC #2 -- Avg of 3 best percentiles
        m2 = round(np.mean(list(pSummary['pctRank'].iloc[0:3])), 3)

        # METRIC #3 -- Avg of 6 best percentiles
        m3 = round(np.mean(list(pSummary['pctRank'].iloc[0:6])), 3)

    # METRIC #4 -- Total # of non-excluded HC runs
    exclHist = pHistoryDF.loc[~pHistoryDF['path'].isin(exclPath), :]
    exclHist = exclHist.loc[exclHist['type'].isin(allTypes), :]

    if ns11count > 0:
        m4 = '{} ({})'.format(len(exclHist.index), ns11count)
    else:
        m4 = '{}'.format(len(exclHist.index))

    # METRIC #5 -- Predicted Karma
    vCounts = pHistoryDF['type'].value_counts()
    countHC = vCounts.get('Hardcore', 0)
    countBM = vCounts.get('Bad Moon', 0)
    countNo = vCounts.get('Normal', 0)

    # Making the not-great assumption that all NS11 runs are HC and thus eligible for 200 karma.
    m5 = countHC*200 + countBM*200 + countNo*100 + 200*ns11count

    # METRIC #6 -- Number of runs in metricYear that match our exclusion/inclusion criteria
    m6 = sum(str(runDate)[0:4] == str(metricYear) for runDate in exclHist['date'])

    print('{}: {}, {}, {}, {}, {}, {}'.format(pName, m1, m2, m3, m4, m5, m6))


def playerLookup(pName = 'Lyft', metricOutput=True, pData = None, metricYear = '2019'):
    ''' Look up a player's KoLDB page and either print summary metrics or return the parsed tables.

    Parameters
    ----------
    pName : str
        Player name; used in the request URL and as the label of the printed line.
    metricOutput : bool
        If truthy, print the one-line metric summary and return None.
        Otherwise return the parsed data as a dict.
    pData : str or None
        Raw HTML of the player page. When None (the default) the page is fetched
        from KoLDB; pass previously downloaded HTML to avoid hitting the site.
    metricYear : str or int
        Year counted by metric #6 (dates are matched on their first four characters).

    Returns
    -------
    None or dict
        With metricOutput falsy: {'playerHistory': DataFrame, 'ns11count': int,
        'playerSummary': DataFrame}.

    Raises
    ------
    requests.HTTPError
        If the page has to be fetched and the server answers with an error status.
    IndexError
        If the page contains no tables.
    '''

    # Only hit KoLDB when the caller did not supply the page HTML
    if pData is None:
        response = requests.get('http://www.koldb.com/player.php?name={}'.format(pName), timeout=30)
        response.raise_for_status()
        pData = response.text

    pSoup  = BeautifulSoup(pData, "html.parser")
    tables = pSoup.find_all('table')
    if not tables:
        raise IndexError('no tables found on the KoLDB page for player {!r}'.format(pName))

    # The first table is the per-path summary, the last one the full run history
    pSummary = _buildSummary(_parseSummaryTable(tables[0]))
    pHistory, ns11count = _parseHistoryTable(tables[-1])

    # reindex (rather than .loc) so an empty history still yields the expected columns
    pHistoryDF = pd.DataFrame(pHistory).T.reindex(
        columns=['date','type','path','class','days','turns','fam','fam%']
    )

    if metricOutput:
        _printMetrics(pName, pSummary, pHistoryDF, ns11count, metricYear)
    else:
        return {
            'playerHistory':pHistoryDF,
            'ns11count':ns11count,
            'playerSummary':pSummary
        }
    
    
# NOTE: remember to switch the sample data off before running this for real.
playerData = playerLookup(pName='Captain Scotch', metricOutput=True)

Captain Scotch: 99.9; One Crazy Random Summer 2/406, 0.999, 0.998, 84 (15), 24300, 34


In [91]:
# Same player again, this time asking for the parsed tables instead of the printed metrics.
playerData = playerLookup(pName='Captain Scotch', metricOutput=False)
playerData['playerSummary']

Unnamed: 0,summ,days,tc,pctRank,rankText
One Crazy Random Summer,2-406,2,406,0.999065,#2 of 2139
Standard,2-418,2,418,0.998665,#11 of 8240
Two Crazy Random Summer,2-466,2,466,0.998442,#2 of 1284
Slow and Steady,5-406,5,406,0.998266,#4 of 2307
Gelatinous Noob,3-526,3,526,0.997543,#5 of 2035
Dark Gyffte,2-483,2,483,0.993785,#10 of 1609
KOLHS,3-604,3,604,0.993418,#16 of 2431
Disguises Delimit,2-437,2,437,0.988072,#12 of 1006
Pocket Familiars,4-858,4,858,0.967337,#39 of 1194
Live. Ascend. Repeat.,3-816,3,816,0.923214,#86 of 1120


In [92]:
# Show the run-history DataFrame returned by playerLookup (one row per listed run).
playerData['playerHistory']

Unnamed: 0,date,type,path,class,days,turns,fam,fam%
16,2010-06-12,Hardcore,Teetotaler,Pastamancer,12,1553,Jill-O-Lantern,(44.4%)
17,2010-06-24,Hardcore,Teetotaler,Pastamancer,11,1473,Jill-O-Lantern,(35.8%)
18,2010-07-02,Hardcore,No path,Disco Bandit,9,1432,Green Pixie,(54.9%)
19,2010-07-09,Hardcore,No path,Disco Bandit,8,1301,Green Pixie,(43.4%)
20,2010-07-16,Hardcore,No path,Accordion Thief,6,1187,Green Pixie,(38.5%)
21,2010-07-22,Hardcore,No path,Accordion Thief,7,1343,Green Pixie,(51.7%)
22,2010-07-29,Hardcore,No path,Disco Bandit,8,1374,Green Pixie,(49.5%)
23,2011-10-19,Hardcore,No path,Accordion Thief,448,1375,Green Pixie,(46.2%)
24,2017-07-17,Hardcore,Standard,Sauceror,5,1056,Intergnat,(48.5%)
25,2017-07-21,Hardcore,License to Adventure,Pastamancer,6,1056,,
