In [41]:
%run ./PandaSoup.ipynb

You imported PandaSoup!


## PandaSoup Test Cases

In [42]:
!mkdir test
!mkdir test/data

mkdir: cannot create directory ‘test’: File exists
mkdir: cannot create directory ‘test/data’: File exists


In [43]:
def f(soup):
    if type(soup) is list:
        print(soup)
    players = {}
    for n, link in enumerate(soup.find_all(href=re.compile('/stats/players'))):
        name = link.text
        if name != "players":
            row = link.findParent().findParent()
            players[n] = [td.text for td in row.find_all('td')]
            players[n][0] = name
    return pd.DataFrame(players).T

test_params = {
    'base' : "http://fftoday.com/stats/playerstats.php?Season={}&GameWeek={}&PosID={}",
    'param_names' : ["season", "week", "position"],
    'request_delay' : 3,
    'extract_func': f,
}

### Test 1: Default behavior

In [44]:
twp = PandaSoup(test_params)
twp.scrape_all([
    (2015, 5, 10), 
    (2016, 6, 10)
], reset=True, verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)

Clear raw data
parameter values (2015, 5, 10) --> key (2015, 5, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=5&PosID=10
parameter values (2016, 6, 10) --> key (2016, 6, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2016&GameWeek=6&PosID=10
(2015, 5, 10) yielded 1 rows of data
(2016, 6, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/15317090177_2015_5_10.csv
Writing to test/data/15317090177_2016_6_10.csv


['test/data/15317090177_2015_5_10.csv', 'test/data/15317090177_2016_6_10.csv']

### Test 2: Use grouping strategy

In [45]:
# Grouping strategy
test_params_with_group = deepcopy(test_params)
test_params_with_group['grouping_strategy'] = ['season', 'week']

twp = PandaSoup(test_params_with_group)
twp.scrape_all([
    (2014, 3, 10), 
    (2015, 10, 10), 
], verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)


parameter values (2014, 3, 10) --> key (2014, 3)
Reading http://fftoday.com/stats/playerstats.php?Season=2014&GameWeek=3&PosID=10
parameter values (2015, 10, 10) --> key (2015, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=10&PosID=10
(2014, 3) yielded 1 rows of data
(2015, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/15317090300_2014_3.csv
Writing to test/data/15317090300_2015_10.csv


['test/data/15317090300_2014_3.csv', 'test/data/15317090300_2015_10.csv']

### Test 3: Custom CSV path

In [46]:
# TODO: I think this is broken... fix it later
test_params_with_csv = deepcopy(test_params)
test_params_with_csv['csv_base'] = "test/data/{}_{}_{}.csv"

twp = PandaSoup(test_params_with_csv)
twp.debug_str(2)
twp.scrape_all([
    (2015, 5, 10), 
    (2016, 6, 10)
], verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)

Raw data: 0 items
    keys: dict_keys([])
Data: 0 items
Default output path: test/data/{}_{}_{}.csv
    keys: dict_keys([])
Initialized with params: {'base': 'http://fftoday.com/stats/playerstats.php?Season={}&GameWeek={}&PosID={}', 'param_names': ['season', 'week', 'position'], 'request_delay': 3, 'extract_func': <function f at 0x7f33cee33bf8>, 'csv_base': 'test/data/{}_{}_{}.csv'}
parameter values (2015, 5, 10) --> key (2015, 5, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=5&PosID=10
parameter values (2016, 6, 10) --> key (2016, 6, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2016&GameWeek=6&PosID=10
(2015, 5, 10) yielded 1 rows of data
(2016, 6, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/2015_5_10.csv
Writing to test/data/2016_6_10.csv


['test/data/2015_5_10.csv', 'test/data/2016_6_10.csv']

### Test 4: Merging

In [53]:
twp = PandaSoup(test_params)
twp.scrape_all([
    (2015, 1, 10), 
    (2016, 1, 10)
], verbose=True)
twp.make_dataframes(verbose=True)

twp2 = PandaSoup(test_params)
twp2.scrape_all([
    (2015, 1, 10), 
    (2014, 1, 10)
], verbose=True)
twp2.make_dataframes(verbose=True)

twp.merge(twp2, verbose=True)
for t in [(2015, 1, 10), (2014, 1, 10), (2016, 1, 10)]:
    assert t in twp.data
    assert t in twp.raw_data

parameter values (2015, 1, 10) --> key (2015, 1, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=1&PosID=10
parameter values (2016, 1, 10) --> key (2016, 1, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2016&GameWeek=1&PosID=10
(2015, 1, 10) yielded 1 rows of data
(2016, 1, 10) yielded 1 rows of data
Completed extracting data
parameter values (2015, 1, 10) --> key (2015, 1, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=1&PosID=10
parameter values (2014, 1, 10) --> key (2014, 1, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2014&GameWeek=1&PosID=10
(2015, 1, 10) yielded 1 rows of data
(2014, 1, 10) yielded 1 rows of data
Completed extracting data
36 rows added to (2015, 1, 10)
34 rows added to (2014, 1, 10)


In [None]:
# TODO: More test cases