In [14]:
%run ./PandaSoup.ipynb

You imported PandaSoup!


## PandaSoup Test Cases

In [15]:
!mkdir test
!mkdir test/data

mkdir: cannot create directory ‘test’: File exists
mkdir: cannot create directory ‘test/data’: File exists


In [16]:
def f(soup):
    players = {}
    for n, link in enumerate(soup.find_all(href=re.compile('/stats/players'))):
        name = link.text
        if name != "players":
            row = link.findParent().findParent()
            players[n] = [td.text for td in row.find_all('td')]
            players[n][0] = name
    return pd.DataFrame(players).T

test_params = {
    'base' : "http://fftoday.com/stats/playerstats.php?Season={}&GameWeek={}&PosID={}",
    'param_names' : ["season", "week", "position"],
    'request_delay' : 3,
    'extract_func': f,
}

### Test 1: Default behavior

In [17]:
twp = PandaSoup(test_params)
twp.scrape_all([
    (2015, 5, 10), 
    (2016, 6, 10)
], verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)

parameter values (2015, 5, 10) --> key (2015, 5, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=5&PosID=10
parameter values (2016, 6, 10) --> key (2016, 6, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2016&GameWeek=6&PosID=10
(2015, 5, 10) yielded 1 rows of data
(2016, 6, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/15315241259_2015_5_10.csv
Writing to test/data/15315241259_2016_6_10.csv


['test/data/15315241259_2015_5_10.csv', 'test/data/15315241259_2016_6_10.csv']

### Test 2: Use grouping strategy

In [18]:
# Grouping strategy
test_params_with_group = deepcopy(test_params)
test_params_with_group['grouping_strategy'] = ['season', 'week']

twp = PandaSoup(test_params_with_group)
twp.scrape_all([
    (2014, 3, 10), 
    (2015, 10, 10), 
], verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)



parameter values (2014, 3, 10) --> key (2014, 3)
Reading http://fftoday.com/stats/playerstats.php?Season=2014&GameWeek=3&PosID=10
parameter values (2015, 10, 10) --> key (2015, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=10&PosID=10
(2015, 5, 10) yielded 1 rows of data
(2016, 6, 10) yielded 1 rows of data
(2014, 3) yielded 1 rows of data
(2015, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/15315241326_2015_5.csv
Writing to test/data/15315241326_2016_6.csv
Writing to test/data/15315241326_2014_3.csv
Writing to test/data/15315241326_2015_10.csv


['test/data/15315241326_2015_5.csv',
 'test/data/15315241326_2016_6.csv',
 'test/data/15315241326_2014_3.csv',
 'test/data/15315241326_2015_10.csv']

### Test 3: Custom CSV path

In [19]:
# TODO: I think this is broken... fix it later
test_params_with_csv = deepcopy(test_params)
test_params_with_csv['csv_base'] = "test/data/{}_{}_{}.csv"

twp = PandaSoup(test_params_with_csv)
twp.debug_str(2)
twp.scrape_all([
    (2015, 5, 10), 
    (2016, 6, 10)
], verbose=True)
twp.make_dataframes(verbose=True)
twp.to_csv(verbose=True)

Raw data: 4 items
    keys: dict_keys([(2015, 5, 10), (2016, 6, 10), (2014, 3), (2015, 10)])
Data: 4 items
Default output path: test/data/{}_{}_{}.csv
    keys: dict_keys([(2015, 5, 10), (2016, 6, 10), (2014, 3), (2015, 10)])
Initialized with params: {'base': 'http://fftoday.com/stats/playerstats.php?Season={}&GameWeek={}&PosID={}', 'param_names': ['season', 'week', 'position'], 'request_delay': 3, 'extract_func': <function f at 0x7f8428da8e18>, 'csv_base': 'test/data/{}_{}_{}.csv'}
parameter values (2015, 5, 10) --> key (2015, 5, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2015&GameWeek=5&PosID=10
parameter values (2016, 6, 10) --> key (2016, 6, 10)
Reading http://fftoday.com/stats/playerstats.php?Season=2016&GameWeek=6&PosID=10
(2015, 5, 10) yielded 2 rows of data
(2016, 6, 10) yielded 2 rows of data
(2014, 3) yielded 1 rows of data
(2015, 10) yielded 1 rows of data
Completed extracting data
Writing to test/data/2015_5_10.csv
Writing to test/data/2016_6_10.csv


IndexError: tuple index out of range

In [None]:
# TODO: More test cases