# Fantasy Football Data Collection Notebook

In [1]:
# Import libraries
from bs4 import BeautifulSoup
from collections import defaultdict
import json
import numpy as np
import os.path
import pprint
import requests


In [2]:
# User input block: league/season settings that the later cells read
league_id = "748024"
season_id = "2017"
regular_season_length = 13

# Output folder is derived from the season id
data_directory = "%s-season-data" % season_id

# Team id -> owner record. The ids are strings because they are concatenated
# into request URLs and used as dictionary keys further down.
owners_json = dict(
    (owner_team_id, {'owner': owner_name})
    for owner_team_id, owner_name in [
        ('1', 'Sean Fitzgerald'),
        ('2', 'Darryl McFarland'),
        ('3', 'Jason McMillan'),
        ('5', 'Bradley Putman'),
        ('7', 'Jason Smith'),
        ('8', 'Rick Toms'),
        ('10', 'Jerry LeBlanc'),
        ('11', 'Matt Zeback'),
        ('12', 'James Waller'),
        ('13', 'Zach Hall'),
    ]
)


In [3]:
# Create the data directory and an empty placeholder for each output file.
# Pure-Python calls replace the `!mkdir` / `!touch` shell escapes so the cell
# also runs outside IPython, on any platform, and with paths containing spaces.
if not os.path.isdir(data_directory):
    os.makedirs(data_directory)

for data_file in ("owners.json", "players.json", "schedules.json"):
    new_file = data_directory + "/" + data_file
    if not os.path.isfile(new_file):
        # Append mode creates a missing file without truncating anything
        open(new_file, 'a').close()
    

### This code block will save the following information for each league member:
1. Owner information (owner IDs, owner names, team names, and abbreviations) from their [clubhouse webpage](http://games.espn.com/ffl/clubhouse?leagueId=748024&teamId=5&seasonId=2017).
2. Schedule information from their [schedule webpage](http://games.espn.com/ffl/schedule?leagueId=748024&teamId=5).

In [4]:
# Maps each team id to {week number: opponent team id} for the regular season
schedules_json = {}

# Loop over owner ids (the keys of owners_json are already strings)
for team_id in owners_json:

    # Use the clubhouse webpage to gather the team name and abbreviation for owners.json
    current_url = "http://games.espn.com/ffl/clubhouse?leagueId=" + league_id + "&seasonId=" + season_id + \
        "&teamId=" + team_id
    current_team = BeautifulSoup(requests.get(current_url).text, 'html.parser').find('h3', {'class': 'team-name'})

    # Team name is the heading text before " ("; the abbreviation is the <em> text without its parentheses
    owners_json[team_id]['team'] = str(current_team.text.split(' (')[0])
    owners_json[team_id]['abbv'] = str(current_team.find('em').text.strip('(').strip(')'))

    # Week '0' holds the drafted roster; it is filled in by the draft recap cell
    owners_json[team_id]['weekly_rosters'] = {'0': {}}

    # Use the schedule webpage to gather the regular season schedule for schedules.json
    current_url = "http://games.espn.com/ffl/schedule" + "?leagueId=" + league_id + "&seasonId=" + season_id + \
        "&teamId=" + team_id
    schedule_html = BeautifulSoup(requests.get(current_url).text, 'html.parser')
    schedule_table = list(schedule_html.find('table', {'class': 'tableBody'}).find_all('tr'))

    current_schedule = {}
    # Rows 0-1 are skipped (presumably title/header rows -- TODO confirm against the page)
    for i in range(2, 2 + regular_season_length):
        row = schedule_table[i].find_all('td')

        # Take the last whitespace-separated token of the first cell as the week number.
        # str.strip('Week ') removes a *set of characters*, not a prefix, so it is avoided here.
        week = str(row[0].text.split()[-1])

        # The second query parameter of the opponent link is expected to be "teamId=<id>";
        # keep what follows the '=' instead of stripping the characters of 'teamId='.
        opponent_param = row[3].find('a')['href'].split('&')[1]
        opponent = str(opponent_param.partition('=')[2])

        current_schedule[week] = opponent
    schedules_json[team_id] = current_schedule
    

### This code block will gather the league draft results for the current season
- Information is gathered from the [draft recap webpage](http://games.espn.com/ffl/tools/draftrecap?leagueId=748024&seasonId=2017).
- Each owner's drafted roster is saved, along with the season point total for each of those players.

In [5]:
# Player DB: player id -> {'position', 'name', and (except for D/ST) 'team'}
players_json = {}

# Gather data from the draft recap webpage
current_url = "http://games.espn.com/ffl/tools/draftrecap" + "?leagueId=" + league_id + "&seasonId=" + season_id
draft_html = BeautifulSoup(requests.get(current_url).text, 'html.parser')\
    .find('div', {'class': 'games-fullcol games-fullcol-extramargin'})
draft_table = list(draft_html.find('table').find_all('td'))

# Loop over draft rounds
# NOTE(review): the stride of 32 cells per round is hard-coded; it presumably
# depends on the number of teams in the league -- confirm before reusing elsewhere.
for i in range(0, len(draft_table), 32):
    current_round = list(draft_table[i].find('table').find_all('tr'))

    # Loop over individual picks (row 0 is skipped)
    for j in range(1, len(current_round)):
        current_pick = {}

        # Extract all player info for the current draft pick
        raw_value = current_round[j].find_all('td')
        player_info = str(raw_value[1].text.replace(u'\xa0', ' ')).split(' ')

        # Extract player position (last token of the player text)
        player_pos = str(player_info[-1])
        current_pick['position'] = player_pos

        # Extract player name and NFL team
        # Note: HTML formatting is slightly different for "D/ST" players as opposed to other positions,
        # so D/ST entries get no 'team' key
        if player_pos != 'D/ST':
            current_pick['team'] = player_info[-2]
            current_pick['name'] = " ".join(player_info[:-2]).strip(',').strip('*')
        else:
            current_pick['name'] = " ".join(player_info[:-1]).strip(',').strip('*')

        # Add extracted player information to the player DB
        player_id = str(raw_value[1].find('a')['playerid'])
        players_json[player_id] = current_pick

        # The second query parameter of the owner link is expected to be "teamId=<id>";
        # keep what follows the '='. str.strip('teamId=') removes a set of characters,
        # not a prefix, so it is avoided here.
        owner_param = str(raw_value[2].find('a')['href']).split('&')[1]
        owner_id = owner_param.partition('=')[2]

        # Add <player id: season points total> for this player to the owner's week 0 roster
        current_url = "http://games.espn.com/ffl/freeagency" + "?leagueId=" + league_id + "&seasonId=" + season_id + \
            "&avail=-1" + "&playerId=" + player_id

        season_score = str(BeautifulSoup(requests.get(current_url).text, 'html.parser').find('td', \
            {'class': 'playertableStat appliedPoints sortedCell'}).text)

        # '--' is the page's placeholder for "no score"; store it as zero
        if season_score == '--':
            season_score = '0'
        owners_json[owner_id]['weekly_rosters']['0'][player_id] = season_score

### This code block will gather each owner's roster information for every week in the regular season
- Information is gathered from the [quick box score webpage](http://games.espn.com/ffl/boxscorequick?leagueId=748024&teamId=11&scoringPeriodId=1&seasonId=2017&view=scoringperiod&version=quick).
- Each owner's active and bench lineups are saved, along with the weekly point totals for each of those players.

In [6]:
def _collect_roster(player_rows, player_db):
    """Turn box-score table rows into a {player id: weekly score} dict.

    Players that are not yet in ``player_db`` are added to it in place with
    their name and position.

    Args:
        player_rows: iterable of <tr> elements, one per rostered player.
        player_db: dict keyed by player-id string; mutated in place.

    Returns:
        dict mapping player-id string to that week's score string, with the
        '--' placeholder replaced by '0'.
    """
    roster = {}
    for player in player_rows:
        cells = player.find_all('td')

        # Extract all player info (non-breaking spaces are normalised first)
        info = str(cells[1].text.replace(u'\xa0', ' ')).split(' ')

        # Always use a plain str id so lineup, bench and player DB keys agree
        player_id = str(player.find_all('a')[0]['playerid'])
        name = (info[0] + " " + info[1]).strip(',').strip('*')

        # Extract player position (Note: HTML is formatted differently for suspended players)
        pos = info[-1]
        if pos == 'SSPD':
            pos = info[-3]

        # Only add new players to player DB
        if player_id not in player_db:
            player_db[player_id] = {'name': name, 'position': pos}

        # '--' is the page's placeholder for "no score"; store it as zero
        score = str(cells[-1].text)
        if score == '--':
            score = '0'
        roster[player_id] = score
    return roster


# Loop over weeks in the regular season
for week_id in range(1, regular_season_length + 1):
    week_id = str(week_id)
    # Single-argument print(...) behaves the same on Python 2 and Python 3
    print("Collecting week %s rosters... " % week_id)

    # Loop over owner ids
    for team_id in owners_json:
        current_url = "http://games.espn.com/ffl/boxscorequick" + "?leagueId=" + league_id + "&seasonId=" + season_id \
            + "&teamId=" + team_id + "&scoringPeriodId=" + week_id + "&view=scoringperiod" + "&version=quick"

        # Download and parse the "quick box score" page once; both tables live on it
        box_score_html = BeautifulSoup(requests.get(current_url).text, 'html.parser')

        # Active lineup rows (the fixed slice presumably skips header rows and
        # assumes 9 starters -- TODO confirm against league settings)
        lineup_table = list(box_score_html.find(id="playertable_0").find_all('tr'))[3:12]

        # Bench rows (fixed slice, presumably 7 bench slots -- TODO confirm)
        bench_table = list(box_score_html.find(id="playertable_1").find_all('tr'))[2:9]

        # Add <player id: current week's score> for lineup and bench to the owner's DB
        owners_json[team_id]['weekly_rosters'][week_id] = {
            'lineup': _collect_roster(lineup_table, players_json),
            'bench': _collect_roster(bench_table, players_json),
        }

print("Done.")


Collecting week 1 rosters... 
Collecting week 2 rosters... 
Collecting week 3 rosters... 
Collecting week 4 rosters... 
Collecting week 5 rosters... 
Collecting week 6 rosters... 
Collecting week 7 rosters... 
Collecting week 8 rosters... 
Collecting week 9 rosters... 
Collecting week 10 rosters... 
Collecting week 11 rosters... 
Collecting week 12 rosters... 
Collecting week 13 rosters... 
Done.


### These final codeblocks write all of the information saved above to the specified data directory
- There should be three separate data files (owners.json, players.json, and schedules.json).
- Refer to the [data dictionary](./data-dictionary.txt) for an explanation of each file's contents and format.

In [7]:
# Write schedule info to schedules.json
# json.dump is used instead of pprint.pformat so the file holds real JSON
# (pformat writes a Python repr, which JSON parsers reject).
with open(data_directory + "/schedules.json", 'w') as f:
    json.dump(schedules_json, f, indent=4, sort_keys=True)

# print "Schedule Info\n" + "".join(["=" for i in range(100)])
# with open(data_directory + "/schedules.json", 'r') as f:
#     print f.read()

In [8]:
# Write owner info to owners.json
# json.dump is used instead of pprint.pformat so the file holds real JSON
# (pformat writes a Python repr, which JSON parsers reject).
with open(data_directory + "/owners.json", 'w') as f:
    json.dump(owners_json, f, indent=4, sort_keys=True)

# print "Current Owner Info\n" + "".join(["=" for i in range(100)])
# with open(data_directory + "/owners.json", 'r') as f:
#     print f.read()

In [9]:
# Write player info to players.json
# json.dump is used instead of pprint.pformat so the file holds real JSON
# (pformat writes a Python repr, which JSON parsers reject).
with open(data_directory + "/players.json", 'w') as f:
    json.dump(players_json, f, indent=4, sort_keys=True)

# print "Relevant Player Info\n" + "".join(["=" for i in range(100)])
# with open(data_directory + "/players.json", 'r') as f:
#     print f.read()