In [5]:
import math
import pandas as pd
import re
import urllib.request
    

In [102]:
# Download each season's draft log and index every drafted player by id.
#
# Result: draft_list maps player id (str, as captured from the page) to a dict
# with keys 'player_name', 'team_name', 'draft_position', 'draft_round',
# 'round_position' and 'draft_season'. If an id shows up in more than one
# season, the latest season processed wins.
draft_url_format = 'http://dolphinsim.com/clmanager/epbl/year{0:04d}.txt'
# Raw string: the pattern contains regex escapes (\. \s \( \)) that are not
# valid *string* escapes and trigger a SyntaxWarning in a plain literal.
# Groups: 1 = overall pick number, 2 = team name, 3 = player id, 4 = player name.
draft_regex = re.compile(r'([0-9]+)\. ([a-z,A-Z,\s]*) draft ([0-9]+) ([A-Z]\.[A-Z][a-z,A-Z]+) \([A-Z]*.*\)<br>')
# Overall pick numbers are converted to (round, pick-in-round) in groups of 30.
PICKS_PER_ROUND = 30
# Full club names that are shortened to the city-only form.
TEAM_NAME_ALIASES = {
    'Anaheim Angels': 'Anaheim',
    'Arizona Diamondbacks': 'Arizona',
    'Detroit Tigers': 'Detroit',
}
draft_list = {}
for season in range(58,89):
    print('Season {0:d}'.format(season))
    draft_url = draft_url_format.format(season)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(draft_url) as response:
        page = response.read()
    lines = page.decode("utf-8").split('\n')
    for line in lines:
        draft_matches = draft_regex.match(line)
        if draft_matches is None:
            # Not a draft-pick line.
            continue
        player_id = draft_matches.group(3)
        player_name = draft_matches.group(4)
        team_name = draft_matches.group(2)
        team_name = TEAM_NAME_ALIASES.get(team_name, team_name)
        # Stored as int so it is consistent with draft_round / round_position.
        draft_position = int(draft_matches.group(1))
        # Integer arithmetic: pick 30 -> round 1 pick 30, pick 31 -> round 2 pick 1.
        rounds_completed, picks_before = divmod(draft_position - 1, PICKS_PER_ROUND)
        draft_round = rounds_completed + 1
        round_position = picks_before + 1

        draft_list[player_id] = {
            'player_name': player_name,
            'team_name': team_name,
            'draft_position': draft_position,
            'draft_round': draft_round,
            'round_position': round_position,
            'draft_season': season,
        }
            

Season 58
Season 59
Season 60
Season 61
Season 62
Season 63
Season 64
Season 65
Season 66
Season 67
Season 68
Season 69
Season 70
Season 71
Season 72
Season 73
Season 74
Season 75
Season 76
Season 77
Season 78
Season 79
Season 80
Season 81
Season 82
Season 83
Season 84
Season 85
Season 86
Season 87
Season 88


In [103]:
# Count drafted players per draft season, per draft round and per drafting team,
# then print each tally in sorted key order.
season_counts = {}
round_counts = {}
team_counts = {}

for player_id, draft_info in draft_list.items():
    # Each (tally, field) pair bumps one counter for this player.
    for tally, field in ((season_counts, 'draft_season'),
                         (round_counts, 'draft_round'),
                         (team_counts, 'team_name')):
        tally[draft_info[field]] = tally.get(draft_info[field], 0) + 1

    # Per-player debug line (disabled):
    # print('{} {} drafted in season {} with pick {} of round {} by {}'.format(
    #     player_id, draft_info['player_name'], draft_info['draft_season'],
    #     draft_info['round_position'], draft_info['draft_round'], draft_info['team_name']))

print('Season counts:')
for season in sorted(season_counts):
    print('{} from season {}'.format(season_counts[season], season))

print('\nRound counts:')
for draft_round in sorted(round_counts):
    print('{} from round {}'.format(round_counts[draft_round], draft_round))

print('\nTeam counts:')
for team_name in sorted(team_counts):
    print('{} from team {}'.format(team_counts[team_name], team_name))

Season counts:
1 from season 59
1 from season 61
1 from season 62
6 from season 63
8 from season 64
6 from season 66
12 from season 67
14 from season 68
12 from season 69
18 from season 70
40 from season 71
36 from season 72
46 from season 73
59 from season 74
58 from season 75
92 from season 76
89 from season 77
86 from season 78
114 from season 79
217 from season 80
273 from season 81
359 from season 82
472 from season 83
587 from season 84
703 from season 85
866 from season 86
1419 from season 87
1500 from season 88

Round counts:
413 from round 1
336 from round 2
308 from round 3
269 from round 4
269 from round 5
252 from round 6
248 from round 7
206 from round 8
209 from round 9
211 from round 10
209 from round 11
187 from round 12
179 from round 13
173 from round 14
163 from round 15
177 from round 16
156 from round 17
143 from round 18
149 from round 19
137 from round 20
128 from round 21
133 from round 22
126 from round 23
113 from round 24
120 from round 25
109 from round 26
1

In [99]:
# Roster page URL template; the team id is zero-padded to three digits.
team_url_format = 'http://dolphinsim.com/clmanager/epbl/ros{0:03d}.htm'
# Raw strings keep the regex escapes (\s, \#) out of Python's string-escape
# processing, which warns on them in plain literals. The compiled patterns are
# unchanged.
# One or more whitespace characters: the column separator in roster rows.
words_regex = re.compile(r'\s+')
# History link: group 1 = URL before the '#', group 2 = numeric fragment,
# group 3 = link text.
hist_url_regex = re.compile(r'.*<a href="(.*)\#([0-9]+)">(.*)</a>.*')
# Two leading digits, optionally followed by 'r' characters; group 1 = the digits.
age_regex = re.compile('([0-9]{2})r*')
    

class Player:
    """A single player parsed from one whitespace-separated roster row.

    Attributes:
        team: the object passed as ``team`` by the caller.
        history_url: URL part of the row's history link, or None if the row
            has no link.
        player_id: link fragment when a history link is present, otherwise the
            row's first column (str in both cases).
        name: link text when a history link is present, otherwise the row's
            second column.
        position: value of the position column.
        age: int parsed from the first two digits of the age column; 0 when
            the column does not start with two digits.
        draft_season, draft_round, draft_position, draft_round_position:
            initialised to 0; this class never fills them in.
    """

    def __init__(self, team, line):
        """Parse ``line`` (one roster row) for the given ``team``.

        Raises:
            IndexError: if the row has fewer columns than expected.
        """
        line = line.strip()
        attrs = words_regex.split(line)
        # Column layout of a row without a history link.
        id_col, name_col, pos_col, age_col = 0, 1, 2, 3

        url_matches = hist_url_regex.match(line)
        if url_matches is not None:
            self.history_url = url_matches.group(1)
            self.player_id = url_matches.group(2)
            self.name = url_matches.group(3)
            # The '<a href=...' markup contains a space, so a linked name
            # occupies one extra token and later columns shift right by one.
            pos_col += 1
            age_col += 1
        else:
            self.history_url = None
            self.player_id = attrs[id_col].strip()
            self.name = attrs[name_col].strip()

        # Store the owning team that was passed in (previously this stored
        # the Team class itself and ignored the argument).
        self.team = team
        self.age = 0
        self.draft_season = 0
        self.draft_round = 0
        self.draft_position = 0
        self.draft_round_position = 0
        self.position = attrs[pos_col].strip()
        age_matches = age_regex.match(attrs[age_col].strip())
        if age_matches is not None:
            # Keep age an int so it has the same type as the 0 default.
            self.age = int(age_matches.group(1))

    def __str__(self):
        """Return the player as an HTML link if a history URL exists, else plain text."""
        if self.history_url is not None:
            return '<a href="{0}#{1}">{1}: {2}</a> {3} {4}'.format(self.history_url,
                                                                   self.player_id, self.name,
                                                                   self.position, self.age)
        return '{} {} {} {}'.format(self.player_id, self.name, self.position, self.age)
    

class Team:
    """A club's roster page and, for a 'majors' club, its six affiliate teams.

    Constructing a Team immediately downloads and parses its roster page.
    A team whose level is 'majors' also constructs affiliates with ids offset
    by 30, 60, 90, 120, 150 and 180 (levels 'AAA', 'AA', 'hi-A', 'A', 'hi-R',
    'R'), each of which downloads its own page.

    Attributes:
        team_id, level, parent: as passed to the constructor.
        url: roster page URL built from ``team_id``.
        name: text of the page's <title> element, whitespace-stripped.
        players: dict mapping player id to Player.
        aaa, aa, hia, a, hir, r: affiliate Teams (only on 'majors' teams).
    """

    def __init__(self, team_id, level='majors', parent='None'):
        """Create the team and load its roster (and affiliates for 'majors').

        NOTE(review): the ``parent`` default is the string 'None', not the
        None singleton; left unchanged so existing callers see the same value.
        """
        self.team_id = team_id
        self.url = team_url_format.format(team_id)
        self.level = level
        self.parent = parent
        self.players = {}
        self.read_roster()
        if level == 'majors':
            self.aaa = Team(team_id + 30, 'AAA', self)
            self.aa = Team(team_id + 60, 'AA', self)
            self.hia = Team(team_id + 90, 'hi-A', self)
            self.a = Team(team_id + 120, 'A', self)
            self.hir = Team(team_id + 150, 'hi-R', self)
            self.r = Team(team_id + 180, 'R', self)

    def __str__(self):
        """Return 'name (level): url', followed by one indented line per affiliate for 'majors'."""
        ret_str = '{0} ({1}): {2}'.format(self.name, self.level, self.url)
        if self.level == 'majors':
            for affiliate in (self.aaa, self.aa, self.hia, self.a, self.hir, self.r):
                ret_str += '\n\t{0}'.format(affiliate)
        return ret_str

    @staticmethod
    def _page_text(page):
        """Return ``page`` as str; bytes are decoded as UTF-8 with undecodable bytes replaced."""
        if isinstance(page, (bytes, bytearray)):
            return page.decode('utf-8', errors='replace')
        return page

    def read_roster(self):
        """Download the roster page, set ``name`` and fill ``players``.

        Prints the team name and every parsed player as a side effect.

        Raises:
            urllib.error.URLError: if the page cannot be fetched.
            IndexError: if the page has fewer <pre> blocks than expected.
        """
        # The context manager closes the connection even if read() raises.
        with urllib.request.urlopen(self.url) as response:
            page = response.read()
        self.set_team_name(page)
        # Decode instead of using the bytes repr, so newlines, tabs, quotes and
        # non-ASCII characters are real characters rather than escape text.
        blocks = self._page_text(page).split('<pre>')
        # Index of the '<pre>'-delimited chunk holding the player table:
        # 4 for 'majors', one less for every other level.
        block = 4
        if self.level != 'majors':
            block -= 1
        # NOTE(review): level[:-1] drops the last character, so none of the
        # levels built by __init__ ('AAA', 'AA', 'hi-A', 'A', 'hi-R', 'R')
        # ever equals 'r' here and this branch does not fire for them.
        # Behaviour kept as-is; confirm whether rookie levels were meant.
        if self.level[:-1] == 'r':
            block -= 1
        attr_block = blocks[block].split('</pre>')[0]
        print(self.name + '\n')
        self.process_block(attr_block)
        print('\n')

    def process_block(self, block):
        """Parse a roster table (text with real newlines) into ``self.players``.

        The first line is treated as the column header and skipped, as are
        blank lines. Each remaining line becomes a Player, is printed, and is
        stored under its player id.
        """
        lines = block.splitlines()
        for line in lines[1:]:
            if not line.strip():
                continue
            player = Player(self, line)
            print(player)
            self.players[player.player_id] = player

    def set_team_name(self, page):
        """Set ``name`` from the page's <title> element.

        ``page`` may be the raw bytes of the page or already-decoded text.
        Surrounding whitespace (including the newline after <title>) is removed.

        Raises:
            IndexError: if the page has no '<title>' tag.
        """
        text = self._page_text(page)
        title = text.split('</title>')[0]
        title = title.split('<title>')[1]
        self.name = title.strip()

            
# Build the name -> Team index. Ids 1..30 are constructed with Team's default
# level, so each one also pulls in its affiliates.
teams = {}
for team_id in range(1, 31):
    team = Team(team_id)
    teams.update({team.name: team})
    print(team)
    # Unconditional break: only the first id is actually processed.
    break


Season 58
Season 59
Season 60
Season 61
Season 62
Season 63
Season 64
Season 65
Season 66
Season 67
Season 68
Season 69
Season 70
Season 71
Season 72
Season 73
Season 74
Season 75
Season 76
Season 77
Season 78
Season 79
Season 80
Season 81
Season 82
Season 83
Season 84
Season 85
Season 86
Season 87
2424 T.Hyde drafted in season 86 with pick 30 of round 24 by Chicago AL
2144 A.Castro drafted in season 86 with pick 23 of round 35 by Detroit
5924 T.Donlon drafted in season 82 with pick 19 of round 3 by San Francisco
3186 P.Massop drafted in season 87 with pick 19 of round 14 by Tampa Bay
2815 R.Wilson drafted in season 83 with pick 21 of round 14 by San Francisco
1222 C.Culver drafted in season 87 with pick 20 of round 28 by Seattle
907 S.Kinsey drafted in season 87 with pick 23 of round 8 by Chicago NL
3279 J.Peters drafted in season 84 with pick 2 of round 20 by Washington
5055 T.Soltero drafted in season 80 with pick 1 of round 4 by Seattle
4993 M.Maciel drafted in season 84 with pick 