# Total of 128651 entries, 353 players, 24 teams, 250 games

## PBP Formatting
* each entry tags exactly one event with the (x,y) coordinates of a single player
* events involving multiple players (e.g. blocking fouls (shooter, fouler), blocked shot (shooter, blocker)) are listed as separate consecutive events 
* entries without applicable coordinates are given ('', '')
* ~515 pbp events per game
* (unconfirmed) 389 players in roster.csv - 353 players in pbp.csv = 36 players who are on a roster yet are never involved in a play. This probably means they got (almost) no playing time.
* substitutions are logged (event_desc_id: 'IN', 'OUT')
* missed/made shots and foul shots are described with totals (made/total) for the player in game
* (unconfirmed) missed/total tallies are separated for types of shots (ft, 2pt, 3pt)
* game_clock in minutes:seconds only

### Build Dictionary

In [2]:
import csv

In [3]:
# Copy the raw export to pbp.csv with the text 'index' prepended.
# NOTE(review): this presumably names an otherwise unnamed first column --
# the parsed rows shown further down carry an 'index' key -- confirm against
# the raw file's header line.
# `with` guarantees both handles are closed even if the read or write fails.
with open('DataChallengeEuroGamePBP.csv', 'r') as f:
    s = 'index' + f.read()

with open('pbp.csv', 'w') as fout:
    fout.write(s)

In [4]:
# Load every row of pbp.csv into `pbp` as a list of dicts keyed by the CSV
# header names (csv.DictReader leaves all values as strings).
# newline='' lets the csv module handle line endings itself, as its
# documentation requires, so quoted fields containing newlines parse
# correctly.
with open('pbp.csv', 'r', newline='') as f:
    reader = csv.DictReader(f, delimiter=',', quotechar='"')
    pbp = list(reader)

### Housekeeping / Basic stats

In [5]:
# Number of play-by-play rows held in pbp.
len(pbp)

128651

#### sample game

In [6]:
# Dump every row whose gamecode is '125', prefixed by its position in pbp.
for row_number, entry in enumerate(pbp):
    if entry['gamecode'] != '125':
        continue
    print(row_number, entry)

63847 {'team_id': '', 'event_desc': 'Begin Period', 'index': '63847', 'coord_y': '', 'player_jersey_number': '', 'period': '1', 'event_desc_id': 'BP', 'pbp_id': '1', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '125', 'team_name': '', 'player_id': '', 'player_name': '', 'game_minute': '1', 'seasoncode': 'E2015', 'game_clock': '10:00'}
63848 {'team_id': 'BAM', 'event_desc': 'Assist (1)', 'index': '63848', 'coord_y': '', 'player_jersey_number': '6', 'period': '1', 'event_desc_id': 'AS', 'pbp_id': '2', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '125', 'team_name': 'Brose Baskets Bamberg', 'player_id': 'PABN', 'player_name': 'ZISIS, NIKOS', 'game_minute': '1', 'seasoncode': 'E2015', 'game_clock': '09:56'}
63849 {'team_id': 'BAM', 'event_desc': 'Foul Drawn (1)', 'index': '63849', 'coord_y': '', 'player_jersey_number': '43', 'period': '1', 'event_desc_id': 'RV', 'pbp_id': '3', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '125',

In [7]:
# Distinct gamecode values in first-seen order.
# Membership is tested against a set (O(1)) instead of the growing result
# list (O(n)), so the pass over pbp stays linear.
gamecodes = []
seen_gamecodes = set()
for r in pbp:
    code = r['gamecode']
    if code not in seen_gamecodes:
        seen_gamecodes.add(code)
        gamecodes.append(code)
print(len(gamecodes))

250


#### 24 teams + '' team for team-neutral events (e.g. start of period)

In [8]:
# Distinct team_id values in first-seen order (the empty string counts as
# its own value).
# A companion set gives O(1) membership tests instead of scanning the list.
teamids = []
seen_teamids = set()
for r in pbp:
    team = r['team_id']
    if team not in seen_teamids:
        seen_teamids.add(team)
        teamids.append(team)
len(teamids)

25

#### 353 players + '' player for events with no involved players (353 is not confirmed)

In [9]:
# Distinct player_id values in first-seen order (the empty string counts as
# its own value).
# A companion set gives O(1) membership tests instead of scanning the list.
playerids = []
seen_playerids = set()
for r in pbp:
    pid = r['player_id']
    if pid not in seen_playerids:
        seen_playerids.add(pid)
        playerids.append(pid)
len(playerids)

354

In [10]:
# Distinct player_name values in first-seen order (the empty string counts
# as its own value).
# A companion set gives O(1) membership tests instead of scanning the list.
playernames = []
seen_playernames = set()
for r in pbp:
    name = r['player_name']
    if name not in seen_playernames:
        seen_playernames.add(name)
        playernames.append(name)
len(playernames)

355

In [11]:
# Distinct (player_id, player_name) pairs in first-seen order. Comparing
# this count with the separate id and name counts exposes ids that map to
# several names and names that map to several ids.
# The pair is built once per row and checked against a set for O(1) lookups.
idnames = []
seen_pairs = set()
for r in pbp:
    pair = (r['player_id'], r['player_name'])
    if pair not in seen_pairs:
        seen_pairs.add(pair)
        idnames.append(pair)
len(idnames)

358

In [12]:
from collections import Counter

# player_ids that occur in more than one (player_id, player_name) pair of
# idnames, i.e. ids recorded under several different names.
# Counter tallies the ids in a single pass instead of rebuilding the id list
# and calling .count() for every element; it also avoids using the builtin
# name `id` as a loop variable.
id_counts = Counter(pair[0] for pair in idnames)
dupeids = {pid for pid, count in id_counts.items() if count > 1}
print(dupeids)

{'P004264', 'P005983', 'P001389', 'P005367'}


In [13]:
from collections import Counter

# player_names that occur in more than one (player_id, player_name) pair of
# idnames, i.e. names recorded under several different ids.
# Counter tallies the names in a single pass instead of rebuilding the name
# list and calling .count() for every element.
name_counts = Counter(pair[1] for pair in idnames)
dupenames = {name for name, count in name_counts.items() if count > 1}
print(dupenames)

{'', 'YURTSEVEN, OMER'}


In [14]:
# For every duplicated name, print the first row encountered for each
# distinct player_id recorded under that name, then a separator line.
for name in dupenames:
    ids_shown = []
    for row in pbp:
        if row['player_name'] != name:
            continue
        if row['player_id'] in ids_shown:
            continue
        ids_shown.append(row['player_id'])
        print(row)
    print('-' * 30)

{'team_id': '', 'event_desc': 'Begin Period', 'index': '0', 'coord_y': '', 'player_jersey_number': '', 'period': '1', 'event_desc_id': 'BP', 'pbp_id': '1', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '1', 'team_name': '', 'player_id': '', 'player_name': '', 'game_minute': '1', 'seasoncode': 'E2015', 'game_clock': '10:00'}
{'team_id': 'MAD', 'event_desc': 'Coach Foul (1)', 'index': '309', 'coord_y': '', 'player_jersey_number': '', 'period': '3', 'event_desc_id': 'C', 'pbp_id': '310', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '1', 'team_name': 'Real Madrid', 'player_id': 'CO_B', 'player_name': '', 'game_minute': '26', 'seasoncode': 'E2015', 'game_clock': '04:59'}
{'team_id': 'PAN', 'event_desc': 'Coach Foul (1)', 'index': '10355', 'coord_y': '', 'player_jersey_number': '', 'period': '4', 'event_desc_id': 'C', 'pbp_id': '455', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '20', 'team_name': 'Panathinaikos Athens', 'player_i

In [15]:
# For every duplicated id, print the first row encountered for each distinct
# player_name recorded under that id, then a separator line.
for player_id in dupeids:
    names_shown = []
    rows_for_id = (row for row in pbp if row['player_id'] == player_id)
    for row in rows_for_id:
        if row['player_name'] not in names_shown:
            names_shown.append(row['player_name'])
            print(row)
    print('-' * 30)

{'team_id': 'GSS', 'event_desc': 'In', 'index': '3133', 'coord_y': '', 'player_jersey_number': '1', 'period': '1', 'event_desc_id': 'IN', 'pbp_id': '48', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '7', 'team_name': 'Stelmet Zielona Gora', 'player_id': 'P004264', 'player_name': 'BOST, DEMARQUIS', 'game_minute': '6', 'seasoncode': 'E2015', 'game_clock': '04:40'}
{'team_id': 'GSS', 'event_desc': 'In', 'index': '9449', 'coord_y': '', 'player_jersey_number': '1', 'period': '1', 'event_desc_id': 'IN', 'pbp_id': '73', 'score_visitor': '', 'coord_x': '', 'score_home': '', 'gamecode': '19', 'team_name': 'Stelmet Zielona Gora', 'player_id': 'P004264', 'player_name': 'BOST, DEE', 'game_minute': '8', 'seasoncode': 'E2015', 'game_clock': '02:40'}
------------------------------
{'team_id': 'ULK', 'event_desc': 'In', 'index': '1049', 'coord_y': '', 'player_jersey_number': '14', 'period': '1', 'event_desc_id': 'IN', 'pbp_id': '25', 'score_visitor': '', 'coord_x': '', 'score_home

In [16]:
# Distinct event_desc strings in first-seen order.
# There are thousands of distinct descriptions, so scanning the result list
# for every row is very slow; a companion set keeps each membership test
# O(1).
event_desc = []
seen_event_desc = set()
for r in pbp:
    desc = r['event_desc']
    if desc not in seen_event_desc:
        seen_event_desc.add(desc)
        event_desc.append(desc)
len(event_desc)

3114

In [17]:
# Distinct event_desc_id codes in first-seen order; both the count and the
# list itself are printed.
# A companion set gives O(1) membership tests instead of scanning the list.
event_desc_id = []
seen_event_desc_id = set()
for r in pbp:
    code = r['event_desc_id']
    if code not in seen_event_desc_id:
        seen_event_desc_id.add(code)
        event_desc_id.append(code)
print(len(event_desc_id))
print(event_desc_id)

29
['BP', '2FGAB', 'AG', 'FV', 'D', '3FGA', 'O', 'TO', '2FGA', '3FGM', 'AS', 'ST', 'RV', 'CM', 'IN', 'OUT', '2FGM', 'OF', 'TOUT_TV', 'FTA', 'FTM', '3FGAB', 'TOUT', 'C', 'CMU', 'CMT', 'EG', 'B', 'CMD']


### Analysis

#### Identify the top 5 teams & players in offensive rebounding. Describe the metric(s) used and explain why you used it.

In [22]:
# Group rows by event type:
#   * event_desc_id 'O' rows (counted here as offensive rebounds) are
#     collected per player_id and per team_id;
#   * field-goal attempt rows ('2FGA', '2FGAB', '3FGA', '3FGAB'; treated
#     here as missed shots) are collected per team_id.
# Then print "<team> <rebound rows> / <missed-shot rows>" for every team
# that has at least one rebound row.
MISSED_FG_EVENTS = ('2FGA', '2FGAB', '3FGA', '3FGAB')

orebs_player = {}
orebs_team = {}
missedshots_team = {}
for r in pbp:
    event = r['event_desc_id']
    if event == 'O':
        # setdefault creates the per-key list on first sight.
        orebs_player.setdefault(r['player_id'], []).append(r)
        orebs_team.setdefault(r['team_id'], []).append(r)
    elif event in MISSED_FG_EVENTS:
        # TODO: consider last missed free throw
        missedshots_team.setdefault(r['team_id'], []).append(r)

for key, value in orebs_team.items():
    # .get avoids a KeyError for a team that has rebound rows but no
    # missed-shot rows; such a team is reported with a denominator of 0.
    print(key, len(value), '/', len(missedshots_team.get(key, [])))

MUN 99 / 350
TEL 123 / 356
MAD 312 / 942
PAN 278 / 856
KHI 272 / 831
RED 282 / 864
TIV 331 / 1022
KSK 94 / 345
SAS 75 / 346
ZAL 244 / 824
CED 236 / 795
MIL 107 / 367
GSS 102 / 344
ULK 303 / 925
IST 276 / 791
DAR 237 / 769
BAM 186 / 716
LMG 87 / 309
STR 99 / 344
CSK 277 / 843
OLY 283 / 836
BAR 283 / 929
MAL 313 / 821
BAS 297 / 1012
