In [137]:
import numpy

In [23]:
from pymongo import MongoClient

In [40]:
import pandas

In [41]:
import time

In [3]:
client = MongoClient()

In [4]:
db = client.NBASD

In [8]:
# Handles to the three collections of the NBASD database used in this notebook.
momsum_coll = db.moments_per_game  # iterated below to collect game ids
moments_coll = db.Moments          # queried below by game_id
games_coll = db.Games              # game documents; holds 'play_by_play'

In [10]:
import pprint

In [212]:
# Only the _id of each summary document is needed, so project it server-side
# instead of transferring every full document.
gids = [g["_id"] for g in momsum_coll.find({}, {"_id": 1})]

In [213]:
gid = gids[0]
gid

'0021400010'

In [109]:
game = games_coll.find_one({"game_id" : gid})

In [110]:
pbp = game['play_by_play']

In [111]:
# Column labels, in positional order, applied to each play-by-play row.
PBP_HEADER = [
    "GAME_ID",
    "EVENTNUM",
    "EVENTMSGTYPE",
    "EVENTMSGACTIONTYPE",
    "PERIOD",
    "WCTIMESTRING",
    "PCTIMESTRING",
    "HOMEDESCRIPTION",
    "NEUTRALDESCRIPTION",
    "VISITORDESCRIPTION",
    "SCORE",
    "SCOREMARGIN",
]

In [112]:
def preprocessPbp(pbp):
    """Turn a raw play-by-play mapping into a DataFrame with a GAMECLOCK column.

    Runs ``pbpDict2Df`` on ``pbp`` and then ``addGameClock`` on its result.
    """
    plays_df = pbpDict2Df(pbp)
    return addGameClock(plays_df)


def pbpDict2Df(pbp):
    """Build the play-by-play DataFrame from the pbp dictionary.

    ``pbp`` is either a mapping that has a 'play_by_play' entry or that inner
    mapping itself. Its keys are integer-like strings; rows are emitted in
    ascending numeric key order.

    Returns a DataFrame labelled with PBP_HEADER minus its first column
    (GAME_ID).
    """
    # Accept either the wrapping document or the bare play mapping.
    events = pbp['play_by_play'] if 'play_by_play' in pbp else pbp

    # Sort numerically ("10" after "9"), then look rows up by their string key.
    ordered_ids = sorted(int(key) for key in events)
    rows = [events[str(event_id)] for event_id in ordered_ids]

    frame = pandas.DataFrame(rows, columns=PBP_HEADER)
    return frame[PBP_HEADER[1:]]


def addGameClock(pbp):
    """Return a copy of ``pbp`` with an integer GAMECLOCK column appended.

    GAMECLOCK is the value of ``time2Gc`` applied to each PCTIMESTRING entry.

    The column is attached positionally, so it stays aligned with its row even
    when ``pbp`` does not carry a default 0..n-1 index (for example after
    filtering). An index-based join against a freshly built frame would
    produce NaN or misaligned clocks in that case.

    If ``pbp`` already has a GAMECLOCK column it is overwritten, which makes
    re-running the function on its own output safe.
    """
    clocks = [time2Gc(t) for t in pbp.PCTIMESTRING]
    return pbp.assign(GAMECLOCK=clocks)


def time2Gc(time):
    """Convert a 'minutes:seconds' string into a whole number of seconds.

    Only the first two colon-separated fields are read; both must parse as
    integers.
    """
    # NOTE: the parameter name shadows the ``time`` module inside this function.
    fields = time.split(':')
    minutes = int(fields[0])
    seconds = int(fields[1])
    return 60 * minutes + seconds

In [113]:
# Derive the frame from the raw game document rather than from `pbp` itself,
# so re-running this cell does not feed an already-processed DataFrame back
# into preprocessPbp.
pbp = preprocessPbp(game['play_by_play'])

In [114]:
pbp.head()

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,SCORE,SCOREMARGIN,GAMECLOCK
0,0,12,0,1,8:12 PM,12:00,,,,,,720
1,1,10,0,1,8:12 PM,12:00,Jump Ball Dalembert vs. Gasol: Tip to Larkin,,,,,720
2,2,2,1,1,8:12 PM,11:39,MISS Shumpert 21' Jump Shot,,,,,699
3,3,4,0,1,8:12 PM,11:38,Stoudemire REBOUND (Off:1 Def:0),,,,,698
4,4,2,1,1,8:13 PM,11:24,MISS Dalembert 13' Jump Shot,,,,,684


### Retrieve All Moments for a Game

In [115]:
gid

'0021400010'

In [146]:
# All moments of this game whose game_clock_start is not the string 'None'.
moments = list(
    moments_coll.find({'game_id': gid, 'game_clock_start': {'$ne': 'None'}})
)

In [170]:
moments = pandas.DataFrame(moments)

In [172]:
moments.shape

(261, 24)

### Find Plays That Go with a Moment

In [163]:
# `moments` is a DataFrame at this point, so `moments[50]` would be a column
# lookup and raise KeyError; select the 51st row by position instead.
moment = moments.iloc[50]

In [164]:
moment['game_clock_start']

119.72

In [165]:
moment['game_clock_end']

91.58

In [216]:
def extract_moment_plays(plays, mom):
    """Return the rows of ``plays`` that fall inside the moment ``mom``.

    Parameters
    ----------
    plays : pandas.DataFrame
        Must have 'PERIOD' and 'GAMECLOCK' columns.
    mom : mapping or pandas.Series
        Must provide 'quarter', 'game_clock_start' and 'game_clock_end'.

    Returns
    -------
    pandas.DataFrame
        The plays whose PERIOD equals the moment's quarter and whose GAMECLOCK
        lies in [ceil(game_clock_end), floor(game_clock_start)], both ends
        inclusive. The original index is preserved.
    """
    # GAMECLOCK counts down, so the moment's *end* is the lower bound. Rounding
    # inward keeps only whole-second clocks strictly covered by the moment.
    lower = numpy.ceil(mom['game_clock_end'])
    upper = numpy.floor(mom['game_clock_start'])

    # Vectorized mask instead of a per-row Python lambda.
    in_moment = (
        (plays['PERIOD'] == mom['quarter'])
        & (plays['GAMECLOCK'] >= lower)
        & (plays['GAMECLOCK'] <= upper)
    )
    return plays[in_moment]

In [217]:
extract_moment_plays(pbp, moment)

Unnamed: 0,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,SCORE,SCOREMARGIN,GAMECLOCK
83,103,2,1,1,8:36 PM,1:56,MISS Smith 19' Jump Shot,,,,,116
84,104,4,0,1,8:36 PM,1:55,,,Gibson REBOUND (Off:0 Def:1),,,115
85,105,1,1,1,8:36 PM,1:42,,,Gibson 13' Jump Shot (3 PTS) (Dunleavy 2 AST),16 - 18,2.0,102


In [214]:
def extract_play_moments(play, moments):
    """Return the rows of ``moments`` whose time window contains ``play``.

    Assumes all moments for the game have already been queried into
    ``moments``.

    Parameters
    ----------
    play : mapping or pandas.Series
        Must provide 'PERIOD' and 'GAMECLOCK'.
    moments : pandas.DataFrame
        Must have 'quarter', 'game_clock_start' and 'game_clock_end' columns.

    Returns
    -------
    pandas.DataFrame
        The moments in the play's period for which
        ceil(game_clock_end) <= GAMECLOCK <= floor(game_clock_start).
        The original index is preserved.
    """
    clock = play['GAMECLOCK']

    # Coerce to numeric so a non-numeric clock value (such as a 'None' string)
    # becomes NaN and simply fails both comparisons instead of raising.
    window_end = numpy.ceil(
        pandas.to_numeric(moments['game_clock_end'], errors='coerce'))
    window_start = numpy.floor(
        pandas.to_numeric(moments['game_clock_start'], errors='coerce'))

    # Vectorized mask instead of a per-row Python lambda.
    contains_play = (
        (moments['quarter'] == play['PERIOD'])
        & (window_end <= clock)
        & (window_start >= clock)
    )
    return moments[contains_play]

In [215]:
# `.ix` is deprecated and was removed in pandas 1.0; `.loc` performs the same
# label-based row lookup.
extract_play_moments(pbp.loc[83], moments)

Unnamed: 0,_id,away,event_id,game_clock,game_clock_end,game_clock_start,game_date,game_id,home,moment,...,player_ids_home,quarter,radius,shot_clock,shot_clock_end,shot_clock_start,team_id,timestamp,x_loc,y_loc
49,5631532458a05c3c78db0144,Chicago Bulls,74,"[133.0, 133.0, 133.0, 133.0, 133.0, 133.0, 133...",113.74,133.0,2014-10-29,21400010,New York Knicks,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, ...",...,"[203499, 2405, 203112, 203501, 204037, 2546, 2...",1,"[4.23423, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24....",22.09,24.0,"[-1, 1610612752, 1610612752, 1610612752, 16106...","[1414628846179, 1414628846179, 1414628846179, ...","[74.82868, 63.73916, 87.41793, 88.37236, 82.04...","[24.15943, 36.6117, 33.56723, 16.43745, 16.401..."
50,5631532458a05c3c78db0145,Chicago Bulls,75,"[119.72, 119.72, 119.72, 119.72, 119.72, 119.7...",91.58,119.72,2014-10-29,21400010,New York Knicks,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, ...",...,"[203499, 2405, 203112, 203501, 204037, 2546, 2...",1,"[3.29813, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0...","[12.05, 12.05, 12.05, 12.05, 12.05, 12.05, 12....",13.97,12.05,"[-1, 1610612752, 1610612752, 1610612752, 16106...","[1414628876017, 1414628876017, 1414628876017, ...","[10.09885, 7.61414, 19.96256, 7.17221, 11.0445...","[34.96681, 8.61459, 34.43542, 16.63142, 34.798..."


In [185]:
db['Moments']

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'NBASD'), 'Moments')

In [209]:
class mongo_open:
    """Context manager that opens a MongoDB collection and closes the client on exit.

    Usage::

        with mongo_open('NBASD', 'Moments') as conn:
            docs = list(conn.query({'game_id': gid}))

    Inside the ``with`` block the ``client``, ``db`` and ``coll`` attributes
    are available. The client is created with ``MongoClient()`` defaults.
    """

    def __init__(self, db_name, coll_name):
        """Remember which database and collection to open; no I/O happens here."""
        self.db_name = db_name
        self.coll_name = coll_name

    def __enter__(self):
        """Create the client and resolve the database and collection handles."""
        self.client = MongoClient()
        self.db = self.client[self.db_name]
        self.coll = self.db[self.coll_name]
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client; exceptions from the ``with`` body propagate."""
        self.client.close()

    def query(self, conditions, fields=None):
        """Run ``find`` on the opened collection and return its result.

        Parameters
        ----------
        conditions : dict
            Filter passed to ``find``.
        fields : dict or list, optional
            Projection passed to ``find``. The default ``None`` applies no
            projection. The previous default was a shared mutable ``{}``,
            which PyMongo 3.x rewrites to ``{"_id": 1}``, so default queries
            returned only the ``_id`` field.
        """
        return self.coll.find(conditions, fields)

In [196]:
# Open a short-lived connection and print how many documents Moments holds.
# NOTE(review): Collection.count() is deprecated in newer PyMongo releases
# (count_documents is the replacement) — confirm against the installed version.
with mongo_open('NBASD', 'Moments') as f1:
    print(f1.coll.count())

211045


In [210]:
# Fetch this game's moments (game_clock_start != 'None') through the context
# manager, materializing the cursor before the client is closed.
with mongo_open('NBASD', 'Moments') as conn:
    m2 = list(
        conn.query({'game_id': gid, 'game_clock_start': {'$ne': 'None'}})
    )

In [211]:
len(m2)

261