# NHL Highlights

This notebook makes use of the different URLs obtained from the following thread: http://hfboards.hockeysfuture.com/showthread.php?t=1596119

Using these links, I use BeautifulSoup to scrape data from the NHL websites to produce a Markdown format recap and boxscore of a given day's NHL scores and highlights. This markdown can then be copy pasted on websites such as Reddit.

TODO:
* The next step would be to automate this script for daily posting on Reddit

In [1]:
from bs4 import BeautifulSoup
from urllib import urlopen
import json
import pprint
import collections
import requests
import time

In [2]:
# For the time being, the user must specify the date as a string in yyyy-MM-dd format
date = "2017-03-04"

In [3]:
# This function will take a date and return a list of game dictionaries
# Game dictionaries are defined as follows: {id, {desc, score}}
# INPUT: date as a string in yyyy-MM-dd format
# OUTPUT: list of game dictionaries
def getDailyGames(date):
    base_sb_url = "http://live.nhle.com/GameData/GCScoreboard/" + date + ".jsonp"
    sb_html = urlopen(base_sb_url).read()
    sb_json = json.loads(sb_html[15:-2])
    games = collections.OrderedDict()
    for game in sb_json['games']:
        g = {}
        gid = str(game['id'])
        ht = game['htn'] + " " + game['htcommon']
        at = game['atn'] + " " + game['atcommon']
        score = str(game['ats']) + "-" + str(game['hts'])
        g['desc'] = at + " @ " + ht
        g['score'] = score
        games[gid] = g
    
    return games

In [4]:
games = getDailyGames(date)
pprint.pprint(games.items())

[('2016020952',
  {'desc': u'NEW JERSEY DEVILS @ BOSTON BRUINS', 'score': '2-3'}),
 ('2016020953',
  {'desc': u'TAMPA BAY LIGHTNING @ BUFFALO SABRES', 'score': '2-1'}),
 ('2016020954',
  {'desc': u'COLUMBUS BLUE JACKETS @ OTTAWA SENATORS', 'score': '2-3'}),
 ('2016020955', {'desc': u'DALLAS STARS @ FLORIDA PANTHERS', 'score': '2-1'}),
 ('2016020956',
  {'desc': u'MONTREAL CANADIENS @ NY RANGERS RANGERS', 'score': '4-1'}),
 ('2016020958',
  {'desc': u'COLORADO AVALANCHE @ WINNIPEG JETS', 'score': '1-6'}),
 ('2016020957',
  {'desc': u'PHILADELPHIA FLYERS @ WASHINGTON CAPITALS', 'score': '1-2'}),
 ('2016020959',
  {'desc': u'CHICAGO BLACKHAWKS @ NASHVILLE PREDATORS', 'score': '5-3'}),
 ('2016020960',
  {'desc': u'DETROIT RED WINGS @ EDMONTON OILERS', 'score': '3-4'}),
 ('2016020961',
  {'desc': u'VANCOUVER CANUCKS @ LOS ANGELES KINGS', 'score': '4-3'})]


In [5]:
def getGameDetails(games):
    for game in games.items():
        gid = game[0]
        
        # Create the required URLs based on the game ID
        gcbx_url = "http://live.nhle.com/GameData/20162017/" + gid + "/gc/gcbx.jsonp"
        gcgm_url = "http://live.nhle.com/GameData/20162017/" + gid + "/gc/gcgm.jsonp"
        
        # Read entire source HTML
        gcbx_html = urlopen(gcbx_url).read()
        gcgm_html = urlopen(gcgm_url).read()

        # Extract the JSON data (ignoring the prefix.load())
        gcbx_json = json.loads(gcbx_html[10:-1])
        gcgm_json = json.loads(gcgm_html[10:-1])
        
        # Parse the two JSON data and create a list of goals with the following information:
        #   - ID
        #   - Scoring team
        #   - Period
        #   - Time of goal (in seconds)
        #   - Description
        #   - Neulion ID
        #   - Video link (null for now)
        #   - Streamable link (null for now)

        # I use an ordered dictionary to make it easy for timekeeping
        goals = collections.OrderedDict()

        # First get the goal information from the game
        for period in gcbx_json['goalSummary']:
            if 'goals' in period:
                for goal in period['goals']:
                    g = {}
                    g['team'] = goal['t1']
                    g['period'] = goal['p']
                    g['time'] = goal['sip']
                    prefix = ""
                    if (goal['sc'] == 702):
                        prefix ="(PPG) "
                    elif (goal['sc'] == 703):
                        prefix ="(SHG) "
                    g['desc'] = prefix + goal['desc']
                    g['neulionId'] = ""
                    g['video'] = ""
                    g['streamable'] = ""
                    goals[goal['id']] = g
     
        # Then add in the Neulion ID
        for e in gcgm_json['video']['events']:
            if 'id' in e:
                id = e['id']
                for f in e['feeds']:
                    if str(id) in goals:
                        goals[str(id)]['neulionId'] = f['neulionId']

        # Now we will go through each goal and get it's video link
        # We can get this from the HTML source of the video highlight
        # The video highlight URL can be obtained from appending the NeulionID to a base URL
        # ----
        # Subsequently we will also upload the videos to streamable and store the resulting url
        base_video_url = 'https://www.nhl.com/video/c-'
        base_api_url = 'https://api.streamable.com/import?url='
        streamable_url = 'https://streamable.com/'

        for goal in goals.items():
            nid = goal[1]['neulionId']
            if nid is None or nid == "":
                continue
            url = base_video_url + str(nid)
            html = urlopen(url)
            soup = BeautifulSoup(html)
            videos = soup.findAll('meta', {"itemprop":"contentURL"})
            goals[goal[0]]['video'] = videos[0]['content']

#             api_url = base_api_url + videos[0]['content']
#             r = requests.get(api_url)
#             goals[goal[0]]['streamable'] = streamable_url + r.json()['shortcode']
            goals[goal[0]]['streamable'] = videos[0]['content']
            
        # Now we will update the goals for this game
        games[gid]['goals'] = goals

In [6]:
getGameDetails(games)

In [7]:
pprint.pprint(games.items())

[('2016020952',
  {'desc': u'NEW JERSEY DEVILS @ BOSTON BRUINS',
   'goals': OrderedDict([(u'235', {'streamable': 'http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/e899cc94-b38e-45de-a80a-644050b568c3/1488676762822/asset_1800k.mp4', 'neulionId': 49998103, 'team': u'BOS', 'period': 2, 'video': 'http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/e899cc94-b38e-45de-a80a-644050b568c3/1488676762822/asset_1800k.mp4', 'time': 426, 'desc': u'(PPG) Torey Krug (6) Slap Shot, assists: David Pastrnak (28), Patrice Bergeron (25)'}), (u'406', {'streamable': 'http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/832c0daa-f55e-4e3e-9332-d4480a983c7d/1488677644743/asset_1800k.mp4', 'neulionId': 50000003, 'team': u'NJD', 'period': 2, 'video': 'http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/832c0daa-f55e-4e3e-9332-d4480a983c7d/1488677644743/asset_1800k.mp4', 'time': 708, 'desc': u'Devante Smith-Pelly (4) Backhand, assists: Steven Santini (5), Kyle Palmieri (22)'}), (u'426', {'streamable': 'http://md-akc.med.nhl.com/mp4/nhl/2

In [8]:
# I define a function to print the scoring summary in markdown format for posting on reddit
# Currently this function takes the goals dictionary as input
# It will output markdown text that could be pasted directly to reddit
def print_ss_markdown(goals):
    print "\n"
    print "Period|Time|Team|Point getters"
    print "|:-:|:-:|:-:|:-:|"
    for goal in goals.items():
        period = str(goal[1]['period'])
        period_time = time.strftime("%M:%S", time.gmtime(goal[1]['time']))
        team = goal[1]['team']
        
        # Sometimes the goal URL might not be uploaded so let's make sure it exists first
        if goal[1]['streamable'] is None or goal[1]['streamable'] == "":
            points = goal[1]['desc'] + goal[1]['streamable']
        else:
            points = "[" + goal[1]['desc'] + "](" + goal[1]['streamable'] + ")"
            
        print period + "|" + period_time + "|" + team + "|" + points
    print "\n"

In [9]:
for game in games.items():
    print "___"
    print "##" + game[1]['desc']
    print "# Final score: " + game[1]['score']
    print_ss_markdown(game[1]['goals'])

___
##NEW JERSEY DEVILS @ BOSTON BRUINS
# Final score: 2-3


Period|Time|Team|Point getters
|:-:|:-:|:-:|:-:|
2|07:06|BOS|[(PPG) Torey Krug (6) Slap Shot, assists: David Pastrnak (28), Patrice Bergeron (25)](http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/e899cc94-b38e-45de-a80a-644050b568c3/1488676762822/asset_1800k.mp4)
2|11:48|NJD|[Devante Smith-Pelly (4) Backhand, assists: Steven Santini (5), Kyle Palmieri (22)](http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/832c0daa-f55e-4e3e-9332-d4480a983c7d/1488677644743/asset_1800k.mp4)
2|18:25|BOS|[Brandon Carlo (6) Backhand, assists: Patrice Bergeron (26), David Backes (18)](http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/acaa7072-87c7-4073-b9e8-5d903b49d238/1488678532516/asset_1800k.mp4)
3|05:51|NJD|[Kyle Palmieri (20) Snap Shot, assists: Taylor Hall (28)](http://md-akc.med.nhl.com/mp4/nhl/2017/03/05/9c8f5402-dee6-4450-9184-9bbce718016c/1488680562894/asset_1800k.mp4)
3|08:18|BOS|[Ryan Spooner (11) Wrist Shot, assists: Drew Stafford (10), Frank Vatr