### Build MemSQL Connection

In [2]:
import os

import pandas
import pymysql

# Connection settings for the MemSQL instance holding the `nba` database.
# Each value can be overridden through an environment variable so that
# credentials never have to be edited into (and committed with) the notebook;
# the fallbacks reproduce the original local-development setup.
HOST = os.environ.get("MEMSQL_HOST", "127.0.0.1")
PORT = int(os.environ.get("MEMSQL_PORT", "3306"))
USER = os.environ.get("MEMSQL_USER", "root")
PASSWORD = os.environ.get("MEMSQL_PASSWORD", "")
DATABASE = os.environ.get("MEMSQL_DATABASE", "nba")

# Single shared connection used by every query helper below.
conn = pymysql.connect(host=HOST, port=PORT, user=USER, password=PASSWORD, database=DATABASE, charset='utf8mb4')

### Define Methods for Retrieving Each Phase of a Season

In [11]:
def get_games_for_season(season_start_yr, season_phase):
    """Return every game played during one phase of a season.

    The season is the first row of the ``season`` table whose
    ``regular_season_start`` is later than January 1st of ``season_start_yr``.
    Rows of ``game_header`` whose ``game_date`` falls inside the requested
    phase's start/end window are joined to ``team`` and ``line_score`` (once
    for the home side, once for the away side) to pick up names and points.

    Args:
        season_start_yr: Integer calendar year in which the season begins
            (e.g. 2017 for the season that starts in autumn 2017).
        season_phase: One of ``"regular_season"``, ``"playoffs"`` or
            ``"finals"``.

    Returns:
        pandas.DataFrame with the columns ``game_date``,
        ``natl_tv_broadcaster``, ``home_team_name``, ``away_team_name``,
        ``home_team_pts`` and ``away_team_pts``, newest game first.

    Raises:
        ValueError: If ``season_phase`` is not one of the supported phases.
    """
    # phase -> (CTE name, window-start column, window-end column) in `season`.
    phase_windows = {
        "regular_season": (
            "all_games_in_regular_season",
            "regular_season_start",
            "regular_season_end",
        ),
        "playoffs": ("all_games_in_playoffs", "playoffs_start", "playoffs_end"),
        "finals": ("all_games_in_finals", "finals_start", "finals_end"),
    }
    if season_phase not in phase_windows:
        # Previously an unknown phase surfaced as an UnboundLocalError deep in
        # the query construction; fail early with an actionable message.
        raise ValueError(
            "Unknown season_phase %r; expected one of: %s"
            % (season_phase, ", ".join(sorted(phase_windows)))
        )
    cte_name, start_col, end_col = phase_windows[season_phase]

    season_start = "%d-01-01" % season_start_yr

    # Identifiers (CTE name, column names) come only from the whitelist above,
    # so they are safe to splice in with str.format. The date VALUE is left as
    # a %(season_start)s placeholder and bound by the database driver.
    query = """
        WITH season_dates AS (
            SELECT
                *
            FROM season
            WHERE regular_season_start > %(season_start)s
            ORDER BY regular_season_start ASC
            LIMIT 1
        ), {cte_name} AS (
            SELECT
                *
            FROM
                game_header gh,
                season_dates sd
            WHERE
                gh.game_date BETWEEN sd.{start_col} AND sd.{end_col}
        )
        SELECT
            g.game_date,
            g.natl_tv_broadcaster,
            ht.name home_team_name,
            at.name away_team_name,
            hls.pts home_team_pts,
            als.pts away_team_pts
        FROM
            {cte_name} g

        -- Get the home team information
        JOIN team ht ON
            g.home_team_id = ht.id
        JOIN line_score hls ON
            hls.game_id = g.game_id AND
            hls.team_id = ht.id

        -- Get the visiting team information
        JOIN team at ON
            g.away_team_id = at.id
        JOIN line_score als ON
            als.game_id = g.game_id AND
            als.team_id = at.id
        ORDER BY g.game_date DESC
    """.format(cte_name=cte_name, start_col=start_col, end_col=end_col)

    return pandas.read_sql_query(query, conn, params={"season_start": season_start})

### Retrieve Each Phase of the Season for a Given Year

In [13]:
# Run the three phase queries for the season starting in 2017 up front,
# then print the resulting frames in regular season -> playoffs -> finals order.
regular_season_games_df, playoff_games_df, finals_games_df = (
    get_games_for_season(2017, phase)
    for phase in ("regular_season", "playoffs", "finals")
)

for phase_games_df in (regular_season_games_df, playoff_games_df, finals_games_df):
    print(phase_games_df)

       game_date natl_tv_broadcaster home_team_name away_team_name  \
0     2018-04-11                None       Clippers         Lakers   
1     2018-04-11                None       Pelicans          Spurs   
2     2018-04-11                None        Thunder      Grizzlies   
3     2018-04-11                None          Bulls        Pistons   
4     2018-04-11              NBA TV   Timberwolves        Nuggets   
5     2018-04-11                None          Magic        Wizards   
6     2018-04-11                None          Kings        Rockets   
7     2018-04-11                None        Celtics           Nets   
8     2018-04-11                ESPN          76ers          Bucks   
9     2018-04-11                ESPN  Trail Blazers           Jazz   
10    2018-04-11                None           Heat        Raptors   
11    2018-04-11                None      Cavaliers         Knicks   
12    2018-04-10                None         Pacers        Hornets   
13    2018-04-10    

In [2]:
import plotly.plotly as ply
from plotly.graph_objs import *

# Hover label for every regular-season game, written in the conventional
# "visitor (pts) @ host (pts)" order.
game_names = [
    '[%s] %s (%s) @ %s (%s)' % (game_date, away_name, away_pts, home_name, home_pts)
    for game_date, home_name, home_pts, away_name, away_pts in zip(
        regular_season_games_df['game_date'],
        regular_season_games_df['home_team_name'],
        regular_season_games_df['home_team_pts'],
        regular_season_games_df['away_team_name'],
        regular_season_games_df['away_team_pts'],
    )
]

# The points must come from the same frame as the labels so that label i
# describes marker i (the cell previously read an undefined `games_df`).
trace1 = Scatter(
     x=regular_season_games_df['home_team_pts'],
     y=regular_season_games_df['away_team_pts'],
     text=game_names,
     mode='markers'
)

layout = Layout(
     xaxis=XAxis( title='Home Team Points' ),
     yaxis=YAxis( type='log', title='Visitor Team Points' )
)

data = Data([trace1])
fig = Figure(data=data, layout=layout)
ply.iplot(fig, filename='Home Team Points v Away Team Points Comparison')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~NeilDahlke/0 or inside your plot.ly account where it is named 'Home Team Points v Away Team Points Comparison'


### Heatmap of point differentials for each team.

In [3]:
import plotly.plotly as ply
from plotly.graph_objs import *

# teams[a][b] accumulates the points team b scored against team a across all
# regular-season meetings, regardless of which side was the host.
teams = {}

for _, gdf in regular_season_games_df.iterrows():
    home_name = gdf['home_team_name']
    away_name = gdf['away_team_name']
    home_pts = gdf['home_team_pts']
    away_pts = gdf['away_team_pts']

    conceded_by_home = teams.setdefault(home_name, {})
    conceded_by_away = teams.setdefault(away_name, {})

    conceded_by_away[home_name] = conceded_by_away.get(home_name, 0) + home_pts
    conceded_by_home[away_name] = conceded_by_home.get(away_name, 0) + away_pts

sorted_team_names = sorted(teams.keys())

# Each row of `point_differentials` belongs to one y-axis team and each entry
# in it to one x-axis team. An entry is (points the x-axis team scored on the
# y-axis team) minus (points the y-axis team scored on the x-axis team), i.e.
# the x-axis team's aggregate margin over that opponent. Pairs that never met,
# and the diagonal, come out as 0 because missing tallies default to 0.
point_differentials = [
    [
        teams[row_team].get(col_team, 0) - teams[col_team].get(row_team, 0)
        for col_team in sorted_team_names
    ]
    for row_team in sorted_team_names
]

# Totals are pooled over home and away games, so the axes are labelled by
# role in the differential rather than by venue.
layout = Layout(
     xaxis=XAxis(title='Team'),
     yaxis=YAxis(title='Opponent')
)

trace = Heatmap(
    x=sorted_team_names,
    y=sorted_team_names,
    z=point_differentials,
    colorscale='Blackbody'
)
data=[trace]
# Pass the layout along with the trace; it was previously built but never used.
fig = Figure(data=data, layout=layout)
ply.iplot(fig, filename='basic-heatmap')

In [6]:
def get_season_info_for_year(season_start_yr):
    """Return the ``season`` row for the season that begins in a given year.

    Selects the earliest row of the ``season`` table whose
    ``regular_season_start`` is later than January 1st of ``season_start_yr``.

    Args:
        season_start_yr: Integer calendar year in which the season begins.

    Returns:
        pandas.DataFrame holding at most one row with all ``season`` columns.
    """
    season_start = "%d-01-01" % season_start_yr
    # Bind the date as a driver parameter. Splicing it into the SQL text
    # unquoted made the server see the arithmetic expression 1979-01-01
    # (= 1977) instead of a date, so the year filter was effectively ignored.
    return pandas.read_sql_query("""
        SELECT
            *
        FROM season
        WHERE regular_season_start > %(season_start)s
        ORDER BY regular_season_start ASC
        LIMIT 1;
    """, conn, params={"season_start": season_start})

# Query the season table with 1979 as the season start year and print the
# resulting frame as plain text.
season_info = get_season_info_for_year(1979)
print(season_info)

   id regular_season_start regular_season_end playoffs_start playoffs_end  \
0   1           1979-10-12         1980-03-30     1980-04-02   1980-04-30   

  finals_start  finals_end  
0   1980-05-04  1980-06-16  
