In [7]:
import pandas as pd 
from sklearn.linear_model import LogisticRegression 
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from PIL import Image
import matplotlib.gridspec as gridspec
import io
import urllib
import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash
from dash import html, Input, Output, dcc
import base64
from cairosvg import svg2png

In [2]:
csv_list = os.listdir("data/nba_games_2021/")
cols1 = {'away_winpct':[], 'home_winpct':[], 'home_win':[]}
pregame_data = pd.DataFrame(cols1)
cols2 = {'away_pts':[], 'home_pts':[], 'seconds_rem':[], 'home_win':[]}
ingame_data = pd.DataFrame(cols2)
example_games = []
example_teams = []
unique_teams = set()
for i, csv in enumerate(csv_list):
    
    #print(csv)
    if (csv == "urls2021"): continue
    sample = pd.DataFrame(cols2)
    game = pd.read_csv(f'data/nba_games_2021/{csv}')
    home = game.columns[1]
    away = game.columns[0]
    home_record = game.loc[0][1]
    away_record = game.loc[0][0]
    if (type(home_record)!=type("hi") or type(away_record)!=type("hi")): continue
    home_winpct = int(home_record.split("-")[0]) / (int(home_record.split("-")[0])+int(home_record.split("-")[1]))
    away_winpct = int(away_record.split("-")[0]) / (int(away_record.split("-")[0])+int(away_record.split("-")[1]))   
    if (game.iloc[-2][0].startswith("J")):
        final_score = final_score = (int(game.iloc[-3][1]), int(game.iloc[-3][0]))
    else: final_score = (int(game.iloc[-2][1]), int(game.iloc[-2][0]))
    home_win = 1 if final_score[0] > final_score[1] else 0
    row = [away_winpct, home_winpct, home_win]
    pregame_data.loc[len(pregame_data.index)] = row

    game = game.drop(labels = 0, axis = 0)
    def time_to_seconds(time_str):
        minutes, seconds = 0, 0
        if 'M' in time_str:
            splitter = time_str.split('M')
            minutes = int(splitter[0])
            seconds = int(splitter[1].split('S')[0])
        else:
            seconds = int(time_str.split('S')[0])
        return minutes * 60 + seconds
    game['seconds_rem'] = game['time_rem'].apply(time_to_seconds)
    game = game.drop(columns = ['time_rem'])
    game = game.drop(labels = len(game), axis = 0)
    game['home_win'] = home_win
    game = game.rename(columns ={away:'away_pts', home:'home_pts'})
    game = game[~game['away_pts'].str.startswith(("S", "E", "J"))]
    game['away_pts'] = game['away_pts'].astype(int)
    game['home_pts'] = game['home_pts'].astype(int)
    ingame_data = pd.concat([ingame_data, game])
    if (home not in unique_teams):
        sample = pd.concat([sample, game])#sample.append(game)
        example_home = home
        example_away = away
        example_games.append(sample)
        example_teams.append((example_away, example_home))
        unique_teams.add(home)
    #print(ingame_data)


In [3]:
#Pregame
predictors = pregame_data.drop(columns=["home_win"])
response = pregame_data["home_win"]
train_data, test_data, train_target, test_target = train_test_split(
    predictors, response, test_size=0.8, random_state=999)

model = LogisticRegression() 
model.fit(train_data, train_target) 
pregame_win_probabilities = model.predict_proba(test_data)
print(pregame_win_probabilities)


[[0.62749877 0.37250123]
 [0.36363618 0.63636382]
 [0.35337592 0.64662408]
 ...
 [0.49028585 0.50971415]
 [0.36551216 0.63448784]
 [0.60584598 0.39415402]]


In [4]:
#Ingame
pred = ingame_data.drop(columns=["home_win"])
resp = ingame_data["home_win"]
x_train, x_test, y_train, y_test = train_test_split(
    pred, resp, test_size=0.8, random_state=999)
ingame_model = LogisticRegression() 
ingame_model.fit(x_train, y_train) 
testing_games = {}
sample_games = []
for i, game in enumerate(example_games):
    x_example = game.drop(columns=['home_win'])
    example_prob = ingame_model.predict_proba(x_example)
    key = example_teams[i][0]+" vs. "+example_teams[i][1]
    sample_games.append(key)
    x_example['prob'] = example_prob[:,1]
    testing_games[key] = x_example



In [8]:

# logos
base_url = "https://cdn.nba.com/logos/nba/16106127"
url_end = "/primary/L/logo.svg"
number = 37
teams = ["Atlanta", "Boston", "Cleveland", "New Orleans", "Chicago", "Dallas", "Denver", 
         "Golden State", "Houston", "LA Clippers", "LA Lakers", "Miami", "Milwaukee", "Minnesota",
         "Brooklyn", "New York", "Orlando", "Indiana", "Philadelphia", "Phoenix", "Portland",
         "Sacramento", "San Antonio", "Oklahoma City", "Toronto", "Utah", "Memphis",
         "Washington", "Detroit", "Charlotte"]

team_logo = {}



for i in range(30):
    url = base_url + str(number+i)+url_end
    img = svg2png(url = base_url+str(number+i)+url_end)
    Im = Image.open(io.BytesIO(img))
    Im = Im.save(f"data/logos/{teams[i]}.png")

In [13]:
team_color = {}
colors = ['#e03a3e', '#007a33', '#860038', '#0c2340', '#ce1141', 
          '#00538c', '#fec524', '#1d428a', '#ce1141', '#c8102e', 
          '#552583', '#98002e', '#00471b', '#0c2340', '#000000', 
          '#f58426', '#0077c0', '#002d62', '#006bb6', '#e56020', 
          '#e03a3e', '#5a2d81', '#c4ced4', '#007ac1', '#ce1141', 
          '#002b5c', '#5d76a9', '#002b5c', '#c8102e', '#00788c']

for i in range(30):
    logo = OffsetImage(plt.imread(f"data/logos/{teams[i]}.png"), zoom=0.1)
    team_logo[teams[i]] = logo
    team_color[teams[i]] = colors[i]

In [14]:
app = JupyterDash(__name__)

#test_base64 = base64.b64encode(open("data/logos/Golden State.png", 'rb').read()).decode('ascii')
#Need to incorporate team color for probability line, and also 
app.layout = html.Div(children = [html.Div([
    dcc.Dropdown(
        id="dropdown",
        options=sample_games,
        value=sample_games[0],
        clearable=False,
    ),
    #html.Img(src='data:image/png;base64,{}'.format(test_base64)),
    dcc.Graph(id="graph")]),
    html.Div([
    #html.Img(src='data:image/png;base64,{}'.format(test_base64)),
    dcc.Graph(id="graph1")])])


@app.callback(
    #Output('graph1', 'figure'),
    Output("graph", "figure"), 
    Input("dropdown", "value"))
def plot_probabilities(game):
    fig = px.line(testing_games[game], x = 'seconds_rem', y = 'prob', labels={
                     "seconds_rem": "Seconds Remaining",
                     "prob": "Win Probability",
                 },
                title=game)
    home_team = game.split(". ")[1]

    img = base64.b64encode(open(f"data/logos/{home_team}.png", 'rb').read()).decode('ascii')
        #add the prefix that plotly will want when using the string as source
    encoded_image = "data:image/png;base64," + img
    fig['data'][0]['line']['color']=team_color[home_team]
    fig.add_layout_image(
            row=0,
            col=1,
            source=encoded_image,
            #source = "data/logos/Orlando.png",
            xref="x domain",
            yref="y domain",
            x=0,
            y=1,
            # xanchor="left",
            # yanchor="top",
            sizex=0.2,
            sizey=0.2,
        )

    fig.update_xaxes(autorange="reversed")
    return fig

@app.callback(
    Output("graph1", "figure"), 
    Input("dropdown", "value"))
def plot_score(game):
    away_team = game.split(" vs. ")[0]
    home_team = game.split(" vs. ")[1]
    fig = px.line(testing_games[game], x = 'seconds_rem', y = ['home_pts', 'away_pts'],
                title=game)
    def custom_legend_name(new_names):
        for i, new_name in enumerate(new_names):
            fig.data[i].name = new_name
    custom_legend_name([home_team, away_team])
    fig.update_layout(
    title=game, xaxis_title="Seconds Remaining", yaxis_title="Score"
    )
    fig['data'][0]['line']['color']=team_color[home_team]
    fig['data'][1]['line']['color']=team_color[away_team]
    img = base64.b64encode(open(f"data/logos/{home_team}.png", 'rb').read()).decode('ascii')
    encoded_image = "data:image/png;base64," + img
    fig.add_layout_image(
            row=0,
            col=1,
            source=encoded_image,
            xref="x domain",
            yref="y domain",
            x=0,
            y=1,
            sizex=0.2,
            sizey=0.2,
        )
        
    
    fig.update_xaxes(autorange="reversed")
    return fig

In [15]:
app.run_server()

Dash app running on http://127.0.0.1:8050/
