In [1]:
#Computer Science Capstone -C964 Task 2 Prompt C
#Python 3.7
#Student ID: 009101364
#Aamir Djearam

#Basketball Prediction Application v1.00

#Description: This application allows basketball players' scoring data to be read, explored and predicted.
#The application ingests csv files containing the players' data and uses a supervised linear regression model to predict
#each player's scores.

#The folder containing this application has been password protected and a read-me file can be found alongside the program.

#WARNING: Please do not use the dropdown menus in rapid succession, as they are prone to errors under fast use or after any loss in connection.
#If any bugs are encountered, please "interrupt the kernel" using the stop button, close and reopen the notebook,
#and restart the kernel using the ⏩ icon.

#Requirements: 
#industry-appropriate security features (password protection)
#tools to monitor and maintain the product (read-me and comments throughout the code)

In [2]:
#Import all dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random as rand
import ipywidgets as widgets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_squared_error

In [3]:
#Load each player's game log from its manually created CSV into its own pandas DataFrame.
#The paths are relative, so they are resolved against the kernel's working directory.
#To add a player: read the new CSV into its own variable below, then reference that variable in the
#dropdown handlers and add the player's name to each dropdown's options list.

BAdebayoData = pd.read_csv("player_data/Bam_Adebayo_Data.csv")
DBookerData = pd.read_csv("player_data/Devin_Booker_Data.csv")
JMurrayData = pd.read_csv("player_data/Jamal_Murray_Data.csv")
JTatumData = pd.read_csv("player_data/Jayson_Tatum_Data.csv")
JButlerData = pd.read_csv("player_data/Jimmy_Butler_Data.csv")
JEmbiidData = pd.read_csv("player_data/Joel_Embiid_Data.csv")
LJamesData = pd.read_csv("player_data/Lebron_James_Data.csv")
NJokicData = pd.read_csv("player_data/Nikola_Jokic_Data.csv")
PSiakamData = pd.read_csv("player_data/Pascal_Siakam_Data.csv")
SCurryData = pd.read_csv("player_data/Stephen_Curry_Data.csv")

#Requirements:
#collected or available datasets
#ability to support featurizing, parsing, cleaning, and wrangling datasets
#methods and algorithms supporting data exploration and preparation

In [4]:
#Display one data set as an example: a bare variable on the last line of a cell is rendered by Jupyter Notebook.
#Please refer to the csv files inside of the "player_data" folder for more detailed information on each player.

JMurrayData

Unnamed: 0,Game Number,Date,Points Per Game,Opponent,Win/Loss
0,1,19-Oct,12,UTA,L
1,2,22-Oct,16,OKC,W
2,3,24-Oct,8,POR,L
3,4,26-Oct,13,LAL,W
4,5,28-Oct,13,UTA,W
5,6,30-Oct,21,LAL,L
6,7,3-Nov,24,OKC,W
7,8,5-Nov,13,SAS,W
8,9,7-Nov,19,SAS,W
9,10,9-Nov,18,IND,W


In [5]:
#Method to extract the two regression inputs from a player's DataFrame
def setPlayerData(playerData):
    """Pull the game numbers and the per-game points out of a player's data.

    Parameters:
        playerData: DataFrame with 'Game Number' and 'Points Per Game' columns.

    Returns:
        A (game numbers, points) tuple of numpy arrays, in that order.
    """
    wanted_columns = ('Game Number', 'Points Per Game')
    return tuple(playerData[column].to_numpy() for column in wanted_columns)

In [6]:
#Method to fit the regression on 70% of the games and score it on the remaining 30%
def trainModel(setPlayerData):
    """Fit a linear regression of points on game number and report its error.

    Parameters:
        setPlayerData: (game numbers, points) pair of numpy arrays, as returned
            by the setPlayerData function. The parameter name shadows that
            function inside this body only.

    Returns:
        (x_test, y_test, model): the held-out game numbers and points, each
        reshaped to a single column, and the fitted LinearRegression.

    Prints the Mean Squared Error and the Mean Absolute Error measured on the
    held-out games.
    """
    games, points = setPlayerData[0], setPlayerData[1]

    #A fixed random_state keeps the 70/30 split identical on every run
    split = train_test_split(games, points, test_size=0.3, random_state=42)
    #Turn every piece into a single column before it is handed to the model
    x_train, x_test, y_train, y_test = [part.reshape(-1, 1) for part in split]

    #The linear regression model
    model = LinearRegression().fit(x_train, y_train)
    y_pred = model.predict(x_test)

    #Calculating and printing the "Mean Squared Error" and the "Mean Absolute Error"
    reports = (
        ("Mean Squared Error: ", mean_squared_error(y_test, y_pred)),
        ("Mean Absolute Error: ", metrics.mean_absolute_error(y_test, y_pred)),
    )
    for label, score in reports:
        print(label + str(score))

    return x_test, y_test, model

#Requirements:
#implementation of machine-learning methods and algorithms
#functionalities to evaluate the accuracy of the data product

In [7]:
#Example of executing the trainModel method on Bam Adebayo's data; the call prints both error metrics.
#x holds the returned (x_test, y_test, model) tuple.
x = trainModel(setPlayerData(BAdebayoData))

Mean Squared Error: 52.15076883037605
Mean Absolute Error: 5.335246486163189


In [8]:
#Method to create Histogram showcasing how many games the player has with a certain amount of points
def createHistogram(playerData):
    """Plot how many games fall into each 10-point scoring range.

    Parameters:
        playerData: (game numbers, points) pair as returned by setPlayerData;
            only the points (index 1) are used.
    """
    points_per_game = playerData[1]
    #10-point wide ranges starting at 0 so that a scoreless game is still counted
    bins = [0, 10, 20, 30, 40, 50, 60, 70]
    plt.hist(points_per_game, bins, rwidth = 0.6)
    #The x axis carries the binned points, the y axis the count of games in each bin
    plt.xlabel('Points per game')
    plt.ylabel('Number of Games')
    plt.title('Number of Games Categorized By Points')
    plt.show()
    
#Requirements:
#data visualization functionalities for data exploration and inspection

In [9]:
#Method to draw the held-out games as a scatter plot with the regression line on top
def createScatterGraph(trainModel):
    """Scatter the actual points per game and overlay the model's predictions.

    Parameters:
        trainModel: (x_test, y_test, model) triple as returned by the
            trainModel function. The parameter name shadows that function
            inside this body only.
    """
    games, actual_points, model = trainModel[0], trainModel[1], trainModel[2]

    plt.scatter(games, actual_points, color='blue', label='Actual')
    #Predictions for the same games give the red regression line
    plt.plot(games, model.predict(games), color='red', label='Predicted')

    plt.title('Points Per Game -Scatter Graph')
    plt.xlabel('Game Number')
    plt.ylabel('Points')
    plt.legend()
    plt.show()

In [10]:
#Method to draw every game as a bar with the regression line on top
def createBarGraph(playerData):
    """Plot points per game as bars and overlay the model's predictions.

    Parameters:
        playerData: (game numbers, points) pair as returned by setPlayerData.

    The model is trained through trainModel, so its error metrics are printed
    as a side effect. The red line covers only the held-out games that
    trainModel returns.
    """
    game_numbers, points = playerData[0], playerData[1]
    plt.bar(game_numbers, points, label = "points", color = 'b', width = 0.5)

    fitted = trainModel(playerData)
    held_out_games, model = fitted[0], fitted[2]
    plt.plot(held_out_games, model.predict(held_out_games), color='red', label='Predicted')

    plt.title("Points Per Game -Bar Graph")
    plt.xlabel('Game Number')
    plt.ylabel('Points')
    plt.legend()
    plt.show()
    
#Requirements:
#a user-friendly, functional dashboard that includes at least three visualization types

In [11]:
#This method randomly chooses a played game, predicts its score and compares the predicted value to the actual value.
#It then reports whether the prediction was within the acceptable criteria and predicts the next, unplayed game
def predictNext(setPlayerData):
    """Spot-check the regression on one random game and predict the next game.

    Parameters:
        setPlayerData: (game numbers, points) pair of numpy arrays, as returned
            by the setPlayerData function. The parameter name shadows that
            function inside this body only.

    Prints the predicted and actual score of the chosen game, whether the two
    are within 20 points of each other, and the predicted score of the game
    that follows the last one on record. Returns nothing.

    Note: the game is chosen from all games on record, so it may be one the
    model was trained on.
    """
    gameNumbers, points = setPlayerData[0], setPlayerData[1]
    acceptableDelta = 20

    #Only the training 80% is used here; the held-out part of the split is discarded
    x_train, _, y_train, _ = train_test_split(gameNumbers, points, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(x_train.reshape(-1, 1), y_train.reshape(-1, 1))

    #Choose by position so that any number of games works and the actual score
    #always belongs to the chosen game number
    position = rand.randrange(len(points))
    game = int(gameNumbers[position])
    actualScore = points.tolist()[position]
    #The first game after the last one on record
    futureGame = int(gameNumbers.max()) + 1

    #predict() returns a (1, 1) array here; .item() extracts the scalar explicitly
    #because calling int() on such an array is deprecated in recent NumPy
    predGameScore = int(model.predict([[game]]).item())
    predFuture = int(model.predict([[futureGame]]).item())

    #abs() must wrap only the difference so that misses in both directions count
    delta = abs(actualScore - predGameScore) <= acceptableDelta

    print("Prediction for game " + str(game) + " is: " + str(predGameScore))
    print("The actual value for game " + str(game) + " is: " + str(actualScore))
    print("Does the prediction meet the " + str(acceptableDelta) + "-point delta: " + str(delta))
    print()
    print("The prediction for the future game " + str(futureGame) + " is: " + str(predFuture))
    
#Requirements:
#decision-support functionality
#one descriptive method and one non-descriptive (predictive or prescriptive) method
#functionalities to evaluate the accuracy of the data product

In [12]:
#This method changes the output according to what was selected in "DROPBOX 1: HISTOGRAM"
def dropdown_change_Histogram(change):
    """Redraw the histogram area whenever the histogram dropdown's value changes.

    Parameters:
        change: change dictionary delivered by the dropdown's observe(); only
            change['new'] (the newly selected option) is read.

    thirdOutput is looked up when the handler runs, so the cell that creates it
    must have been executed before the dropdown is used.
    """
    #One lookup table instead of one copy-pasted branch per player
    playerFrames = {
        "Bam Adebayo": BAdebayoData,
        "Devin Booker": DBookerData,
        "Jamal Murray": JMurrayData,
        "Jayson Tatum": JTatumData,
        "Jimmy Butler": JButlerData,
        "Joel Embiid": JEmbiidData,
        "Lebron James": LJamesData,
        "Nikola Jokic": NJokicData,
        "Pascal Siakam": PSiakamData,
        "Stephen Curry": SCurryData,
    }
    selected = change['new']

    with thirdOutput:
        if selected == "":
            thirdOutput.clear_output()
            print('Please Select a Player.')
        elif selected in playerFrames:
            thirdOutput.clear_output()
            createHistogram(setPlayerData(playerFrames[selected]))
        elif selected:
            #An option with no data set wired up yet: just echo the selection
            thirdOutput.clear_output()
            print(selected)
            
#Building the histogram dropdown; the blank first option is the initial value
dropdownHistogram = widgets.Dropdown(
    options=[
        "",
        "Bam Adebayo",
        "Devin Booker",
        "Jamal Murray",
        "Jayson Tatum",
        "Jimmy Butler",
        "Joel Embiid",
        "Lebron James",
        "Nikola Jokic",
        "Pascal Siakam",
        "Stephen Curry",
    ],
    value="",
    description="Player:",
    style=dict(description_width="initial"),
    layout=widgets.Layout(width="250px"),
)

#Register the handler so it runs on every change of the dropdown's "value"
dropdownHistogram.observe(dropdown_change_Histogram, names="value")

#Requirements:
#implementation of interactive queries

In [13]:
#This method changes the output according to what was selected in "DROPBOX 2: SCATTER GRAPH"
def dropdown_change_Scatter(change):
    """Redraw the scatter graph area whenever the scatter dropdown's value changes.

    Parameters:
        change: change dictionary delivered by the dropdown's observe(); only
            change['new'] (the newly selected option) is read.

    output is looked up when the handler runs, so the cell that creates it must
    have been executed before the dropdown is used.
    """
    #One lookup table instead of one copy-pasted branch per player
    playerFrames = {
        "Bam Adebayo": BAdebayoData,
        "Devin Booker": DBookerData,
        "Jamal Murray": JMurrayData,
        "Jayson Tatum": JTatumData,
        "Jimmy Butler": JButlerData,
        "Joel Embiid": JEmbiidData,
        "Lebron James": LJamesData,
        "Nikola Jokic": NJokicData,
        "Pascal Siakam": PSiakamData,
        "Stephen Curry": SCurryData,
    }
    selected = change['new']

    with output:
        if selected == "":
            output.clear_output()
            print('Please Select a Player.')
        elif selected in playerFrames:
            output.clear_output()
            #trainModel prints its error metrics above the graph
            createScatterGraph(trainModel(setPlayerData(playerFrames[selected])))
        elif selected:
            #An option with no data set wired up yet: just echo the selection
            output.clear_output()
            print(selected)

#Building the scatter graph dropdown; the blank first option is the initial value
dropdownScatter = widgets.Dropdown(
    options=[
        "",
        "Bam Adebayo",
        "Devin Booker",
        "Jamal Murray",
        "Jayson Tatum",
        "Jimmy Butler",
        "Joel Embiid",
        "Lebron James",
        "Nikola Jokic",
        "Pascal Siakam",
        "Stephen Curry",
    ],
    value="",
    description="Player:",
    style=dict(description_width="initial"),
    layout=widgets.Layout(width="250px"),
)

#Register the handler so it runs on every change of the dropdown's "value"
dropdownScatter.observe(dropdown_change_Scatter, names="value")

In [14]:
#This method changes the output according to what was selected in "DROPBOX 3: BAR GRAPH"
def dropdown_change_BarGraph(change):
    """Redraw the bar graph area whenever the bar graph dropdown's value changes.

    Parameters:
        change: change dictionary delivered by the dropdown's observe(); only
            change['new'] (the newly selected option) is read.

    secondOutput is looked up when the handler runs, so the cell that creates
    it must have been executed before the dropdown is used.
    """
    #One lookup table instead of one copy-pasted branch per player
    playerFrames = {
        "Bam Adebayo": BAdebayoData,
        "Devin Booker": DBookerData,
        "Jamal Murray": JMurrayData,
        "Jayson Tatum": JTatumData,
        "Jimmy Butler": JButlerData,
        "Joel Embiid": JEmbiidData,
        "Lebron James": LJamesData,
        "Nikola Jokic": NJokicData,
        "Pascal Siakam": PSiakamData,
        "Stephen Curry": SCurryData,
    }
    selected = change['new']

    with secondOutput:
        if selected == "":
            secondOutput.clear_output()
            print('Please Select a Player.')
        elif selected in playerFrames:
            secondOutput.clear_output()
            createBarGraph(setPlayerData(playerFrames[selected]))
        elif selected:
            #An option with no data set wired up yet: just echo the selection
            secondOutput.clear_output()
            print(selected)
        
#Building the bar graph dropdown; the blank first option is the initial value
dropdownBarGraph = widgets.Dropdown(
    options=[
        "",
        "Bam Adebayo",
        "Devin Booker",
        "Jamal Murray",
        "Jayson Tatum",
        "Jimmy Butler",
        "Joel Embiid",
        "Lebron James",
        "Nikola Jokic",
        "Pascal Siakam",
        "Stephen Curry",
    ],
    value="",
    description="Player:",
    style=dict(description_width="initial"),
    layout=widgets.Layout(width="250px"),
)

#Register the handler so it runs on every change of the dropdown's "value"
dropdownBarGraph.observe(dropdown_change_BarGraph, names="value")

In [15]:
#This method changes the output according to what was selected in "DROPBOX 4: PREDICTIVE METHOD"
def dropdown_change_Predict(change):
    """Rerun the prediction whenever the predictive dropdown's value changes.

    Parameters:
        change: change dictionary delivered by the dropdown's observe(); only
            change['new'] (the newly selected option) is read.

    fourthOutput is looked up when the handler runs, so the cell that creates
    it must have been executed before the dropdown is used.
    """
    #One lookup table instead of one copy-pasted branch per player
    playerFrames = {
        "Bam Adebayo": BAdebayoData,
        "Devin Booker": DBookerData,
        "Jamal Murray": JMurrayData,
        "Jayson Tatum": JTatumData,
        "Jimmy Butler": JButlerData,
        "Joel Embiid": JEmbiidData,
        "Lebron James": LJamesData,
        "Nikola Jokic": NJokicData,
        "Pascal Siakam": PSiakamData,
        "Stephen Curry": SCurryData,
    }
    selected = change['new']

    with fourthOutput:
        if selected == "":
            fourthOutput.clear_output()
            print('Please Select a Player.')
        elif selected in playerFrames:
            fourthOutput.clear_output()
            predictNext(setPlayerData(playerFrames[selected]))
        elif selected:
            #An option with no data set wired up yet: just echo the selection
            fourthOutput.clear_output()
            print(selected)
        
#Building the predictive method dropdown; the blank first option is the initial value
dropdownPredict = widgets.Dropdown(
    options=[
        "",
        "Bam Adebayo",
        "Devin Booker",
        "Jamal Murray",
        "Jayson Tatum",
        "Jimmy Butler",
        "Joel Embiid",
        "Lebron James",
        "Nikola Jokic",
        "Pascal Siakam",
        "Stephen Curry",
    ],
    value="",
    description="Player:",
    style=dict(description_width="initial"),
    layout=widgets.Layout(width="250px"),
)

#Register the handler so it runs on every change of the dropdown's "value"
dropdownPredict.observe(dropdown_change_Predict, names="value")

In [16]:
print("DROPDOWN 1: HISTOGRAM")

#Output area that dropdown_change_Histogram draws into
thirdOutput = widgets.Output()

#Stack the dropdown above its output area and show the pair as the cell result
Histogram = widgets.VBox(children=[dropdownHistogram, thirdOutput])
Histogram

DROPDOWN 1: HISTOGRAM


VBox(children=(Dropdown(description='Player:', layout=Layout(width='250px'), options=('', 'Bam Adebayo', 'Devi…

In [17]:
print('DROPDOWN 2: SCATTER GRAPH')

#Output area that dropdown_change_Scatter draws into
output = widgets.Output()

#Stack the dropdown above its output area and show the pair as the cell result
Scatterbox = widgets.VBox(children=[dropdownScatter, output])
Scatterbox

DROPDOWN 2: SCATTER GRAPH


VBox(children=(Dropdown(description='Player:', layout=Layout(width='250px'), options=('', 'Bam Adebayo', 'Devi…

In [18]:
print("DROPDOWN 3: BAR GRAPH")

#Output area that dropdown_change_BarGraph draws into
secondOutput = widgets.Output()

#Stack the dropdown above its output area and show the pair as the cell result
BarGraph = widgets.VBox(children=[dropdownBarGraph, secondOutput])
BarGraph

DROPDOWN 3: BAR GRAPH


VBox(children=(Dropdown(description='Player:', layout=Layout(width='250px'), options=('', 'Bam Adebayo', 'Devi…

In [19]:
print("DROPDOWN 4: PREDICTIVE METHOD")

#Output area that dropdown_change_Predict prints into
fourthOutput = widgets.Output()

#Stack the dropdown above its output area and show the pair as the cell result
Predict = widgets.VBox(children=[dropdownPredict, fourthOutput])
Predict

DROPDOWN 4: PREDICTIVE METHOD


VBox(children=(Dropdown(description='Player:', layout=Layout(width='250px'), options=('', 'Bam Adebayo', 'Devi…