In [None]:
#import data and statistical packages
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn import linear_model as lm
import collections
%matplotlib inline

import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

#import round regression analysis data from csv file
#read_csv already returns a pandas data frame, so no further conversion is needed
df_round = pd.read_csv('round_regression_data.csv', index_col=["Player ID"])
#NOTE: a bare expression in the middle of a cell is not displayed; wrap it in print() to see the preview
df_round.head()

#names of the independent variables, in the order their coefficients are assigned to b1..b5 below
round_features = ['Games', 'Total Rebounds', 'Free Throw Percentage', 'Points Per Game', 'Total Rebounds Per Game']

#assign round to the dependent variable y, and the remaining variables to X
y = df_round['Round'].values
X = df_round[round_features].values

#fit an ordinary least squares model with statsmodels
#add_constant adds a constant column to X so the regression can estimate an intercept value
model = sm.OLS(y, sm.add_constant(X)).fit()
#NOTE: as above, this summary is computed but not displayed unless it is printed
model.summary()

#fit the same data to a model using the LinearRegression tool from scikit-learn
#X is passed without the constant column; the intercept is read from reg.intercept_ below
model = lm.LinearRegression()
reg = model.fit(X, y)

#assign coefficient values and intercept value from the fitted model to variables corresponding with the data
#b represents the greek letter beta, the symbol used when forming models
#b0 is the intercept, b1..b5 follow the column order of round_features
b0 = reg.intercept_
b1, b2, b3, b4, b5 = reg.coef_

#assign x input to be input entered by the user
#input will be plugged into the regression equation to produce output of the predicted dependent variable
#float() raises ValueError if the text entered is not a number
x1 = float(input('Enter Number of Games: '))
x2 = float(input('Enter Total Rebounds Per Season: '))
x3 = float(input('Enter Free Throw Percentage as a Decimal: '))
x4 = float(input('Enter Points Per Game Average: '))
x5 = float(input('Enter Total Rebounds Per Game Average: '))

#write regression model using input variables
y = ((b1*x1) + (b2*x2) + (b3*x3) + (b4*x4) + (b5*x5) + b0)

#print results of regression calculation
print("")
print('Regression Prediction Model: Predicted Round Value = %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f' % (b1,x1,b2,x2,b3,x3,b4,x4,b5,x5,b0))
print("")
print('Predicted Value Calculated is %.2f' % (y))
print("")

#print predicted outcome based on regression model results
#the second round range starts immediately above 1.5 so that values such as 1.505
#are not left without a round and wrongly reported as unlikely to be drafted
if 0.5 <= y <= 1.5:
    print('Therefore, we can predict the player will likely be drafted in the first round.')
elif 1.5 < y <= 2.5:
    print('Therefore, we can predict player will most likely be drafted in the second round.')
else:
    print('Because the player was not predicted to be selected in the first or second round, it is unlikely the player will be drafted into the NBA.')

#load the overall pick regression data from its csv file into a pandas data frame
data2 = pd.read_csv('overall_pick_regression_data.csv', index_col=["Player ID"])
df_overall_pick = pd.DataFrame(data2)
df_overall_pick.head()

#independent variables of the overall pick model, in the order used for b1..b6 below
pick_features = [
    'Games',
    'Minutes Played',
    'Free Throw Percentage',
    'Minutes Per Game',
    'Points Per Game',
    'Total Rebounds Per Game',
]

#overall pick is the dependent variable y; the listed columns form X
y = df_overall_pick['Overall Pick'].values
X = df_overall_pick[pick_features].values

#statsmodels fit and summary; the added constant column acts as the intercept term
model = sm.OLS(y, sm.add_constant(X)).fit()
model.summary()

#fit the same y and X (without the constant column) using LinearRegression
model = lm.LinearRegression()
reg = model.fit(X, y)

#take the intercept (b0) and the six coefficients (b1..b6) from the fitted model
b0 = reg.intercept_
b1, b2, b3, b4, b5, b6 = reg.coef_

print()

#prompt user for the second set of input values to plug into this model
x1 = float(input('Enter Number of Games: '))
x2 = float(input('Enter Total Minutes Played: '))
x3 = float(input('Enter Free Throw Percentage as a Decimal: '))
x4 = float(input('Enter Minutes Per Game: '))
x5 = float(input('Enter Points Per Game Average: '))
x6 = float(input('Enter Total Rebounds Per Game Average: '))

#evaluate the regression equation for the values entered
y = b1*x1 + b2*x2 + b3*x3 + b4*x4 + b5*x5 + b6*x6 + b0

#print the equation with its numbers filled in, followed by the predicted value
equation_values = (b1, x1, b2, x2, b3, x3, b4, x4, b5, x5, b6, x6, b0)
print()
print('Regression Prediction Model: Predicted Overall Pick Value = %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f(%.4f) + %.4f' % equation_values)
print()
print('Predicted Value Calculated is %.2f' % y)
print()

#print predicted outcome: a value outside 1 to 60 is reported as unlikely to be drafted
if not (1 <= y <= 60):
    print('Because the player was not predicted to be selected as a picked in the top 60, it is unlikely the player will be drafted into the NBA.')
else:
    print('Player is predicted to be overall draft pick number %.0f' % y)