**Web Scraping**

To get our data, I built a small web scraper that pulls the game log from ESPN.



In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import MultiComparison
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import math

# ESPN game-log pages for player id 3012 (Kyle Lowry). Exactly one `url`
# assignment should be active; swap the comment markers to scrape another season.

#Current Year, 2020
#url = 'https://www.espn.com/nba/player/gamelog/_/id/3012/kyle-lowry'

#2019
url = 'https://www.espn.com/nba/player/gamelog/_/id/3012/type/nba/year/2019'

#2018
#url = 'https://www.espn.com/nba/player/gamelog/_/id/3012/type/nba/year/2018'

# Column names assigned to each scraped table row, in order.
columns = ['Date','OPP','Result','MIN','FG','FG%','3PT','3P%','FT','FT%','REB','AST','BLK','STL','PF','TO', 'PTS']

# Use a timeout so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error page into an empty frame.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Collect the text of every table row, keeping only rows that have exactly one
# cell per expected column; rows with any other cell count are skipped.
all_data = []
for row in soup.select('.Table__TR'):
    tds = [td.get_text(strip=True, separator=' ') for td in row.select('.Table__TD')]
    if len(tds) != len(columns):
        continue
    all_data.append(tds)

# Every value is still a string at this point; numeric conversion happens later.
df = pd.DataFrame(all_data, columns=columns)




In [None]:
# Print a summary of the scraped frame (columns, non-null counts, dtypes).
df.info()
# Last expression in the cell, so the notebook also displays the per-column dtypes.
df.dtypes

> **Read the data**

Here we convert the columns we need to numeric types and preview the first few rows of the data.


In [None]:
# The scraped values are strings; convert the three columns used in the
# analysis (points, assists, minutes) to numeric dtypes, in that order.
for col in ('PTS', 'AST', 'MIN'):
    df[col] = pd.to_numeric(df[col])

# Preview the first rows of the converted frame.
df.head()


First, we display a scatter plot of MIN vs. PTS with a fitted linear model, where MIN is the independent variable (the one we vary) and PTS is the dependent variable (the one we measure as MIN changes).

In [None]:
# Scatter plot of points against minutes with a fitted regression line.
sns.lmplot(
    data=df,
    x="MIN",
    y="PTS",
)
plt.show()

In [None]:
# Scatter plot of assists against minutes with a fitted regression line.
sns.lmplot(
    data=df,
    x="MIN",
    y="AST",
)
plt.show()

In [None]:
# Average of the PTS column across all scraped rows.
mean_points = df['PTS'].mean()
print('Kyle Lowry Mean Points is %.3f' % mean_points)

In [None]:
# Feature matrix (minutes) and target (points), both kept as single-column
# DataFrames selected from the scraped frame.
X = df[['MIN']]
Y = df[['PTS']]

# Build linear regression model
# `normalize` is deliberately not passed: the parameter was removed in
# scikit-learn 1.2, so supplying it raises TypeError on current releases.
# The old value (False) was the default, so the fitted model is unchanged.
lr_model = LinearRegression(fit_intercept=True)
lr_model.fit(X, Y)

# R^2 is computed on the same data the model was fitted on (no hold-out split).
sc = lr_model.score(X, Y)
print('R2 score: %.3f' % sc)

In [None]:
# In-sample predictions from the fitted model, and their root mean square error.
y_pred = lr_model.predict(X)
rmse = math.sqrt(mean_squared_error(Y, y_pred))

# Y is a single-column DataFrame, so Y.std() would be a Series; formatting a
# Series with %.3f relies on implicit single-element float conversion, which
# pandas has deprecated. Take the scalar std of the PTS column explicitly.
pts_std = Y['PTS'].std()
print('Root Mean Square Error is %.3f. Data STD is %.3f' % (rmse, pts_std))

In [None]:
# Pearson correlation between points and assists; the second value returned
# by pearsonr is bound to `_` and not reported.
points, assists = df['PTS'], df['AST']
corr, _ = pearsonr(points, assists)
print('Pearson correlation coefficient: %.3f' % corr)