# Running Back Season Statistic Modeling

### Load Libraries

In [33]:
import os
import sys

import IPython
import ipywidgets as widgets
import matplotlib.pyplot as plt
import mglearn
import numpy as np
import pandas as pd
from IPython.display import HTML, display
from ipywidgets import interact, interactive
from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score, cross_validate, train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from utils import *

# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Let pandas show up to 200 characters per column before truncating cell text
pd.set_option("display.max_colwidth", 200)

### Set Path

In [35]:
# Project root that the data path in the load cell is resolved against (it uses os.getcwd()).
# Defaults to the original author's checkout; set the FANTASY_WIZARD_ROOT environment
# variable to run this notebook from another machine without editing the cell.
PROJECT_ROOT = os.environ.get(
    "FANTASY_WIZARD_ROOT",
    "/Users/dusty/Desktop/projects/sports_analytics.nosync/fantasy_wizard/",
)

# Switch the kernel's working directory to the project root
os.chdir(PROJECT_ROOT)

# Print the new working directory
print("Working In:", os.getcwd())

Working In: /Users/dusty/Desktop/projects/sports_analytics.nosync/fantasy_wizard


### Load Data

In [37]:
# Build the path to the processed running-back season modeling CSV under the
# current working directory, then read it into a DataFrame
rb_data_path = os.path.join(
    os.getcwd(),
    "data",
    "processed",
    "season",
    "rb_season_stat_modeling_data.csv",
)
rb_data = pd.read_csv(rb_data_path)

In [38]:
# Preview the first rows of the loaded frame to sanity-check its columns and values
rb_data.head()

Unnamed: 0,player_id,player_display_name,target_season,games_played,seasons_played,career_carries,career_carries_pg,career_rushing_yd,career_rushing_ypg,career_rushing_td,...,prev_season_receiving_epa,prev_season_receiving_epa_pg,target_carries,target_rushing_yd,target_rushing_td,target_rushing_fb_lst,target_receptions,target_receiving_yd,target_receiving_td,target_receiving_fb_lst
0,00-0022178,Willis McGahee,2013,137,9,2068,15.094891,8505,62.080292,68,...,6.838503,0.68385,138,377,2,1,8,20,0,0
1,00-0022736,Steven Jackson,2013,133,9,2417,18.172932,10215,76.804511,56,...,6.663583,0.416474,157,543,6,0,33,191,1,0
2,00-0022897,Greg Jones,2013,87,8,272,3.126437,926,10.643678,10,...,0.508246,0.063531,2,2,0,0,5,34,0,0
3,00-0023437,Ronnie Brown,2013,104,8,1229,11.817308,5187,49.875,37,...,-3.610883,-0.25792,54,234,2,0,13,87,0,0
4,00-0023500,Frank Gore,2013,121,8,2004,16.561983,9321,77.033058,55,...,8.4749,0.446047,324,1292,10,3,19,177,0,0


### Split into Modeling and Target Data

In [40]:
# Columns holding the season outcomes to be predicted (the model targets)
target_columns = [
    'target_carries',
    'target_rushing_yd',
    'target_rushing_td',
    'target_rushing_fb_lst',
    'target_receptions',
    'target_receiving_yd',
    'target_receiving_td',
    'target_receiving_fb_lst',
]

# Season withheld from the train/test split; defined once so the hold-out and
# modeling filters below cannot drift apart
HOLDOUT_SEASON = 2022
is_holdout_season = rb_data["target_season"] == HOLDOUT_SEASON

# Rows for the held-out season, kept whole (feature and target columns together)
external_data = rb_data.loc[is_holdout_season]

# Every other season, filtered once and then separated into features and targets
historical_data = rb_data.loc[~is_holdout_season]
modeling_data = historical_data.drop(columns=target_columns)
# NOTE: despite its name, `test_data` holds the target columns (y) for the
# modeling rows; it is not a test split
test_data = historical_data[target_columns]

# NOTE(review): the previewed frame shows an `Unnamed: 0` column alongside
# `player_id` / `player_display_name`; all three remain in `modeling_data` --
# confirm they are excluded before fitting a model.

# Split features and targets into an 80% / 20% train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    modeling_data, test_data, train_size=0.8, test_size=0.2, random_state=42
)

In [27]:
# Show (rows, columns) of the feature frame produced by the split cell.
# NOTE(review): the saved output of this cell comes from an execution older than the
# split cell's latest run; re-run it so the displayed shape reflects the current split.
modeling_data.shape

(0, 54)

In [21]:
# Display the target_season == 2022 rows set aside by the split cell
# (pandas elides the middle rows and columns of a long frame in the rendered output)
external_data

Unnamed: 0,player_id,player_display_name,target_season,games_played,seasons_played,career_carries,career_carries_pg,career_rushing_yd,career_rushing_ypg,career_rushing_td,...,prev_season_receiving_epa,prev_season_receiving_epa_pg,target_carries,target_rushing_yd,target_rushing_td,target_rushing_fb_lst,target_receptions,target_receiving_yd,target_receiving_td,target_receiving_fb_lst
0,00-0027966,Mark Ingram,2022,152,11,1826,12.013158,8177,53.796053,65,...,-2.091120,-0.149366,62,233,1,1,16,68,0,1
1,00-0029239,Brandon Bolden,2022,79,9,295,3.734177,1343,17.000000,13,...,18.554955,1.091468,17,66,0,0,9,57,1,0
2,00-0030288,Rex Burkhead,2022,88,8,494,5.613636,1978,22.477273,20,...,3.539262,0.252804,26,80,0,0,37,204,1,0
3,00-0030456,Giovani Bernard,2022,130,9,963,7.407692,3878,29.830769,23,...,-3.489711,-0.290809,8,28,0,0,2,-1,0,0
4,00-0030513,Latavius Murray,2022,122,8,1356,11.114754,5629,46.139344,51,...,-0.533062,-0.038076,171,760,6,0,27,132,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,00-0036906,Khalil Herbert,2022,14,1,103,7.357143,433,30.928571,2,...,-0.765520,-0.054680,129,731,4,0,9,57,1,0
106,00-0036919,Kenneth Gainwell,2022,17,1,69,4.058824,297,17.470588,5,...,9.850296,0.579429,86,421,5,0,30,224,0,0
107,00-0036924,Michael Carter,2022,14,1,147,10.500000,639,45.642857,4,...,-5.296104,-0.378293,114,402,3,1,41,288,0,0
108,00-0036984,Trey Sermon,2022,5,1,41,8.200000,167,33.400000,1,...,1.588195,0.317639,2,19,0,0,0,0,0,0
