In [7]:
from pipeline import PrepPipe, OffensiveFormation, DefensiveClustering, FeatureSelector
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

class SideNotValidError(Exception):
    """Signals that an unsupported ``side`` value was requested.

    The text handed to the constructor is kept on ``self.message`` and is
    also forwarded to ``Exception`` so it appears in ``str(err)`` and
    ``err.args``.
    """

    def __init__(self, message="Side not valid. Choose 'off', 'def', or 'both'"):
        super().__init__(message)
        # Retained as an attribute so handlers can read the text directly.
        self.message = message

class FullPipeWrapper(PrepPipe):
    """Builds scikit-learn pipelines on top of the data prepared by ``PrepPipe``.

    ``extract_data_cols`` caches the train/test split and the column groups on
    the instance; ``build_pipe`` wires those column groups into an offensive,
    defensive, or combined preprocessing pipeline that ends in a model.
    """

    # Values accepted by ``build_pipe``'s ``side`` argument.
    _VALID_SIDES = ('off', 'def', 'both')

    def __init__(self, first=1, last=14, n_cuts=11, frameLimit=11,
                 simMethod='distance', quad_num=4, def_fp='assets/def_clean_output.csv'):
        """Forward every setting, positionally and unchanged, to ``PrepPipe.__init__``.

        The meaning of each argument is defined by ``PrepPipe``; this wrapper
        only supplies the defaults shown in the signature.
        """
        super().__init__(first, last, n_cuts, frameLimit, simMethod,
                         quad_num, def_fp)

    def extract_data_cols(self):
        """Load the cleaned split and cache targets and column groups on ``self``.

        Calls ``self.clean_data()`` (inherited) and sets:

        * ``X_train`` / ``X_test`` -- feature sets as returned by ``clean_data``.
        * ``y_train_x`` / ``y_train_y`` -- first / second column of the training
          targets; ``y_test_x`` / ``y_test_y`` likewise for the test targets.
        * ``off_col`` -- columns of ``self.train_test.ofc`` without the
          ``gameId``, ``playId``, ``gamePlayId`` and ``week`` identifiers.
        * ``def_col`` -- columns of ``self.train_test.dfc`` without ``week``
          and ``index``.
        * ``off_info_cols`` / ``off_form_cols`` -- the last nine offensive
          columns and everything before them, respectively.

        NOTE(review): ``self.train_test`` is not set in this class; it is
        presumably populated by ``PrepPipe`` (possibly inside ``clean_data``)
        -- confirm.
        """
        self.X_train, self.X_test, y_train, y_test = self.clean_data()
        self.y_train_x = y_train.iloc[:, 0]
        self.y_train_y = y_train.iloc[:, 1]
        self.y_test_x = y_test.iloc[:, 0]
        self.y_test_y = y_test.iloc[:, 1]
        self.off_col = self.train_test.ofc.drop(['gameId', 'playId', 'gamePlayId', 'week'], axis=1).columns
        self.def_col = self.train_test.dfc.drop(['week', 'index'], axis=1).columns
        # The trailing nine offensive columns are the ones that get scaled
        # (in the recorded run: perc_left, perc_right, perc_behind_los, FB, HB,
        # QB, RB, TE, WR); the leading columns feed OffensiveFormation.
        self.off_info_cols = self.off_col[-9:]
        self.off_form_cols = self.off_col[:-9]

    def build_pipe(self, side='both', model=None):
        """Assemble an unfitted preprocessing + model ``Pipeline``.

        Parameters
        ----------
        side : {'off', 'def', 'both'}, default 'both'
            ``'off'``  -> offensive preprocessing followed by ``model``.
            ``'def'``  -> defensive preprocessing followed by ``model``.
            ``'both'`` -> a ``ColumnTransformer`` applying the offensive steps
            to ``self.off_col`` and the defensive steps to ``self.def_col``,
            a cast of the result to ``float``, then ``model``.
        model : estimator, optional
            Final pipeline step. When omitted, a new ``LogisticRegression()``
            is created for this call, so pipelines built separately never
            share one estimator instance.

        Returns
        -------
        sklearn.pipeline.Pipeline

        Raises
        ------
        SideNotValidError
            If ``side`` is not one of the accepted values. This is checked
            before any data is loaded or any transformer is constructed.
        """
        if side not in self._VALID_SIDES:
            raise SideNotValidError
        if model is None:
            model = LogisticRegression()
        # Column groups are loaded lazily on the first call.
        if not hasattr(self, "X_train"):
            self.extract_data_cols()

        offense_columns = list(self.off_info_cols) + list(self.off_form_cols)

        # Offensive branch: scale the info columns, run the formation
        # transformer on the rest, then re-wrap the array output as a
        # DataFrame so the following step can select columns by name.
        off_pre_one_pipe = ColumnTransformer([
            ('info_scale', StandardScaler(), self.off_info_cols),
            ('form', OffensiveFormation(), self.off_form_cols),
        ])
        off_pre_one_add_col = Pipeline([
            ('off_pre_one', off_pre_one_pipe),
            ('func_trans', FunctionTransformer(
                lambda x: pd.DataFrame(x, columns=offense_columns))),
            ('select_cols', FeatureSelector(list(offense_columns))),
        ])

        # One-hot encode the last column and pass the others through.
        # NOTE(review): the last column is presumably the formation label
        # produced upstream -- confirm against FeatureSelector's output.
        form_one_pipe = ColumnTransformer([('off_form_one', OneHotEncoder(), [-1])],
                                          remainder='passthrough')
        off_full_pipe = Pipeline([('full_cols', off_pre_one_add_col),
                                  ('one_hot', form_one_pipe)])

        # Defensive branch: clustering step, then one-hot encode its last
        # output column (presumably the cluster label -- confirm).
        def_one_pipe = ColumnTransformer([('def_clust_one', OneHotEncoder(), [-1])],
                                         remainder='passthrough')
        def_full_pipe = Pipeline([('def_clust', DefensiveClustering()),
                                  ('def_clust_one', def_one_pipe)])

        full_pipe = ColumnTransformer([('off', off_full_pipe, self.off_col),
                                       ('def', def_full_pipe, self.def_col)])

        if side == 'off':
            # Offensive pipeline alone
            pipe = Pipeline([('off_full_pipe', off_full_pipe), ('model', model)])
        elif side == 'def':
            # Defensive pipeline alone
            pipe = Pipeline([('def_full_pipe', def_full_pipe),
                             ('model', model)])
        else:
            # side == 'both' (validated at the top of the method)
            pipe = Pipeline([('full_pipe', full_pipe),
                             ('to_float', FunctionTransformer(lambda x: x.astype(float))),
                             ('model', model)])
        return pipe

In [8]:
# Create the wrapper with its default settings, then build the offense-only
# pipeline (no model is passed, so build_pipe's default model is used).
pipe_wrap = FullPipeWrapper()
pipe = pipe_wrap.build_pipe(side='off')

positional data already downloaded.
reading positional data.
returning positional data.


In [13]:
from sklearn.tree import DecisionTreeClassifier

# NOTE: despite the "def" in its name, this pipeline is built with
# side='both', i.e. it combines the offensive and defensive branches.
pipe_def_tree = pipe_wrap.build_pipe(side='both', model=DecisionTreeClassifier())

In [14]:
from sklearn.model_selection import GridSearchCV

# Candidate depths 3, 4 and 5 for the pipeline step named 'model'.
params = {'model__max_depth': range(3, 6)}
# No cv / scoring arguments are given, so GridSearchCV's defaults apply.
grid = GridSearchCV(estimator=pipe_def_tree, param_grid=params)

In [15]:
# X_train and y_train_x were cached on the wrapper by the earlier build_pipe
# call; y_train_x is the first column of the training targets.
grid.fit(pipe_wrap.X_train, y=pipe_wrap.y_train_x)

GridSearchCV(estimator=Pipeline(steps=[('full_pipe',
                                        ColumnTransformer(transformers=[('off',
                                                                         Pipeline(steps=[('full_cols',
                                                                                          Pipeline(steps=[('off_pre_one',
                                                                                                           ColumnTransformer(transformers=[('info_scale',
                                                                                                                                            StandardScaler(),
                                                                                                                                            Index(['perc_left', 'perc_right', 'perc_behind_los', 'FB', 'HB', 'QB', 'RB',
       'TE', 'WR'],
      dtype='object')),
                                                                     

In [16]:
# Mean cross-validated score of the best max_depth candidate, using the
# estimator's default scorer because no `scoring` was passed to GridSearchCV.
grid.best_score_

0.40224292349075325