# Import Dependencies

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import sklearn.ensemble
import sklearn.model_selection
%matplotlib inline

# Functions

# Maps: Names $\Rightarrow$ scikit-learn Callables

In [None]:
# Lookup tables: each key is a human-readable name, each value is the
# scikit-learn callable that the name stands for.

# Hyperparameter search strategies.
model_selection_map = {
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    'grid search': sklearn.model_selection.GridSearchCV,
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
    'random search': sklearn.model_selection.RandomizedSearchCV,
}

# Ways of partitioning the data into training and test portions.
split_map = {
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
    'train test split': sklearn.model_selection.train_test_split,
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
    'k folds': sklearn.model_selection.KFold,
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html
    'stratified k folds': sklearn.model_selection.StratifiedKFold,
}

# Main Class Definitions

In [None]:
class base:
    """Shared state for an object that is chosen by name from a lookup dictionary.

    Attributes set by ``update``:
        name: the selected key of ``map_dict``.
        options: keyword options associated with the selection (``{}`` when
            ``None`` was given, so the value can always be unpacked with ``**``).
        map: the lookup dictionary itself.
    """

    def __init__(self, name, options, map_dict):
        """Validate ``name`` against ``map_dict`` and store all three values."""
        # update() requires map_dict as well; omitting it raised TypeError.
        self.update(name, options, map_dict)

    def update(self, name, options, map_dict):
        """Replace the stored name, options and lookup dictionary.

        Raises:
            AssertionError: if ``name`` is not a key of ``map_dict``.
        """
        assert name in map_dict, f"{name} must be a key in the map dictionary. keys for dictionary provided are {map_dict.keys()}."
        self.name = name
        self.options = {} if options is None else options
        self.map = map_dict

In [None]:
class split(base):
    """Partition data using the splitter selected by ``name``.

    ``name`` must be a key of ``map_dict`` (``split_map`` by default) and
    ``options`` are the keyword arguments passed to the mapped callable.
    """

    def __init__(self, name, options, map_dict=split_map):
        super().__init__(name, options, map_dict)

    def make_sets(self, X, y):
        """Run the selected splitter and store the result on the instance.

        'train test split' sets ``self.train`` and ``self.test``; 'k folds' and
        'stratified k folds' set ``self.kfolds``. Any other name does nothing.
        """
        if self.name == 'train test split':
            self.__train_test_split(X, y)
        elif self.name == 'k folds':
            self.__k_folds(X)
        elif self.name == 'stratified k folds':
            self.__stratified_k_folds(X, y)

    def __train_test_split(self, X, y):
        """Store ``self.train`` / ``self.test`` as ``{'X': ..., 'y': ...}`` dicts."""
        Xtrain, Xtest, ytrain, ytest = self.map[self.name](X, y, **self.options)
        self.train = {'X': Xtrain, 'y': ytrain}
        self.test = {'X': Xtest, 'y': ytest}

    def __index_folds(self, *arrays):
        """Build ``{fold number: {'train': ..., 'test': ...}}`` from the splitter's ``split``."""
        splitter = self.map[self.name](**self.options)
        return {
            i: {'train': train_indices, 'test': test_indices}
            for i, (train_indices, test_indices) in enumerate(splitter.split(*arrays))
        }

    def __k_folds(self, X):
        """Store ``self.kfolds`` from a split that only needs ``X``."""
        # The original comprehension referenced a misspelt `test_indicies` (NameError).
        self.kfolds = self.__index_folds(X)

    def __stratified_k_folds(self, X, y):
        """Store ``self.kfolds`` from a split that needs both ``X`` and ``y``."""
        self.kfolds = self.__index_folds(X, y)

In [None]:
class model_selection(base):
    """A selection by name from ``model_selection_map`` (or a supplied ``map_dict``)."""

    def __init__(self, name, options, map_dict=model_selection_map):
        """Validate and store ``name``, ``options`` and ``map_dict`` via ``base``."""
        super(model_selection, self).__init__(name, options, map_dict)

In [None]:
class model:
    """Placeholder class; no behaviour is implemented yet."""

    def __init__(self):
        """Create an instance without setting any attributes."""

In [None]:
class brute_force_explore:
    """Placeholder class; no behaviour is implemented yet."""

    def __init__(self):
        """Create an instance without setting any attributes."""

In [None]:
# FIXME(scratch): `np.bincount()` was called with no argument, which raises TypeError
# and stops Restart & Run All. It needs a 1-D array of non-negative ints, e.g.
# `np.bincount(y)` once `y` exists (it is created under "Simulate Data").
# np.bincount()

In [None]:
# FIXME(scratch): `sklearn.mod` is an unfinished tab-completion and raises AttributeError
# on Restart & Run All (presumably `sklearn.model_selection` was intended).
# sklearn.mod

# Simulate Data

In [None]:
# Simulate K Gaussian blobs in D dimensions, one blob per class.
# N is the requested sample count; each class gets N // K points, so the
# arrays hold K * (N // K) rows (999 for N=1000, K=3).
N, K, D = 1000, 3, 2
SEED = 0  # fixed so the data, and everything fitted on it, is reproducible
rng = np.random.RandomState(SEED)

# One centre per class; row k is the mean of class k.
centers = np.array([[2, 2], [2, -2], [-2, 0]])
assert centers.shape == (K, D), "need exactly one D-dimensional centre per class"

n_per_class = N // K

# Unit-variance noise around each centre, stacked class by class.
X = np.vstack([rng.randn(n_per_class, D) + center for center in centers])

# Labels follow the stacking order: n_per_class zeros, then ones, then twos.
y = np.repeat(np.arange(K), n_per_class)

# Example - Random Forest Classification

In [None]:
# Fixed random_state so the fitted forest is the same on every run.
rf = sklearn.ensemble.RandomForestClassifier(n_estimators=100, random_state=0)

In [None]:
# Fixed random_state so the same rows land in train/test on every run.
Xtrain, Xtest, ytrain, ytest = sklearn.model_selection.train_test_split(
    X, y, random_state=0
)

In [None]:
# Train the forest on the training portion only.
rf.fit(X=Xtrain, y=ytrain)

In [None]:
# Accuracy on the held-out rows: the fraction of predictions equal to the true label.
yhat = rf.predict(Xtest)
(yhat == ytest).mean()

In [None]:
# X is stacked class by class, so contiguous (unshuffled) folds would leave each
# training part without one whole class. Shuffle, with a fixed seed for reproducibility.
kf = sklearn.model_selection.KFold(n_splits=2, shuffle=True, random_state=0)

# folds[i] holds the row indices of fold i: {'train': ..., 'test': ...}.
folds = {
    i: {'train': train_index, 'test': test_index}
    for i, (train_index, test_index) in enumerate(kf.split(X))
}
# Usage: X[folds[i]['train']], y[folds[i]['train']] select the training rows of fold i
# (use 'test' for the held-out rows).

In [None]:
# Show only the first few indices of each part rather than dumping both full index arrays.
{part: indices[:5] for part, indices in folds[0].items()}

In [None]:
# Draft configuration describing one experiment: which model, how to split the
# data, and how to search hyperparameters. Each '*_options' dict holds keyword
# arguments for the callable named by the entry just above it.
# NOTE(review): values such as 'warn' and 'raise-deprecating' look like deprecation
# sentinels copied from an older scikit-learn signature; confirm the installed
# version accepts them before passing these options on.
kwargs = {

    # model
    'model' : 'random forest',
    'model_options' : {
        'n_estimators' : 'warn',
        'criterion' : 'gini',
        'max_depth' : None,
        'min_samples_split' : 2,
        'min_samples_leaf' : 1,
        'min_weight_fraction_leaf' : 0.0,
        'max_features' : 'auto',
        'max_leaf_nodes' : None,
        'min_impurity_decrease' : 0.0,
        'min_impurity_split' : None,
        'bootstrap' : True,
        'oob_score' : False,
        'n_jobs' : None,
        'random_state' : None,
        'verbose' : 0,
        'warm_start' : False,
        'class_weight' : None,
    },

    # data split used while tuning (a key of split_map)
    # 'train test split' : https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html
    # 'k folds' : https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
    'tuning' : 'train test split',
    'tuning_options' : {
        'test_size' : 0.2,
        'train_size' : None,
        'random_state' : None,
        'shuffle' : True,
        'stratify' : None
    },

    # cross-validated hyperparameter search (a key of model_selection_map)
    # random: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
    # grid: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    # The options below (n_iter, cv, refit, ...) are search options, not
    # train_test_split options, so the name is 'random search' instead of the
    # copy-pasted 'train test split'.
    'cross_validation' : 'random search',
    'cross_validation_options' : {
        'n_iter' : 10,
        'scoring' : None,
        'n_jobs' : None,
        'iid' : 'warn',
        'refit' : True,
        'cv' : 'warn',
        'verbose' : 0,
        'pre_dispatch' : '2*n_jobs',
        'random_state' : None,
        'error_score' : 'raise-deprecating',
        'return_train_score' : False
    }

}