In [2]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.metrics import f1_score, confusion_matrix, plot_confusion_matrix
from sklearn.metrics import accuracy_score, balanced_accuracy_score, recall_score
from sklearn.metrics import roc_auc_score, average_precision_score, precision_score

import warnings
warnings.filterwarnings("ignore")

In [3]:
class RegressionModel():
    """
    Grid-searched regression model wrapper with train/test score bookkeeping.

    Fits a GridSearchCV over `pipe`/`params` on the training data, predicts on
    both the training and the test set, and stores R2, RMSE, MSE and MAE for
    each split, both as attributes and as a one-column DataFrame.

    ARGUMENTS (init)
    pipe: pipeline (or estimator) handed to GridSearchCV
    X_train: your X that trains your model
    X_test: your X to test your model with
    y_train: your y that trains your model
    y_test: your y to test your model with
    params: parameter grid for GridSearchCV (None is treated as an empty grid)
    mod_name: Model Name shown in DataFrame; when empty, the class name of the
        last pipeline step (or of `pipe` itself) is used
    round_y_threshold: predictions below this value become 0 and all others
        become 1 before scoring; pass -1 to score the raw predictions
    invert_y: when True, the thresholded 0/1 predictions are flipped
    verbose: Sets verbosity of GridSearchCV
    print_results: when True, prints a START timestamp before fitting and the
        score report once __init__ has computed all scores
    """
    # Class-level defaults; __init__ rebinds every one of these on the instance.
    scores = {}
    train_scores = {}
    y_pred = None
    y_train_pred = None
    train_r2_score = None
    test_r2_score = None
    train_rmse = None
    test_rmse = None
    train_mse = None
    test_mse = None
    train_mae = None
    test_mae = None
    df = None
    y_thresh = None

    def __init__(self, pipe, X_train, X_test, y_train, y_test, params=None, mod_name='', 
                 round_y_threshold = 0.5, invert_y=False, verbose=2, print_results=False):
        if print_results:
            print(datetime.datetime.now().strftime('START:  %Y-%m-%d, %H:%M:%S\n'))

        self.y_thresh = round_y_threshold
        self.pipe = pipe
        # A fresh dict per instance instead of a shared mutable default.
        self.params = {} if params is None else params
        self.model_name = mod_name if mod_name else self._default_model_name(pipe)

        self.model = GridSearchCV(self.pipe, self.params, n_jobs=-1, verbose=verbose)
        self.model.fit(X_train, y_train)
        self.best_params_ = self.model.best_params_

        predict = self.set_y_pred_inv if invert_y else self.set_y_pred
        self.y_pred = predict(X_test)
        self.y_train_pred = predict(X_train)

        self.train_r2_score = self.set_r2_score(y_train, self.y_train_pred)
        self.test_r2_score = self.set_r2_score(y_test, self.y_pred)
        self.train_rmse = self.set_rmse(y_train, self.y_train_pred)
        self.test_rmse = self.set_rmse(y_test, self.y_pred)
        self.train_mse = self.set_mse(y_train, self.y_train_pred)
        self.test_mse = self.set_mse(y_test, self.y_pred)
        self.train_mae = self.set_mae(y_train, self.y_train_pred)
        self.test_mae = self.set_mae(y_test, self.y_pred)

        self.set_all_score()
        self.set_DataFrame()

        if print_results:
            # print_results needs the targets for its baseline section; calling
            # it without them raised TypeError after the whole fit had run.
            self.print_results(y_train, y_test)

    @staticmethod
    def _default_model_name(pipe):
        """Return the class name of the last step of `pipe`, or of `pipe` itself."""
        try:
            final_step = pipe[-1]
        except (TypeError, IndexError, KeyError, AttributeError):
            # `pipe` is a bare estimator rather than an indexable pipeline.
            final_step = pipe
        return type(final_step).__name__

    @staticmethod
    def _value_counts(y, normalize=False):
        """Return value counts of `y`, wrapping plain arrays/lists in a Series."""
        if not hasattr(y, 'value_counts'):
            y = pd.Series(np.ravel(y))
        return y.value_counts(normalize=normalize)

    def _scores_to_frame(self, scores):
        """Build a one-column DataFrame (column = model name) from a score dict."""
        return pd.DataFrame(np.array(list(scores.values())),
                            index=list(scores.keys()),
                            columns=[self.model_name])

    def DataFrame(self):
        """Return the score DataFrame built by set_DataFrame."""
        return self.df

    def set_DataFrame(self):
        """Stack the test scores on top of the train scores into self.df."""
        test_df = self._scores_to_frame(self.scores)
        train_df = self._scores_to_frame(self.train_scores)
        self.df = pd.concat([test_df, train_df])

    def print_results(self, y_train, y_test):
        """
        Print target value counts for both splits, then the train/test scores,
        the pipeline, the best parameters and the grid search's best score.

        y_train / y_test may be pandas objects or plain array-likes.
        """
        print('\nBaseline (y_train):')
        print(f'  - count:\n{self._value_counts(y_train)}')
        print(f'  - percent:\n{self._value_counts(y_train, normalize=True)}\n')
        print('\nBaseline (y_test):')
        print(f'  - count:\n{self._value_counts(y_test)}')
        print(f'  - percent:\n{self._value_counts(y_test, normalize=True)}\n')
        print('\nSTATS (y_train):')
        self.print_train_stats()
        print('\nSTATS (y_test):')
        self.print_stats()
        print(f'  - Model, BEST SCORE: {self.model.best_score_}')

    def set_all_score(self):
        """Collect the individual score attributes into the two score dicts."""
        self.scores = {
            'R2 Score': self.test_r2_score, 
            'RMSE': self.test_rmse, 
            'MSE': self.test_mse, 
            'MAE': self.test_mae, 
        }
        self.train_scores = {
            'Train R2 Score': self.train_r2_score, 
            'Train RMSE': self.train_rmse, 
            'Train MSE': self.train_mse, 
            'Train MAE': self.train_mae, 
        }

    def print_stats(self):
        """Print the test scores followed by the pipeline and best parameters."""
        for s, v in self.scores.items():
            print(f'  - {s}:\n        {v}')
        print(f'\nPIPE: {self.pipe}')
        print(f'\nbest params: {self.best_params_}')

    def print_train_stats(self):
        """Print the train scores."""
        for s, v in self.train_scores.items():
            print(f'  - {s}:\n        {v}')

    def set_y_pred(self, X):
        """
        Predict on X with the fitted grid search.

        With self.y_thresh == -1 the raw predictions are returned; otherwise
        predictions below the threshold map to 0 and all others to 1.
        """
        predictions = self.model.predict(X)
        if self.y_thresh == -1:
            return predictions
        return np.where(predictions < self.y_thresh, 0, 1)

    def set_y_pred_inv(self, X):
        """
        Predict on X and return the flipped 0/1 labels: predictions below the
        threshold map to 1 and all others to 0.
        """
        # NOTE(review): unlike set_y_pred, the -1 sentinel is not special-cased
        # here, so predictions are still thresholded at self.y_thresh -- confirm
        # whether invert_y is meant to be combined with round_y_threshold=-1.
        return np.where(self.model.predict(X) < self.y_thresh, 1, 0)

    def set_r2_score(self, y_true, y_predict):
        """Return the R2 score of y_predict against y_true."""
        return r2_score(y_true, y_predict)

    def set_rmse(self, y_true, y_predict):
        """Return the root mean squared error of y_predict against y_true."""
        # `squared=False` is deprecated/removed in newer scikit-learn releases;
        # taking the root of the per-output MSE and averaging gives the same
        # value without depending on that keyword.
        per_output_mse = mean_squared_error(y_true, y_predict, multioutput='raw_values')
        return np.sqrt(per_output_mse).mean()

    def set_mse(self, y_true, y_predict):
        """Return the mean squared error of y_predict against y_true."""
        return mean_squared_error(y_true, y_predict)

    def set_mae(self, y_true, y_predict):
        """Return the mean absolute error of y_predict against y_true."""
        return mean_absolute_error(y_true, y_predict)

    def check_y(self, y_true, y_pred):
        """Print how many entries of y_pred differ from (0) or equal (1) y_true."""
        y_compare = np.where(y_true == y_pred, 1, 0)
        print(np.asarray((np.unique(y_compare, return_counts=True))))
    