In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline

import seaborn as sns

from IPython.display import display
%matplotlib inline 

class eda:
    """Quick exploratory plots for a pandas DataFrame.

    On construction the frame's columns are split into numeric columns
    (``np.number`` dtypes) and categorical columns (``object`` dtype), and a
    square subplot-grid size is derived for each group so that every column
    gets its own panel.

    Attributes:
        df: the DataFrame being explored (stored by reference, not copied).
        col: list of all column labels.
        num_col: list of numeric column labels.
        cat_col: list of ``object``-dtype column labels.
        my_dim_num: side length of the subplot grid used for numeric columns.
        my_dim_cat: side length of the subplot grid used for categorical columns.
    """

    def __init__(self, df):
        """Store ``df`` and pre-compute the column groups and grid sizes."""
        self.df = df
        self.col = list(self.df.columns)
        self.num_col = list(self.df.select_dtypes(include=[np.number]).columns)
        # The builtin `object` is what the old `np.object` alias pointed to;
        # the alias itself was removed in NumPy 1.24.
        self.cat_col = list(self.df.select_dtypes(include=[object]).columns)
        # int(sqrt(n) + 1) guarantees dim * dim >= n, so every column fits.
        self.my_dim_num = int(np.sqrt(len(self.num_col)) + 1)
        self.my_dim_cat = int(np.sqrt(len(self.cat_col)) + 1)

    def set_seaborn(self):
        """Activate seaborn's default plot styling."""
        sns.set()

    def eda_hist(self, my_col='', my_figsize=(20, 20)):
        """Draw histograms of the frame's columns on a square grid.

        Args:
            my_col: currently unused; kept so existing callers keep working.
            my_figsize: figure size passed to ``DataFrame.hist``.
        """
        self.df.hist(layout=(self.my_dim_num, self.my_dim_num), figsize=my_figsize)

    def eda_scatter(self, target, my_figsize=(30, 30)):
        """Scatter-plot every numeric column against ``target``.

        Args:
            target: label of the column plotted on the y axis.
            my_figsize: size of the figure holding the grid of panels.
        """
        fig, ax = plt.subplots(nrows=self.my_dim_num, ncols=self.my_dim_num, figsize=my_figsize)

        for i, element in enumerate(self.num_col):
            row, column = divmod(i, self.my_dim_num)
            self.df.plot(kind='scatter', x=element, y=target, ax=ax[row, column])

    def eda_plot_categorical(self, target, my_kind='bar', my_figsize=(40, 50)):
        """Plot the mean of ``target`` per category, one panel per categorical column.

        Error bars show the standard deviation of ``target`` within each
        category.

        Args:
            target: label of the column that is averaged.
            my_kind: plot kind forwarded to ``Series.plot``.
            my_figsize: size of the figure holding the grid of panels.
        """
        fig, ax = plt.subplots(nrows=self.my_dim_cat, ncols=self.my_dim_cat, figsize=my_figsize)

        for i, element in enumerate(self.cat_col):
            grouped_target = self.df.groupby(element)[target]
            df_mean = grouped_target.mean()
            # Restrict the spread to the target column so the error bars line
            # up one-to-one with the plotted means (and non-numeric columns
            # elsewhere in the frame are never aggregated).
            df_errors = grouped_target.std()

            row, column = divmod(i, self.my_dim_cat)
            my_ax = ax[row, column]

            df_mean.plot(kind=my_kind, ax=my_ax, yerr=df_errors)
            my_ax.set_xlabel(element)

    def eda_plot_nulls(self):
        """Bar-plot the number of missing values for each column that has any."""
        null_counts = self.df.isnull().sum()
        null_counts = null_counts[null_counts > 0]

        x = np.arange(len(null_counts))

        # Hand matplotlib concrete lists rather than lazy views.
        plt.bar(x, list(null_counts.values))
        plt.xticks(x, list(null_counts.index), rotation='vertical')
        plt.show()


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, RidgeCV, LassoCV, ElasticNetCV
from sklearn.tree import DecisionTreeRegressor

class regularization_models:
    """Fit a battery of regressors on raw and standardized features and record their scores.

    The data is split once into train and test partitions. A
    ``StandardScaler`` is fitted on the training features only and applied to
    both partitions. Every fitted model adds two rows to ``model_results``:
    one with its train score and one with its test score.

    Attributes:
        X_train, X_test, y_train, y_test: output of ``train_test_split``.
        scaler: the ``StandardScaler`` fitted on ``X_train``.
        X_train_sc, X_test_sc: scaled versions of the feature partitions.
        model_results: list of dicts with keys ``name``, ``model``,
            ``dataset``, ``preprocessing`` and ``score``.
    """

    def __init__(self, X, y, my_test_size=.3, my_random_state=42):
        """Split the data and prepare the scaled feature matrices.

        Args:
            X: feature matrix.
            y: target values.
            my_test_size: forwarded to ``train_test_split`` as ``test_size``.
            my_random_state: forwarded to ``train_test_split`` as ``random_state``.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=my_test_size, random_state=my_random_state)

        # Fit on the training partition only so no test statistics leak in.
        self.scaler = StandardScaler()
        self.scaler.fit(self.X_train)
        self.X_train_sc = self.scaler.transform(self.X_train)
        self.X_test_sc = self.scaler.transform(self.X_test)

        self.model_results = []

    def __repr__(self):
        """Return the results table as text (``__repr__`` must return a ``str``)."""
        return repr(pd.DataFrame(self.model_results))

    def _repr_html_(self):
        """Rich notebook display: render the results table as an HTML DataFrame."""
        return pd.DataFrame(self.model_results)._repr_html_()

    def make_reg_model(self, model, X_train, y_train, X_test, y_test, name, preproc, al=None):
        """Instantiate, fit and score one model, appending two rows to ``model_results``.

        Args:
            model: estimator class (not an instance); it is called to build the model.
            X_train, y_train: data the model is fitted and train-scored on.
            X_test, y_test: data the model is test-scored on.
            name: label stored under ``'name'`` in the result rows.
            preproc: label stored under ``'preprocessing'`` in the result rows.
            al: regularization strength. ``None`` builds ``model()``; a single
                number (Python or NumPy scalar) builds ``model(alpha=al)``;
                anything else builds ``model(alphas=al)``.
        """
        if al is None:
            my_model = model()
        elif isinstance(al, (int, float, np.integer, np.floating)):
            # isinstance (rather than an exact type match) also accepts NumPy
            # scalars, e.g. values drawn from np.logspace.
            my_model = model(alpha=al)
        else:
            my_model = model(alphas=al)

        my_model.fit(X_train, y_train)
        scores = (
            ('train', my_model.score(X_train, y_train)),
            ('test', my_model.score(X_test, y_test)),
        )

        for dataset, score in scores:
            self.model_results.append({'name': name,
                                       'model': my_model,
                                       'dataset': dataset,
                                       'preprocessing': preproc,
                                       'score': score})

    def model_reg_all(self, lin_alpha=1.0, np_alpha=None):
        """Fit every supported regressor on both the raw and the scaled features.

        Args:
            lin_alpha: ``al`` value used for Ridge, Lasso and ElasticNet.
            np_alpha: ``al`` value used for RidgeCV, LassoCV and ElasticNetCV.
        """
        # (estimator class, display name, `al` passed to make_reg_model)
        model_specs = [
            (LinearRegression, 'linear regression', None),
            (Ridge, 'ridge', lin_alpha),
            (Lasso, 'lasso', lin_alpha),
            (ElasticNet, 'elastic net', lin_alpha),
            (RidgeCV, 'ridge cv', np_alpha),
            (LassoCV, 'lasso cv', np_alpha),
            (ElasticNetCV, 'elastic net cv', np_alpha),
            (KNeighborsRegressor, 'kneighbors regressor', None),
            (DecisionTreeRegressor, 'decision tree regressor', None),
        ]
        # (preprocessing label, train features, test features)
        feature_sets = [
            ('raw', self.X_train, self.X_test),
            ('scaled', self.X_train_sc, self.X_test_sc),
        ]

        # Raw before scaled for each model keeps the row order of model_results stable.
        for model, name, alpha in model_specs:
            for preproc, X_train, X_test in feature_sets:
                self.make_reg_model(model, X_train, self.y_train, X_test, self.y_test,
                                    name, preproc, al=alpha)
        