In [1]:

import os
import json
from pypelines.sklearn_pypeline import SklearnPipeline

from pypelines.sklearn.classification import models_classification , model_comparison_classification
from pypelines.sklearn.regression import models_regression, model_comparison_regression

In [2]:
from pypelines.sklearn import classification

In [3]:
# Code-output demo: a classification pipeline on the titanic data, limited to
# three named models, created with output_format="code".
skl_pypelines = SklearnPipeline(
    data="titanic",
    target="Survived",
    model_type="classification",
    models=[
        "GaussianNB Classifier",
        "Logistic Regression",
        "Random Forest",
    ],
    nfolds=5,
    output_format="code",
)


In [4]:
# Show the models this pipeline instance is configured with; the output below
# matches the three names passed via `models` when the instance was created.
skl_pypelines.model_list()

['GaussianNB Classifier', 'Logistic Regression', 'Random Forest']


In [5]:
# Retrieve the generated pipeline source. This instance was created with
# output_format='code', and the output below is plain Python source text.
skl_pypelines.get_code()


from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import plotly.express as px
import plotly.graph_objects as go


# target dataframe: titanic
titanic = pd.read_csv("./titanic.csv")
target = "Survived"
features = list(titanic.columns.drop("Survived"))
feature_df = titanic[features]

# get numerical and categorical columns
bool_cols = feature_df.select_dtypes(include=['bool']).columns.tolist()
titanic[bool_cols] = feature_df[bool_cols].astype(int)
numerical_cols = feature_df.select_dtypes(include=['int', 'float']).columns.tolist()
categorical_cols = feature_df.select_dtypes(include=['object']).columns.tolist(

In [9]:
# Script-output demo: a regression pipeline on the same titanic data, created
# with output_format="script" and an explicit output folder.
skl_pypelines = SklearnPipeline(
    data="titanic",
    target="Survived",
    model_type="regression",
    # `models` is deliberately not passed for this run; the model_list() output
    # further down shows the full set of regression models.
    nfolds=5,
    output_format="script",
    output_folder="./code_output",
)

In [10]:
# No `models` argument was given for the regression instance, so this lists
# every regression model it will use (presumably the library's default set;
# TODO confirm against the pypelines docs).
skl_pypelines.model_list()

['Elastic Net', 'Linear Regression', 'Lasso', 'Ridge', 'SGD Regressor Regression', 'Histogram Gradient Boost Regression', 'Random Forest Regression', 'AdaBoost Regression', 'Poisson Regression', 'Decision Tree Regression', 'GBT Regression', 'ExtraTree Regression', 'GPR Regression', 'Bayesian ARD Regression', 'Bayesian Ridge Regression', 'Quantile Regression', 'Huber Regression', 'TheilSen Regression', 'Passive Aggressive Regression', 'Gamma Regression', 'Tweedie Regression', 'OMP Regression', 'LassoLars Regression', 'RANSAC Regression']


In [11]:
# For this output_format='script' instance the result shown below is a dict:
# the visible part starts with a 'data_prep_pipeline' entry holding the
# generated source under a 'code' key.
skl_pypelines.get_code()

{'data_prep_pipeline': {'code': '\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.metrics import mean_squared_error\n\n\nimport pandas as pd\nimport numpy as np\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nimport plotly.express as px\nimport plotly.graph_objects as go\n\n\n# target dataframe: titanic\ntitanic = pd.read_csv("./titanic.csv")\ntarget = "Survived"\nfeatures = list(titanic.columns.drop("Survived"))\nfeature_df = titanic[features]\n\n# get numerical and categorical columns\nbool_cols = feature_df.select_dtypes(include=[\'bool\']).columns.tolist()\ntitanic[bool_cols] = feature_df[bool_cols].astype(int)\nnumerical_cols = feature_df.select_dtypes(include=[\'int\', \'float\']).columns.tolist()\ncategoric

In [12]:
# Display the per-model hyperparameter search configuration. In the output
# below each model maps to 'numerical' entries (name/min/max/step) and
# 'categorical' entries (name/selected/values), each with a 'checked' flag.
# NOTE(review): whether this call also runs a search is not visible here.
skl_pypelines.grid_search()

{'Elastic Net': {'numerical': [{'checked': True,
    'name': 'alpha',
    'min': 0.1,
    'max': 1,
    'step': 0.5},
   {'checked': True, 'name': 'l1_ratio', 'min': 0.0, 'max': 1.0, 'step': 0.1},
   {'checked': True,
    'name': 'max_iter',
    'min': 500,
    'max': 1000,
    'step': 100}],
  'categorical': [{'checked': True,
    'name': 'fit_intercept',
    'selected': [True],
    'values': [True, False]},
   {'checked': True,
    'name': 'precompute',
    'selected': [False],
    'values': [True, False]},
   {'checked': True,
    'name': 'selection',
    'selected': ['cyclic'],
    'values': ['cyclic', 'random']}]},
 'Linear Regression': {'numerical': [{'checked': True,
    'name': 'n_jobs',
    'min': 1,
    'max': 10,
    'step': 1}],
  'categorical': [{'checked': True,
    'name': 'fit_intercept',
    'selected': [True],
    'values': [True, False]},
   {'checked': False,
    'name': 'normalize',
    'selected': [True],
    'values': [True, False]}]},
 'Lasso': {'numerical': [{'