In [1]:
from palmerpenguins import load_penguins
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


# Load the penguins data and keep the original species labels around,
# because the "species" column is overwritten with integer codes below.
penguins_df = load_penguins()
target_values = penguins_df['species'].unique()

# Replace each of these columns with the integer codes LabelEncoder assigns.
for column in ["species", "island", "sex"]:
    penguins_df[column] = LabelEncoder().fit_transform(penguins_df[column])

# Seed the shuffle so the 70/30 split (and any score computed from it) is
# identical on every Restart & Run All instead of changing run to run.
train_penguins_df, test_penguins_df = train_test_split(
    penguins_df, test_size=.30, random_state=42
)

# Everything except the target column is used as a feature.
target_name = "species"
feature_names = [c for c in train_penguins_df.columns if c != target_name]

X_train, y_train = train_penguins_df[feature_names], train_penguins_df[target_name]
X_test, y_test = test_penguins_df[feature_names], test_penguins_df[target_name]

# Display the shapes as a sanity check on the split.
X_train.shape, y_train.shape, X_test.shape, y_test.shape


((240, 7), (240,), (104, 7), (104,))

In [2]:
from rubicon_ml import Rubicon
from rubicon_ml.sklearn import RubiconPipeline
from joy.imputation import DistributionImputation
from sklearn.neighbors import KNeighborsClassifier


# Rubicon client configured with "memory" persistence, plus the project the
# pipeline below is attached to.
rubicon = Rubicon(persistence="memory")
project = rubicon.get_or_create_project("Rubicon Pipeline Example")

# NOTE(review): imputer_strategy is not referenced anywhere in this cell.
imputer_strategy = "mean"
classifier_n_neighbors = 5

# One named imputation step per feature column, in feature order.
imputation_steps = [
    ("impute_{0}".format(feature), DistributionImputation(feature))
    for feature in feature_names
]

# The classifier is the final step, after all imputation steps.
knn_step = ("kn", KNeighborsClassifier(n_neighbors=classifier_n_neighbors))

pipe = RubiconPipeline(
    project,
    imputation_steps + [knn_step],
    ignore_warnings=True,
)
pipe


In [3]:
# Fit the pipeline on the training split.
pipe.fit(X_train, y_train)

In [4]:
# Score the pipeline on the held-out test split; the value is the cell output.
pipe.score(X_test, y_test)

0.7596153846153846

#### Hiding Warnings in RubiconPipeline

`RubiconPipeline` has an `ignore_warnings` attribute that, when set to __True__ (default=__False__), will hide warnings generated by `RubiconPipeline.fit()`, `RubiconPipeline.score()`, and `RubiconPipeline.score_samples()`. If you wish to see warnings again in future fits and scores, simply set `RubiconPipeline.ignore_warnings = False`.

Here we are instantiating a pipeline that ignores warnings until told otherwise.


In [None]:
# Import the two estimators here so this cell runs on a fresh kernel;
# without these imports the names below raise NameError.
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Build a scaler + SVC pipeline attached to the Rubicon project, with
# ignore_warnings switched on at construction time.
pipe_toggle_warnings = RubiconPipeline(
    project,
    [('scaler', StandardScaler()), ('svc', SVC())],
    ignore_warnings=True,
)