# Pipelines in Sklearn

In [25]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [26]:
# Load the iris dataset.
# NOTE(review): despite the `_df` suffix this object is not used as a DataFrame —
# the cells below read its `.data` (feature array) and `.target` attributes.
iris_df = load_iris()

In [27]:
# Preview only the first five feature rows; displaying the whole array floods the
# notebook with output and hides the narrative.
iris_df.data[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [28]:
# Hold out 33% of the samples as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris_df.data,
    iris_df.target,
    test_size=0.33,
    random_state=0,
)

# Pipeline Creation

1. Data preprocessing using StandardScaler()
2. Reduce dimensions using PCA
3. Apply a classifier

In [29]:
# Logistic-regression pipeline: standardize -> keep 2 principal components -> classify.
pipline_lr = Pipeline(
    steps=[
        ("scaler1", StandardScaler()),
        ("pca1", PCA(n_components=2)),
        ("lr_classifier", LogisticRegression(random_state=0)),
    ]
)

In [30]:
# Decision-tree pipeline: standardize -> keep 2 principal components -> classify.
pipline_dt = Pipeline(
    steps=[
        ("scaler2", StandardScaler()),
        ("pca2", PCA(n_components=2)),
        ("dt_classifier", DecisionTreeClassifier(random_state=0)),
    ]
)

In [31]:
# Random-forest pipeline: standardize -> keep 2 principal components -> classify.
pipline_randomforest = Pipeline(
    steps=[
        ("scaler3", StandardScaler()),
        ("pca3", PCA(n_components=2)),
        ("rf_classifier", RandomForestClassifier(random_state=0)),
    ]
)

In [32]:
# Gather the three pipelines in one list so they can be fitted and scored in a loop.
# The order here must match the integer keys of the name lookup used when reporting.
pipelines = [
    pipline_lr,
    pipline_dt,
    pipline_randomforest,
]

In [33]:
# "Best so far" trackers: top score, index of the winning pipeline, and the winning
# pipeline object itself (an empty string until a winner has been chosen).
best_accuracy, best_classifier, best_pipeline = 0.0, 0, ''

In [34]:
# Human-readable classifier names, keyed by each pipeline's position in `pipelines`.
pipe_dict = {
    0: 'Logistic Regression',
    1: 'Decision Tree',
    2: 'RandomForest',
}

# Train every pipeline end to end (all of its steps) on the training split.
for pipe in pipelines:
    pipe.fit(X_train, y_train)

In [35]:
# Report how each fitted pipeline scores on the held-out test split.
for i, model in enumerate(pipelines):
    print(f"{pipe_dict[i]} Test Accuracy:{model.score(X_test, y_test)}")

Logistic Regression Test Accuracy:0.92
Decision Tree Test Accuracy:0.9
RandomForest Test Accuracy:0.88


In [36]:
# Score every fitted pipeline exactly once on the held-out split, then pick the winner.
# The selection is recomputed from scratch in this cell so that re-running it alone
# never compares against a stale `best_accuracy` left over from an earlier run.
test_scores = [model.score(X_test, y_test) for model in pipelines]

# `max` returns the first maximal index, so ties resolve to the earliest pipeline,
# which matches a strict `>` comparison while iterating in order.
best_classifier = max(range(len(test_scores)), key=test_scores.__getitem__)
best_accuracy = test_scores[best_classifier]
best_pipeline = pipelines[best_classifier]
print('Classifier with best Accuracy:{}'.format(pipe_dict[best_classifier]))

Classifier with best Accuracy:Logistic Regression
