In [9]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [10]:
# Load the Iris dataset that ships with scikit-learn.
# NOTE(review): despite the `_df` suffix this is used as a dict-like container
# (accessed via .keys(), .data, .target, .feature_names), not as a DataFrame.
iris_df = load_iris()

In [11]:
# Show which fields the loaded dataset object exposes.
iris_df.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [12]:
# Inspect the feature matrix: one row per sample, four numeric columns.
# NOTE(review): this displays the whole array; a slice such as
# iris_df.data[:5] would keep the notebook output short.
iris_df.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [13]:
# Human-readable names of the four measured features (sepal/petal length and width, in cm).
iris_df.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [14]:
# Class label of every sample, encoded as the integers 0, 1 and 2.
iris_df.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [15]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris_df.data,
    iris_df.target,
    test_size=0.3,
    random_state=0,
)

In [17]:
# Logistic-regression pipeline: standardise the features, project them onto
# two principal components, then fit the classifier (seeded for repeatability).
pipeline_lr = Pipeline(
    steps=[
        ('scalar1', StandardScaler()),
        ('pca1', PCA(n_components=2)),
        ('lr_classifier', LogisticRegression(random_state=0)),
    ]
)

In [18]:
# Decision-tree pipeline: standardise the features, project them onto two
# principal components, then fit a decision tree.
# random_state is pinned (matching the logistic-regression pipeline and the
# train/test split) so the fitted tree and its accuracy are the same on every
# Restart & Run All instead of changing from run to run.
pipeline_dt = Pipeline([('scalar2', StandardScaler()),
                        ('pca2', PCA(n_components=2)),
                        ('dt_classifier', DecisionTreeClassifier(random_state=0))])

In [21]:
# Random-forest pipeline: standardise the features, project them onto two
# principal components, then fit a random forest.
# random_state is pinned (matching the logistic-regression pipeline and the
# train/test split) because the forest is built from random bootstrap samples
# and feature subsets; without a seed the accuracy differs between runs.
pipeline_randomforest = Pipeline([('scalar3', StandardScaler()),
                                  ('pca3', PCA(n_components=2)),
                                  ('rf_classifier', RandomForestClassifier(random_state=0))])

In [22]:
# Gather the pipelines so they can be fitted and scored in a single loop.
# Order matters: a pipeline's position here is the key used to look up its
# display name in pipe_dict.
pipelines = [
    pipeline_lr,
    pipeline_dt,
    pipeline_randomforest,
]

In [23]:
# Trackers for the model-selection step.
# best_accuracy  : highest test accuracy seen so far.
# best_classifier: integer index of the winning pipeline; it is used as a key
#                  into pipe_dict, so it starts as the int 0 rather than 0.0.
# best_pipeline  : the winning fitted pipeline object; None until one is chosen
#                  (an empty string was a misleading placeholder for a model).
best_accuracy = 0.0
best_classifier = 0
best_pipeline = None

In [24]:
# Map each pipeline's position in `pipelines` to a display name for easy reference.
pipe_dict = {
    0: 'Logistic Regression',
    1: 'Decision tree',
    2: 'Random Forest Classifier',
}

# Train every pipeline on the training split (fit happens in place on each object).
for pipe in pipelines:
    pipe.fit(x_train, y_train)



In [26]:
# Print the held-out accuracy of each fitted pipeline, labelled via pipe_dict.
for i, model in enumerate(pipelines):
    label = pipe_dict[i]
    print('{} Test Accuracy : {}'.format(label, model.score(x_test, y_test)))

Logistic Regression Test Accuracy : 0.8666666666666667
Decision tree Test Accuracy : 0.9111111111111111
Random Forest Classifier Test Accuracy : 0.8888888888888888


In [27]:
# Select the pipeline with the highest accuracy on the held-out test split.
# The comparison is strict, so on a tie the earliest pipeline in the list wins.
for i, model in enumerate(pipelines):
    # Score once per model rather than re-evaluating it inside the branch.
    test_accuracy = model.score(x_test, y_test)
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_pipeline = model
        # Remember the index of the winner. A constant index here would make
        # the message below name pipe_dict[1] regardless of which model won.
        best_classifier = i
print("Classifier with best accuracy is : {}".format(pipe_dict[best_classifier]))

Classifier with best accuracy is : Decision tree
