In [50]:
# Importing Libraries
import pandas as pd
from sklearn.model_selection import train_test_split # For Spliting Purpose
from sklearn.preprocessing import StandardScaler # Normalize the varience in data for better performance
from sklearn.pipeline import Pipeline # For Creating Pipeline
from sklearn.decomposition import PCA # For Dimension Reduction
from sklearn.linear_model import LogisticRegression #First Algorithm
from sklearn.tree import DecisionTreeClassifier # Second Algorithm

In [21]:
# Read the Iris CSV from a path relative to the working directory and discard
# the 'Id' column, which this analysis does not use as a feature.
df = pd.read_csv("Downloads/Iris.csv").drop(columns=['Id'])

In [29]:
# Per-column null check: a column reports True if it holds at least one missing
# value. All False in the output means the data contains no nulls.
df.isnull().any(axis=0)

SepalLengthCm    False
SepalWidthCm     False
PetalLengthCm    False
PetalWidthCm     False
Species          False
dtype: bool

In [30]:
# Display the loaded frame (the 'Id' column has already been dropped).
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [37]:
# Separate the target column from the predictor columns.
Y = df['Species']                 # target labels
X = df.drop(columns=['Species'])  # every remaining column is a feature

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Species, Length: 150, dtype: object

In [53]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=41,
)

In [54]:
# Inspect the training labels produced by train_test_split.
y_train

18         Iris-setosa
108     Iris-virginica
98     Iris-versicolor
45         Iris-setosa
68     Iris-versicolor
            ...       
26         Iris-setosa
89     Iris-versicolor
65     Iris-versicolor
80     Iris-versicolor
140     Iris-virginica
Name: Species, Length: 105, dtype: object

In [42]:
# Inspect the training features produced by train_test_split.
# NOTE(review): the recorded output's row order differs from y_train's and this
# cell's execution count precedes the split cell - output looks stale; re-run to confirm.
X_train

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
60,5.0,2.0,3.5,1.0
116,6.5,3.0,5.5,1.8
144,6.7,3.3,5.7,2.5
119,6.0,2.2,5.0,1.5
108,6.7,2.5,5.8,1.8
...,...,...,...,...
9,4.9,3.1,1.5,0.1
103,6.3,2.9,5.6,1.8
67,5.8,2.7,4.1,1.0
117,7.7,3.8,6.7,2.2


In [55]:
# Logistic Regression pipeline: standardize the features with StandardScaler,
# reduce them to two principal components with PCA, then fit the classifier.
lr_steps = [
    ('scal1', StandardScaler()),
    ('pca1', PCA(n_components=2)),
    ('lr_classifier', LogisticRegression(random_state=7)),
]
lr_pipeline = Pipeline(steps=lr_steps)

In [56]:
# Decision Tree pipeline: standardize the features with StandardScaler, reduce
# them to two principal components with PCA, then fit the classifier.
# The tree is seeded (same seed as the Logistic Regression pipeline) so that
# the fitted tree, and therefore its test accuracy, is repeatable across runs.
dt_pipeline = Pipeline([('scal2',StandardScaler()),
                     ('pca2',PCA(n_components=2)),
                     ('dt_classifier',DecisionTreeClassifier(random_state=7))])

In [57]:
# Pipelines to train and compare; the position of each entry is the key used
# to look up its display name in pipe_dict.
pipelines = [
    lr_pipeline,
    dt_pipeline,
]

In [58]:
# Initial values for the model comparison further down.
pipeline = ""  # placeholder; not read by any of the visible cells
Accuracy = 0   # baseline that the test accuracies are compared against

In [61]:
# Display names for the classifiers, keyed by each pipeline's position in
# the `pipelines` list.
pipe_dict = dict(enumerate(['Logistic Regression', 'Decision Tree']))

# Train every pipeline on the training split.
for pipe in pipelines:
    pipe.fit(X_train, y_train)



In [62]:
# Report each fitted pipeline's accuracy on the held-out test split.
for i, model in enumerate(pipelines):
    classifier_name = pipe_dict[i]
    test_accuracy = model.score(X_test, y_test)
    print(f"{classifier_name} Test Accuracy: {test_accuracy}")

Logistic Regression Test Accuracy: 0.8
Decision Tree Test Accuracy: 0.8888888888888888


In [65]:
# Pick the pipeline with the highest test accuracy.
# The running best starts at the `Accuracy` baseline and is raised every time a
# model beats it, so the winner is the true maximum rather than simply the last
# model that scored above the baseline. Each model is scored only once.
best_accuracy = Accuracy
best_pipeline = None
best_classifier = None
for i, model in enumerate(pipelines):
    test_accuracy = model.score(X_test, y_test)
    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        best_pipeline = model
        best_classifier = i

# Guard against the case where no model beats the baseline, which would
# otherwise leave nothing to look up in pipe_dict.
if best_classifier is None:
    print('No classifier scored above the baseline accuracy of {}'.format(Accuracy))
else:
    print('Classifier with best accuracy:{}'.format(pipe_dict[best_classifier]))

Classifier with best accuracy:Decision Tree
