### Import the required libraries

In [30]:
import pandas as pd  # data preprocessing
# pandas is aliased as pd 
import numpy as np  # mathematical computation
# numpy is aliased as np
from sklearn.model_selection import train_test_split
# train_test_split to split data into train and test

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import pickle

### Load the dataset

In [31]:
# Read the iris measurements from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='iris.csv')

In [32]:
# Preview the first rows to confirm the file was parsed into the expected columns.
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,label
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [33]:
# print(data.shape)           # (149, 5)
# print(data.isnull().sum())  # there are no null values
# print(data.columns)         # ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label']
# print(data.head(4))
# Target variable - label
# value counts for label
# print(data['label'].value_counts())  # setosa - 49, versicolor - 50, virginica - 50

In [34]:
# (rows, columns) of the loaded frame.
data.shape

(149, 5)

In [35]:
# Count the missing entries in each column.
data.isnull().sum()    # every column sums to 0, i.e. there are no null values

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
label           0
dtype: int64

In [36]:
# List the column labels; the later feature/target selection relies on
# 'label' being the last column.
data.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'label'], dtype='object')

### Value counts for the target variable (`label`)



In [37]:
# Number of samples per class in the target column.
data['label'].value_counts()

Iris-versicolor    50
Iris-virginica     50
Iris-setosa        49
Name: label, dtype: int64

### Select the independent features (x) and the dependent target variable (y)

In [38]:
# Independent features: every column except the last one ('label').
# The frame loaded by this notebook is `data`; the previous `df` was never
# defined here, so the cell raised NameError on a fresh kernel.
x = data.iloc[:, :-1]

In [39]:
# Display the feature frame (pandas abbreviates long frames in the output).
x

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.9,3.0,1.4,0.2
1,4.7,3.2,1.3,0.2
2,4.6,3.1,1.5,0.2
3,5.0,3.6,1.4,0.2
4,5.4,3.9,1.7,0.4
...,...,...,...,...
144,6.7,3.0,5.2,2.3
145,6.3,2.5,5.0,1.9
146,6.5,3.0,5.2,2.0
147,6.2,3.4,5.4,2.3


In [40]:
# Dependent (target) variable: the class name in the 'label' column.
# Select it from `data`, the frame loaded by this notebook; `df` was never
# defined here and raised NameError on a fresh kernel.
y = data['label']

In [41]:
# Display the target series (pandas abbreviates long series in the output).
y

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
144    Iris-virginica
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
Name: label, Length: 149, dtype: object

### Split the data into train and test

In [43]:
# Hold out 25% of the rows as the test set.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# split (and therefore the same fitted models); without it every run draws a
# different partition.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

In [45]:
# Size of the training feature matrix: (rows, feature columns).
x_train.shape

(111, 4)

In [46]:
# Size of the held-out feature matrix: (rows, feature columns).
x_test.shape

(38, 4)

In [47]:
# Number of training labels; should equal the row count of x_train.
y_train.shape

(111,)

In [48]:
# Number of held-out labels; should equal the row count of x_test.
y_test.shape

(38,)

### Train the ML models

In [49]:
# Three candidate classifiers for the iris labels.
# max_iter=1000 raises the solver's iteration cap above scikit-learn's
# default of 100.
lr_model = LogisticRegression(max_iter=1000)

# The decision tree and the random forest are randomised estimators; fixing
# random_state makes the fitted (and later pickled) models repeatable across
# kernel restarts.
dt_model = DecisionTreeClassifier(criterion='gini', max_depth=4,
                                  min_samples_split=15, random_state=42)
rf_model = RandomForestClassifier(n_estimators=70, criterion='gini',
                                  max_depth=4, min_samples_split=15,
                                  random_state=42)

In [51]:
# Fit the logistic-regression classifier on the training split.
lr_model.fit(X=x_train, y=y_train)

LogisticRegression(max_iter=1000)

In [52]:
# Fit the decision-tree classifier on the training split.
dt_model.fit(X=x_train, y=y_train)

DecisionTreeClassifier(max_depth=4, min_samples_split=15)

In [53]:
# Fit the random-forest classifier on the training split.
rf_model.fit(X=x_train, y=y_train)

RandomForestClassifier(max_depth=4, min_samples_split=15, n_estimators=70)

### Save the trained ML models

In [54]:
# Save each fitted model in binary (pickle) format, one file per model.
# The `with` blocks guarantee every file handle is flushed and closed, even if
# pickling raises; the previous bare open(...) calls never closed their
# handles, which can leave a file incomplete on disk.
with open('lr_model.pkl', 'wb') as model_file:
    pickle.dump(lr_model, model_file)
with open('dt_model.pkl', 'wb') as model_file:
    pickle.dump(dt_model, model_file)
with open('rf_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)