In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [2]:
X = pd.read_csv('IRIS[1].csv')
X
X.shape

(150, 5)

In [3]:
X['species'].replace(['Iris-setosa','Iris-versicolor','Iris-virginica'],[0,1,2],inplace = True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['species'].replace(['Iris-setosa','Iris-versicolor','Iris-virginica'],[0,1,2],inplace = True)
  X['species'].replace(['Iris-setosa','Iris-versicolor','Iris-virginica'],[0,1,2],inplace = True)


In [4]:
X.head(140)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
135,7.7,3.0,6.1,2.3,2
136,6.3,3.4,5.6,2.4,2
137,6.4,3.1,5.5,1.8,2
138,6.0,3.0,4.8,1.8,2


In [5]:
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [6]:
X.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [7]:
X.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667,1.0
std,0.828066,0.433594,1.76442,0.763161,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


In [8]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 6.0 KB


In [9]:
print(f"The number of rows in the dataset is : {X.count().max()}")
print(f"The number of columns in the dataset is :{len(X.columns)}")

The number of rows in the dataset is : 150
The number of columns in the dataset is :5


In [10]:
y = X.species
y.head(140)

0      0
1      0
2      0
3      0
4      0
      ..
135    2
136    2
137    2
138    2
139    2
Name: species, Length: 140, dtype: int64

In [11]:
y.shape

(150,)

In [12]:
x = X.iloc[:,0:4]
x
x.shape

(150, 4)

In [13]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3)


In [14]:
x_train.shape


(105, 4)

In [15]:
y_train.shape

(105,)

In [16]:
x_test.shape

(45, 4)

In [17]:
y_test.shape

(45,)

In [18]:
model = LogisticRegression()
model.fit(x_train,y_train)

In [19]:
x_train_prediction = model.predict(x_train)
training_data_accuracy = accuracy_score(x_train_prediction, y_train)

In [20]:
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.9809523809523809


In [21]:
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)

In [22]:
print('Accuracy score of the testing data : ', test_data_accuracy)

Accuracy score of the testing data :  0.9777777777777777


In [23]:
Confusion_matrix_prediction_train = confusion_matrix(y_train,x_train_prediction)
Confusion_matrix_prediction_train

array([[34,  0,  0],
       [ 0, 32,  2],
       [ 0,  0, 37]], dtype=int64)

In [24]:
Confusion_matrix_prediction = confusion_matrix(y_test,x_test_prediction)
Confusion_matrix_prediction

array([[16,  0,  0],
       [ 0, 15,  1],
       [ 0,  0, 13]], dtype=int64)

In [25]:
#DECISON TREE CLASSIFICATION
from sklearn.tree import DecisionTreeClassifier
Iris = DecisionTreeClassifier()
Iris_train = Iris.fit(x_train,y_train)
Iris_prediction = Iris.predict(x_test)

In [26]:
print("The accuracy of the decision tree classifier is :",{accuracy_score(Iris_prediction,y_test)})

The accuracy of the decision tree classifier is : {0.9777777777777777}


In [27]:
confusion_matrix_decision_tree = confusion_matrix(Iris_prediction,y_test)
confusion_matrix_decision_tree

array([[16,  0,  0],
       [ 0, 15,  0],
       [ 0,  1, 13]], dtype=int64)

In [28]:
from sklearn.ensemble import RandomForestClassifier
f = RandomForestClassifier()
Iris_T = f.fit(x_train,y_train)
iris_pre = f.predict(x_test)

In [29]:
print("the accuracy score is:",{accuracy_score(y_test,iris_pre)})

the accuracy score is: {0.9555555555555556}


In [30]:
confusion_matrix_random_forest = confusion_matrix(iris_pre,y_test)
confusion_matrix_random_forest

array([[16,  0,  0],
       [ 0, 15,  1],
       [ 0,  1, 12]], dtype=int64)

In [31]:
input_data = (6.1,2.9,4.7,1.5)
input_array_as_numpy_array = np.asarray(input_data)
input_array_reshaped = input_array_as_numpy_array.reshape(1,-1)
prediction = model.predict(input_array_reshaped)
print(prediction)
if(prediction[0]==0):
  print("Iris-setosa")
elif(prediction[0]==1):
  print("Iris-versicolor")
else:
  print("Iris-virginica")

[1]
Iris-versicolor




In [32]:
input_data = (5,3.6,1.4,0.2)
input_array_as_numpy_array = np.asarray(input_data)
input_array_reshaped = input_array_as_numpy_array.reshape(1,-1)
prediction = model.predict(input_array_reshaped)
print(prediction)
if(prediction[0]==0):
  print("Iris-setosa")
elif(prediction[0]==1):
  print("Iris-versicolor")
else:
  print("Iris-virginica")

[0]
Iris-setosa




In [33]:
import pickle
filename = 'finalized_model_1.sav'
pickle.dump(model, open(filename, 'wb'))

In [34]:
loaded_model = pickle.load(open('finalized_model_1.sav','rb'))
