In this project, we classify iris flowers from the Iris dataset into three species (Setosa, Versicolor, and Virginica) using three classification algorithms: Logistic Regression, Decision Tree, and Random Forest.

Note: Because the target has more than two classes, this is a multi-class classification problem.

In [2]:
# Load the Iris dataset bundled with scikit-learn and expose it as two pandas
# objects: x holds the measurements, y holds the class labels.
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()

# Feature table: one column per entry in iris.feature_names.
x = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Label vector; the name 'target' becomes its column header when joined to x.
y = pd.Series(data=iris.target, name='target')

In [3]:
# Display the feature table: four measurements (in cm) for each flower.
x

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# Display the label Series; each value is an integer code for an iris species.
y #0 = setosa, 1 = versicolor, 2 = virginica

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: target, Length: 150, dtype: int32

In [5]:
# Place the features and the labels side by side so the complete dataset
# can be inspected in a single table.
iris_full = pd.concat(objs=[x, y], axis="columns")
iris_full

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [6]:
# Hold out 20% of the rows for testing; the fixed seed (42) makes the split
# reproducible from run to run.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Build a Logistic Regression classifier (seeded with 42) and train it on the
# training split.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state=42)
model.fit(X=x_train, y=y_train)

In [8]:
# Predict on the held-out test set and report Logistic Regression accuracy.
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
y_pred_LR = model.predict(X=x_test)
akurasi_LR = accuracy_score(y_true=y_test, y_pred=y_pred_LR)

print(f"akurasi Logistic Regression = {akurasi_LR*100:.2f}%")
# Accuracy is a fair metric here because the three iris classes are balanced;
# confirm with y.value_counts(normalize=True).

akurasi Logistic Regression = 100.00%


In [9]:
# Per-class precision, recall and F1 for the Logistic Regression predictions,
# labelled with the species names instead of the integer codes.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_LR,
        target_names=iris.target_names,
    )
)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [12]:
# Confusion matrix for Logistic Regression, drawn as an annotated heatmap
# (rows = true species, columns = predicted species).
import matplotlib.pyplot as plt
import seaborn as sns

cm_LR = confusion_matrix(y_true=y_test, y_pred=y_pred_LR)
plt.figure(figsize=(8, 6))
# sns.heatmap returns the Axes it drew on, so the title and axis labels can be
# applied to it directly.
sns.heatmap(
    cm_LR,
    annot=True,
    fmt="d",
    cmap='rocket',
    xticklabels=iris.target_names,
    yticklabels=iris.target_names,
).set(
    title='Confusion Matrix Logistic Regression',
    xlabel='Prediksi',
    ylabel='Nilai Asli',
)
plt.show()

ModuleNotFoundError: No module named 'seaborn'

In [13]:
# Build a Decision Tree classifier (seeded with 42) and train it on the
# training split.
from sklearn.tree import DecisionTreeClassifier
model_2 = DecisionTreeClassifier(random_state=42)
model_2.fit(X=x_train, y=y_train)

In [16]:
# Accuracy check: score the Decision Tree on the held-out test set.
y_pred_DT = model_2.predict(x_test)
akurasi_DT = accuracy_score(y_test, y_pred_DT)

# ".2f" prints two decimal places. The earlier spec "2f" (written right after
# "100.") meant "minimum width 2, default precision" and printed six decimals.
print(f"akurasi Decision Tree = {akurasi_DT*100:.2f}%")

akurasi Decision Tree = 100.000000%


In [17]:
# Per-class precision, recall and F1 for the Decision Tree predictions,
# labelled with the species names instead of the integer codes.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_DT,
        target_names=iris.target_names,
    )
)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [19]:
# Confusion matrix for the Decision Tree (rows = true species, columns =
# predicted species). This cell sits in the Decision Tree section, so it plots
# y_pred_DT rather than repeating the Logistic Regression matrix.
import matplotlib.pyplot as plt
import seaborn as sns

cm_DT = confusion_matrix(y_test, y_pred_DT)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm_DT,
    annot=True,
    fmt="d",
    cmap='rocket',
    xticklabels=iris.target_names,
    yticklabels=iris.target_names,
)
plt.title('Confusion Matrix Decision Tree')
plt.xlabel('Prediksi')
plt.ylabel('Nilai Asli')
plt.show()

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Build a Random Forest classifier (seeded with 42) and train it on the
# training split.
from sklearn.ensemble import RandomForestClassifier
model_3 = RandomForestClassifier(random_state=42)
model_3.fit(X=x_train, y=y_train)

In [None]:
# Accuracy check: score the Random Forest on the held-out test set.
y_pred_RF = model_3.predict(x_test)
akurasi_RF = accuracy_score(y_test, y_pred_RF)
# ".2f" prints two decimal places. The earlier spec "2f" (written right after
# "100.") meant "minimum width 2, default precision" and printed six decimals.
print(f"Akurasi Random Forest = {akurasi_RF*100:.2f}%")

In [None]:
# Per-class precision, recall and F1 for the Random Forest predictions,
# labelled with the species names instead of the integer codes.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_pred_RF,
        target_names=iris.target_names,
    )
)

In [None]:
# Confusion matrix for the Random Forest, drawn as an annotated heatmap
# (rows = true species, columns = predicted species).
cm_RF = confusion_matrix(y_true=y_test, y_pred=y_pred_RF)
plt.figure(figsize=(6, 8))
# sns.heatmap returns the Axes it drew on, so the title and axis labels can be
# applied to it directly.
sns.heatmap(
    cm_RF,
    annot=True,
    fmt='d',
    cmap='magma',
    xticklabels=iris.target_names,
    yticklabels=iris.target_names,
).set(
    title='Confusion Matrix Random Forest',
    xlabel='Prediksi',
    ylabel='nilai asli',
)
plt.show()