In [1]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn import metrics

# Load the Iris dataset and pull out the feature matrix (X) and target labels (y)
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training (80%) and testing (20%) sets.
# The fixed random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a decision tree classifier.
# scikit-learn randomly permutes the candidate features at every split, so when
# several splits are equally good the fitted tree can differ between runs.
# Fixing random_state makes the tree, the printed rules and the metrics
# reproducible under Restart Kernel -> Run All.
clf = DecisionTreeClassifier(random_state=42)

# Train the classifier on the training data only
clf.fit(X_train, y_train)

# Print the learned decision rules as indented text, labelled with the feature names
tree_rules = export_text(clf, feature_names=iris.feature_names)
print("Decision Tree Rules:\n", tree_rules)

# Make predictions on the held-out test set
y_pred = clf.predict(X_test)

# Compute the confusion matrix and the per-class classification report
# for the test-set predictions
confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
classification_report = metrics.classification_report(y_test, y_pred)

print("Confusion Matrix:\n", confusion_matrix)
print("\nClassification Report:\n", classification_report)


Decision Tree Rules:
 |--- petal length (cm) <= 2.45
|   |--- class: 0
|--- petal length (cm) >  2.45
|   |--- petal length (cm) <= 4.75
|   |   |--- petal width (cm) <= 1.65
|   |   |   |--- class: 1
|   |   |--- petal width (cm) >  1.65
|   |   |   |--- class: 2
|   |--- petal length (cm) >  4.75
|   |   |--- petal width (cm) <= 1.75
|   |   |   |--- petal length (cm) <= 4.95
|   |   |   |   |--- class: 1
|   |   |   |--- petal length (cm) >  4.95
|   |   |   |   |--- petal width (cm) <= 1.55
|   |   |   |   |   |--- class: 2
|   |   |   |   |--- petal width (cm) >  1.55
|   |   |   |   |   |--- sepal length (cm) <= 6.95
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- sepal length (cm) >  6.95
|   |   |   |   |   |   |--- class: 2
|   |   |--- petal width (cm) >  1.75
|   |   |   |--- petal length (cm) <= 4.85
|   |   |   |   |--- sepal length (cm) <= 5.95
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- sepal length (cm) >  5.95
|   |   |   |   |   |--- class: 2

In [2]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_predict, cross_val_score, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Fetch the Iris data once and unpack the feature matrix and the label vector
iris = load_iris()
X, y = iris.data, iris.target

# Reserve 20% of the samples as a hold-out test set (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Logistic regression classifier, configured with max_iter=1000
clf = LogisticRegression(max_iter=1000)

# Predicted labels from 5-fold cross-validation over the full dataset
y_pred_cv = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

# Accuracy of each of the 5 folds (one score per fold)
accuracy_cv = cross_val_score(estimator=clf, X=X, y=y, scoring='accuracy', cv=5)
print("Overall Accuracy for all Folds:", accuracy_cv)

# Hold-out evaluation: fit on the training split only, then predict the test split
y_pred = clf.fit(X_train, y_train).predict(X_test)

# Confusion matrix and per-class report for the hold-out predictions
confusion_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
classification_report = metrics.classification_report(y_true=y_test, y_pred=y_pred)

print("\nConfusion Matrix:\n", confusion_matrix)
print("\nClassification Report:\n", classification_report)


Overall Accuracy for all Folds: [0.96666667 1.         0.93333333 0.96666667 1.        ]

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [None]:
#QUESTION 3

In [3]:
# Re-run 5-fold cross-validation using `clf`, `X` and `y`, which this cell does not
# define itself and expects to already exist in the kernel; collect the predicted labels
y_pred_cv = cross_val_predict(estimator=clf, X=X, y=y, cv=5)

# Per-fold accuracy scores for the same estimator and data
accuracy_cv = cross_val_score(estimator=clf, X=X, y=y, scoring='accuracy', cv=5)
print("Overall Accuracy for all Folds (Logistic Regression):", accuracy_cv)


Overall Accuracy for all Folds (Logistic Regression): [0.96666667 1.         0.93333333 0.96666667 1.        ]


In [None]:
# In cross-validation, Decision Tree accuracies may vary from fold to fold because the model is sensitive to the specific data subset it is
# trained on, while Logistic Regression tends to give more consistent fold accuracies. That variability can make the Decision Tree's mean
# accuracy a less reliable estimate of performance on unseen data, whereas Logistic Regression's mean accuracy is a more stable indicator of
# generalization. Decision Trees can capture complex patterns but may overfit, while Logistic Regression's simplicity often allows better
# generalization. Confusion matrices help evaluate precision, recall, and F1-score for each class in both models.



# The Logistic Regression model performs consistently across the 5-fold cross-validation on the Iris dataset, with accuracy ranging
# from 93.33% to 100% across the folds. The per-fold accuracy values are:
# Fold 1: 96.67%
# Fold 2: 100%
# Fold 3: 93.33%
# Fold 4: 96.67%
# Fold 5: 100%
# This consistency shows that the model performs well on different subsets of the data and suggests
# good generalization capability, indicating that the model is not overfitting to specific examples.