In [9]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import graphviz



In [10]:
# Train and evaluate an entropy-based decision tree on the Iris dataset.
# A single seed drives both the train/test split and the tree itself so that
# a fresh "Restart & Run All" reproduces the same model and the same score.
RANDOM_STATE = 42

# Load dataset
data = load_iris()
X = data.data
y = data.target

# Split the data into training and testing sets (30% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_STATE
)

# Create and train the decision tree classifier.
# criterion='entropy' selects splits by information gain.
# random_state is fixed because scikit-learn randomly permutes the features at
# each split, so ties between equally good splits could otherwise be broken
# differently from one run to the next.
clf = DecisionTreeClassifier(criterion='entropy', random_state=RANDOM_STATE)
clf.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = clf.predict(X_test)

# Evaluate accuracy (fraction of test samples predicted correctly)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 1.00


In [11]:


# Visualize the decision tree (optional).
# With out_file=None, export_graphviz returns the DOT source as a string
# instead of writing it to a file.
dot_data = export_graphviz(
    clf,
    out_file=None,
    feature_names=data.feature_names,
    class_names=data.target_names,
    filled=True,
)
graph = graphviz.Source(dot_data)
# Render the graph to disk and open it in the default viewer; the returned
# file path is the value displayed by this cell.
graph.render("iris_decision_tree", view=True)



'iris_decision_tree.pdf'

Explanation:
Loading the Data: We load the Iris dataset using load_iris() from sklearn.datasets. This dataset is already included in Scikit-learn, so you don't need to manually load it from a file.

Splitting the Data: We split the dataset into training and testing sets using train_test_split(). We set aside 30% of the data for testing and use the remaining 70% for training.

Training the Decision Tree: We use DecisionTreeClassifier() from Scikit-learn to create a decision tree classifier. We specify criterion='entropy', which tells the model to choose, at each node, the split that most reduces entropy (i.e. the split with the highest information gain). The model is then trained on the training data (X_train, y_train).

Making Predictions: After training the model, we use the test set (X_test) to predict the species of flowers. We store the predictions in y_pred.

Evaluating the Model: We calculate the accuracy of the model with accuracy_score(), which compares the predicted values (y_pred) to the actual test values (y_test) and returns the fraction of test samples that were classified correctly.

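Visualizing the Decision Tree (optional): We can visualize the trained decision tree using export_graphviz(). Because out_file=None is passed, it returns the tree's description in DOT format as a string rather than writing a .dot file. graphviz.Source() wraps that string, and render() writes it out as iris_decision_tree.pdf and, because view=True, opens the file in the default viewer.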