Question 6: Python Program – Iris Dataset with Gini Criterion



In [1]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Train Decision Tree with Gini
clf = DecisionTreeClassifier(criterion="gini", random_state=42)
clf.fit(X, y)

# Predictions & Accuracy
y_pred = clf.predict(X)
accuracy = accuracy_score(y, y_pred)

print("Model Accuracy:", accuracy)
print("Feature Importances:", clf.feature_importances_)


Model Accuracy: 1.0
Feature Importances: [0.01333333 0.         0.56405596 0.42261071]


Question 7: Python Program – Max Depth Comparison


In [2]:
from sklearn.model_selection import train_test_split

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fully grown tree
clf_full = DecisionTreeClassifier(random_state=42)
clf_full.fit(X_train, y_train)
acc_full = clf_full.score(X_test, y_test)

# Limited depth tree
clf_pruned = DecisionTreeClassifier(max_depth=3, random_state=42)
clf_pruned.fit(X_train, y_train)
acc_pruned = clf_pruned.score(X_test, y_test)

print("Fully grown tree accuracy:", acc_full)
print("Max depth=3 tree accuracy:", acc_pruned)


Fully grown tree accuracy: 1.0
Max depth=3 tree accuracy: 1.0


Question 8: Python Program – California Housing with Decision Tree Regressor

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
reg = DecisionTreeRegressor(random_state=42)
reg.fit(X_train, y_train)

# Predictions
y_pred = reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Feature Importances:", reg.feature_importances_)


Mean Squared Error: 0.495235205629094
Feature Importances: [0.52850909 0.05188354 0.05297497 0.02866046 0.03051568 0.13083768
 0.09371656 0.08290203]


Question 9: Python Program – Hyperparameter Tuning with GridSearchCV

In [5]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define parameter grid
param_grid = {
    'max_depth': [2, 3, 4, 5, None],
    'min_samples_split': [2, 3, 4, 5, 6, 8, 10]
}

# GridSearch with DecisionTreeClassifier
grid = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5
)

# Fit model
grid.fit(X_train, y_train)

# Print results
print("Best Parameters:", grid.best_params_)
print("Best Cross-validation Accuracy:", grid.best_score_)
print("Test Accuracy with Best Model:", grid.score(X_test, y_test))



Best Parameters: {'max_depth': 4, 'min_samples_split': 6}
Best Cross-validation Accuracy: 0.9428571428571428
Test Accuracy with Best Model: 1.0
