In [None]:
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the iris dataset: feature matrix X and class labels y.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a shallow (max_depth=3) decision tree using the Gini criterion.
# random_state is fixed because scikit-learn randomly permutes the features at
# each split, so an unseeded tree can differ between runs when splits tie.
clf = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)
clf.fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = clf.predict(X_test)

# Fraction of held-out samples classified correctly.
print("Accuracy:", accuracy_score(y_test, y_pred))


In [None]:
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

# Draw the fitted classifier `clf` on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    clf,
    feature_names=iris.feature_names,
    # iris.target_names is a NumPy array; pass a plain list because some
    # scikit-learn releases validate class_names strictly as a list.
    class_names=list(iris.target_names),
    filled=True,
    ax=ax,
)
# Title is derived from the estimator so it cannot drift from the model shown.
ax.set_title(f"Decision tree (criterion={clf.criterion}, max_depth={clf.max_depth})")
plt.show()


In [None]:
import pandas as pd
import numpy as np
# Load the iris data from a local CSV file named 'iris.csv'
# (assumes the file is present in the working directory — TODO confirm).
df = pd.read_csv('iris.csv') 
df.head()  # Preview the first rows of the loaded frame

In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from scipy.stats import chi2_contingency
import matplotlib.pyplot as plt

# Fetch the iris bunch and unpack the pieces used further down
iris = load_iris()
X = iris.data
y = iris.target
feature_names, class_names = iris.feature_names, iris.target_names

# Tabular view of the features plus the label column, used for the chi-square tests
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# 80/20 train/test partition with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# -----------------------------
# Hyper-parameters common to both trees; only the split criterion differs
shared_params = {"max_depth": 3, "random_state": 42}

# Gini-criterion tree: fit on the training split, score on the held-out split
clf_gini = DecisionTreeClassifier(criterion='gini', **shared_params).fit(X_train, y_train)
y_pred_gini = clf_gini.predict(X_test)
print("Accuracy (Gini):", accuracy_score(y_test, y_pred_gini))

# Entropy-criterion tree: same data and settings, different impurity measure
clf_entropy = DecisionTreeClassifier(criterion='entropy', **shared_params).fit(X_train, y_train)
y_pred_entropy = clf_entropy.predict(X_test)
print("Accuracy (Entropy):", accuracy_score(y_test, y_pred_entropy))

# -----------------------------
# Chi-square test of independence between each (binned) feature and the target
print("\nChi-square values for each feature:")
for feature in feature_names:
    # chi2_contingency works on category counts, so cut the numeric feature into
    # (up to) 3 quantile bins; duplicates='drop' merges bins whose edges coincide.
    # The bins live in a local Series so no scratch column is left behind in df.
    feature_bins = pd.qcut(df[feature], q=3, duplicates='drop')
    contingency_table = pd.crosstab(feature_bins, df['target'])
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    # Scientific notation for p: a very small p-value would otherwise be
    # rounded to 0.0000 by a fixed four-decimal format.
    print(f"{feature}: chi2 = {chi2:.2f}, p-value = {p:.3e}")

# -----------------------------
# Visualize the Gini tree on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    clf_gini,
    feature_names=feature_names,
    # class_names holds iris.target_names (a NumPy array); pass a plain list
    # because some scikit-learn releases validate class_names strictly as a list.
    class_names=list(class_names),
    filled=True,
    ax=ax,
)
# Title is derived from the estimator so it cannot drift from the model shown.
ax.set_title(f"Decision tree (criterion={clf_gini.criterion}, max_depth={clf_gini.max_depth})")
plt.show()
