# problem 1

In [12]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# Load the iris feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# One set of options shared by all three metrics, so they use the same averaging
# and the same zero-division policy. A depth-1 tree has only two leaves and so
# cannot predict all three classes; the per-class precision of a never-predicted
# class is a 0/0 case. Scoring such cases as 0 for every metric keeps the numbers
# comparable and avoids an UndefinedMetricWarning being raised by only some calls.
metric_options = {'average': 'weighted', 'zero_division': 0}

# Train one decision tree per depth (1 through 5) and score it on the held-out set.
results = []
for depth in range(1, 6):
    clf = DecisionTreeClassifier(max_depth=depth, min_samples_split=5, min_samples_leaf=2, random_state=42)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    precision = precision_score(y_test, y_pred, **metric_options)
    recall = recall_score(y_test, y_pred, **metric_options)
    f1 = f1_score(y_test, y_pred, **metric_options)

    # One row per depth: (depth, precision, recall, f1).
    results.append((depth, precision, recall, f1))

# Print one table row per depth, scores rounded to four decimals.
print("Depth | Precision | Recall | F1 Score")
for depth, precision, recall, f1 in results:
    print(f"{depth}     | {precision:.4f}  | {recall:.4f} | {f1:.4f}")

Depth | Precision | Recall | F1 Score
1     | 0.5667  | 0.7111 | 0.6148
2     | 0.9794  | 0.9778 | 0.9777
3     | 1.0000  | 1.0000 | 1.0000
4     | 1.0000  | 1.0000 | 1.0000
5     | 1.0000  | 1.0000 | 1.0000


# problem 2

In [13]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

# Load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
columns = ['id', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 
           'Marginal Adhesion', 'Single Epithelial Cell Size', 'Bare Nuclei', 
           'Bland Chromatin', 'Normal Nucleoli', 'Mitoses', 'Class']

# The file has no header row, so the column names are supplied explicitly.
# '?' is the file's missing-value marker. Declaring it at parse time lets pandas
# read every column as numeric; replacing it after parsing would leave any
# column that contained '?' as strings and make the model fit depend on an
# implicit string-to-float conversion. Rows with a missing value are dropped.
df = pd.read_csv(url, names=columns, na_values='?').dropna()

# Features: everything between the leading 'id' column and the trailing 'Class' column.
X = df.iloc[:, 1:-1]
# Target: class code 2 becomes 0, any other code becomes 1.
y = (df['Class'] != 2).astype(int)

# Train a shallow decision tree on the full cleaned dataset (no hold-out split here).
clf = DecisionTreeClassifier(max_depth=2, min_samples_split=5, min_samples_leaf=2, random_state=42)
clf.fit(X, y)

# Report the fitted tree's per-feature importances, in the column order of X.
print("Feature Importances:", clf.feature_importances_)

Feature Importances: [0.         0.87326695 0.08084315 0.         0.         0.0458899
 0.         0.         0.        ]


# problem 3

In [14]:
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Project the feature matrix onto its first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Fit a depth-2 decision tree on the two-component representation.
clf_pca = DecisionTreeClassifier(
    max_depth=2,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
).fit(X_pca, y)

# Score the tree on the same samples it was fitted on, so the figures below
# describe in-sample fit rather than performance on unseen data.
y_pred_pca = clf_pca.predict(X_pca)
cm = confusion_matrix(y, y_pred_pca)
precision, recall, f1 = (
    metric(y, y_pred_pca) for metric in (precision_score, recall_score, f1_score)
)

# Report the confusion matrix followed by the three scalar scores.
print("Confusion Matrix:\n", cm)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Confusion Matrix:
 [[429  15]
 [  1 238]]
Precision: 0.9407114624505929
Recall: 0.99581589958159
F1 Score: 0.967479674796748
