# **Decision Trees and Random Forests. Classification/Regression**

In [1]:
from sklearn.datasets import load_breast_cancer, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from scratchml.supervised_learning.tree import DecisionTreeC, DecisionTreeR
from scratchml.supervised_learning.ensemble import RandomForestC, RandomForestR

## **Classification**

### **Data**

We will be using the Breast Cancer Wisconsin dataset (https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html) to test the models.

In [3]:
# Load the Breast Cancer dataset and separate the feature matrix from the labels
cancer = load_breast_cancer()
X = cancer.data
y = cancer.target

In [4]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of every partition
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (455, 30)
y_train shape: (455,)
X_test shape: (114, 30)
y_test shape: (114,)


### **Models fitting**

In [5]:
# Instantiate models (every tree is capped at depth 2 so the implementations are comparable)
clf_tree_ = DecisionTreeC(max_depth=2)
# scikit-learn permutes the features at every split, so ties between equally
# good splits are broken at random; fixing random_state makes the fit reproducible.
clf_tree_sklearn = DecisionTreeClassifier(max_depth=2, random_state=42)
# NOTE(review): the forest is presumably stochastic (bootstrap / feature sampling);
# if RandomForestC accepts a seed argument, set it here — confirm against scratchml.
clf_rf = RandomForestC(n_estimators=100, max_depth=2)

# Fit models on the training split
clf_tree_.fit(X_train, y_train)
clf_tree_sklearn.fit(X_train, y_train)
clf_rf.fit(X_train, y_train)

  gini = 1 - np.sum(np.array(num_samples_per_class)**2) / n_samples_**2
  gini = 1 - np.sum(np.array(num_samples_per_class)**2) / n_samples_**2


### **Evaluation (Accuracy score)**

In [6]:
# Predict on the held-out set with each fitted classifier
y_pred = clf_tree_sklearn.predict(X_test)
y_pred_tree_ = clf_tree_.predict(X_test)
y_pred_rf = clf_rf.predict(X_test)

# Score every set of predictions against the true labels
acc_sklearn = accuracy_score(y_true=y_test, y_pred=y_pred)
acc_tree_ = accuracy_score(y_true=y_test, y_pred=y_pred_tree_)
acc_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

# Show the three accuracies one after another for comparison
print(f"Acc for Decision Tree (Sklearn): {acc_sklearn}")
print(f"Acc for Decision Tree (scratchml): {acc_tree_}")
print(f"Acc for Random Forest (scratchml): {acc_rf}")

Acc for Decision Tree (Sklearn): 0.9298245614035088
Acc for Decision Tree (scratchml): 0.9298245614035088
Acc for Random Forest (scratchml): 0.956140350877193


## **Regression**

### **Data**

We will be using the California Housing dataset (https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html) to test the models.

In [2]:
# Fetch the California Housing dataset and separate the features from the target
data = fetch_california_housing()
X = data.data
y = data.target

In [3]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of every partition
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (16512, 8)
y_train shape: (16512,)
X_test shape: (4128, 8)
y_test shape: (4128,)


### **Models fitting**

In [4]:
# Instantiate models (every tree is capped at depth 2 so the implementations are comparable)
reg_tree_ = DecisionTreeR(max_depth=2)
# scikit-learn permutes the features at every split, so ties between equally
# good splits are broken at random; fixing random_state makes the fit reproducible.
reg_tree_sklearn = DecisionTreeRegressor(max_depth=2, random_state=42)
# NOTE(review): the forest is presumably stochastic (bootstrap / feature sampling);
# if RandomForestR accepts a seed argument, set it here — confirm against scratchml.
reg_rf = RandomForestR(n_estimators=100, max_depth=2)

# Fit models on the training split
reg_tree_.fit(X_train, y_train)
reg_tree_sklearn.fit(X_train, y_train)
reg_rf.fit(X_train, y_train)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


### **Evaluation (MSE)**

In [5]:
# Predict on the held-out set with each fitted regressor
y_pred = reg_tree_sklearn.predict(X_test)
y_pred_tree_ = reg_tree_.predict(X_test)
y_pred_rf = reg_rf.predict(X_test)

# Compute the mean squared error of every set of predictions
mse_sklearn = mean_squared_error(y_true=y_test, y_pred=y_pred)
mse_tree_ = mean_squared_error(y_true=y_test, y_pred=y_pred_tree_)
mse_rf = mean_squared_error(y_true=y_test, y_pred=y_pred_rf)

# Show the three errors one after another for comparison
print(f"MSE for Decision Tree (Sklearn): {mse_sklearn}")
print(f"MSE for Decision Tree (scratchml): {mse_tree_}")
print(f"MSE for Random Forest (scratchml): {mse_rf}")

MSE for Decision Tree (Sklearn): 0.7542635096031616
MSE for Decision Tree (scratchml): 0.754263509603161
MSE for Random Forest (scratchml): 0.7312952789560578
