Data Loading

In [1]:
from DecisionTree import DescisionTree
from CrossValidator import KFoldCrossValidator
import numpy as np

## Loading datasets
clean_data = np.loadtxt('wifi_db/clean_dataset.txt')
noisy_data = np.loadtxt('wifi_db/noisy_dataset.txt')

## check shape, we expect these to be the same
print(f"Clean data shape: {clean_data.shape}")
print(f"Noisy data shape: {noisy_data.shape}")

print(f"First row of clean data: {clean_data[0]}")
print(f"First row of noisy data: {noisy_data[0]}")

Clean data shape: (2000, 8)
Noisy data shape: (2000, 8)
First row of clean data: [-64. -56. -61. -66. -71. -82. -81.   1.]
First row of noisy data: [-59. -53. -51. -54. -45. -79. -87.   4.]


Quick test to check that the trees are being created correctly

In [2]:
## We expect the tree to achieve 100% accuracy on the provided training data
tree = DescisionTree(clean_data)
avg_acc = tree.evaluate(clean_data)
assert avg_acc == 1.0, f"Expected 100% accuracy on clean training data but got {avg_acc}"
print(f"Tree achieved 100% accuracy on training data.\nTree depth on clean data: {tree.depth}")

Tree achieved 100% accuracy on training data.
Tree depth on clean data: 14


10 Fold Cross Validation

In [4]:
tenFoldValidator = KFoldCrossValidator(DescisionTree, 10)

## We expect the accuracies here to be different each time due to randomness in fold splitting
tenFoldValidator.k_fold_cross_validation(clean_data)
tenFoldValidator.k_fold_cross_validation(noisy_data)

Fold 1: Accuracy = 0.9750
Fold 2: Accuracy = 0.9750
Fold 3: Accuracy = 0.9650
Fold 4: Accuracy = 0.9800
Fold 5: Accuracy = 0.9350
Fold 6: Accuracy = 0.9900
Fold 7: Accuracy = 0.9850
Fold 8: Accuracy = 0.9850
Fold 9: Accuracy = 0.9900
Fold 10: Accuracy = 0.9750
Average Accuracy over 10 folds: 0.9755
Fold 1: Accuracy = 0.8100
Fold 2: Accuracy = 0.8450
Fold 3: Accuracy = 0.7900
Fold 4: Accuracy = 0.8400
Fold 5: Accuracy = 0.8350
Fold 6: Accuracy = 0.8200
Fold 7: Accuracy = 0.8300
Fold 8: Accuracy = 0.7850
Fold 9: Accuracy = 0.7450
Fold 10: Accuracy = 0.8200
Average Accuracy over 10 folds: 0.8120


np.float64(0.8119999999999999)