-
Notifications
You must be signed in to change notification settings - Fork 0
/
random_forests_balancing_change.py
executable file
·87 lines (77 loc) · 3.72 KB
/
random_forests_balancing_change.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
'''
Created on 15/11/2015
@author: Alexandre Yukio Yamashita
@author: Celso Kakihara
'''
from sklearn.ensemble.forest import RandomForestClassifier
from data.plot import plot
from data.data import Data
from data.numpy_file import save_np_array, load_np_array
import numpy as np
from statistics.confusion_matrix import confusion_matrix
from statistics.performance import compute_performance_metrics, compute_auc
def balancing_ratio_axis(count, step=0.1):
    '''
    Build the x axis (oversampling ratios) for the balancing plot.

    Entry i of every saved history was produced with an oversampling
    ratio of i * step (see the archived experiment loop in the main
    section below), so the axis is 0, step, 2 * step, ...

    count -- number of points in the history being plotted.
    step -- ratio increment between two consecutive runs.

    Returns a float numpy array with count elements (empty if count is 0).
    '''
    return np.arange(count) * step


if __name__ == '__main__':
    # Classify data changing balancing ratio.
    #
    # The experiment itself is archived below (Python 2 syntax). It was used
    # once to generate the result files that are loaded and plotted further
    # down; uncomment it to regenerate them.
    #
    # accuracy_history = []
    # precision_history = []
    # recall_history = []
    # auc_history = []
    # balancing_rate_history = []
    #
    # for i in range(35):
    #     load_path = "../homesite_data/resources/splitted_data.bin"
    #     homesite = Data()
    #     homesite.load_sliptted_data(load_path)
    #     homesite.z_norm_by_feature()
    #     del homesite.test_x  # Deleted to save memory.
    #     homesite.train_y = homesite.train_y.flatten()
    #     homesite.validation_y = homesite.validation_y.flatten()
    #
    #     if i > 0:
    #         # Balance data.
    #         homesite.balance_data_oversampling(ratio = i * 0.1, balance_type = "OverSampler")
    #
    #     # Creating classifier.
    #     clf = RandomForestClassifier(max_features = 100)
    #
    #     # Train classifier.
    #     print "Training classifier."
    #     clf.fit(homesite.train_x, homesite.train_y)
    #
    #     # Test classifier.
    #     print 'Testing classifier.'
    #     predicted_labels = clf.predict_proba(homesite.validation_x)[:, 1]
    #
    #     # Show final results.
    #     results = confusion_matrix(homesite.validation_y, np.round(predicted_labels))
    #     accuracy, precision, recall = compute_performance_metrics(results)
    #     auc = compute_auc(homesite.validation_y, predicted_labels)
    #
    #     accuracy_history.append(accuracy)
    #     precision_history.append(precision)
    #     recall_history.append(recall)
    #     auc_history.append(auc)
    #     balancing_rate = np.count_nonzero(homesite.train_y) * 1.0 / len(homesite.train_y)
    #     balancing_rate_history.append(balancing_rate)
    #
    #     print 'Saving result.', i * 0.1
    #     save_np_array("../homesite_data/results/random_forest_balancing_accuracy.bin", np.array(accuracy_history))
    #     save_np_array("../homesite_data/results/random_forest_balancing_precision.bin", np.array(precision_history))
    #     save_np_array("../homesite_data/results/random_forest_balancing_recall.bin", np.array(recall_history))
    #     save_np_array("../homesite_data/results/random_forest_balancing_auc.bin", np.array(auc_history))
    #     save_np_array("../homesite_data/results/random_forest_balancing_rate.bin", np.array(balancing_rate_history))
    #
    #     del homesite
    #     del clf

    # Load the metric histories saved by the experiment above.
    accuracy_history = load_np_array("../homesite_data/results/random_forest_balancing_accuracy.bin")
    precision_history = load_np_array("../homesite_data/results/random_forest_balancing_precision.bin")
    recall_history = load_np_array("../homesite_data/results/random_forest_balancing_recall.bin")
    auc_history = load_np_array("../homesite_data/results/random_forest_balancing_auc.bin")
    balancing_rate_history = load_np_array("../homesite_data/results/random_forest_balancing_rate.bin")

    # Optional dump of every saved metric (only user of the accuracy,
    # precision and balancing rate histories).
    # for accuracy, precision, recall, auc, balancing_rate in zip(accuracy_history, precision_history, recall_history, auc_history, balancing_rate_history):
    #     print accuracy, precision, recall, auc, balancing_rate

    # Point i was measured with ratio i * 0.1, so the axis must end at
    # (n - 1) * 0.1. The former np.linspace(0, n / 10, num = n) ended at
    # n / 10 (truncated by Python 2 integer division), which placed every
    # point after the first one at the wrong ratio.
    ratios = balancing_ratio_axis(len(recall_history))

    plot("../homesite_data/results/random_forest_balacing.png",
         [recall_history, auc_history],
         ["sensitividade ", "AUC"],
         "taxa de balanceamento",
         "metricas",
         'center right',
         x = ratios)