-
Notifications
You must be signed in to change notification settings - Fork 268
/
test_tabular.py
103 lines (80 loc) · 3.47 KB
/
test_tabular.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python3
"""
Tests of ktrain text classification flows
"""
import testenv
import IPython
from unittest import TestCase, main, skip
import numpy as np
import ktrain
from ktrain import tabular
from ktrain.imports import ACC_NAME, VAL_ACC_NAME
import pandas as pd
class TestTabular(TestCase):
    """End-to-end tests of ktrain's tabular-data flows (classification and regression)."""

    def test_classification(self):
        """Train an MLP classifier on the Titanic CSV and exercise the full
        learner/predictor API: fit, top losses, weight decay, save/load,
        evaluate, and round-trip prediction through a saved predictor.
        """
        train_df = pd.read_csv('tabular_data/train.csv', index_col=0)
        # Drop free-text columns that carry no tabular signal.
        # FIX: the original `df.drop('Name', 1)` passed `axis` positionally,
        # which was deprecated in pandas 1.0 and removed in pandas 2.0.
        train_df = train_df.drop(['Name', 'Ticket'], axis=1)
        trn, val, preproc = tabular.tabular_from_df(train_df, label_columns='Survived', random_state=42)
        model = tabular.tabular_classifier('mlp', trn)
        learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=32)
        lr = 0.001
        hist = learner.fit_onecycle(lr, 30)

        # test training results: peak LR matches the onecycle setting and
        # validation accuracy clears a loose sanity threshold
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(max(hist.history[VAL_ACC_NAME]), 0.8)

        # test top losses: returned observation index must be a valid row index
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(val.df.shape[0])))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay: unset by default, then settable/readable
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate: confusion matrix should be diagonal-dominant
        cm = learner.evaluate(val)
        print(cm)
        for i, row in enumerate(cm):
            self.assertEqual(np.argmax(row), i)

        # test predictor: predictions are valid class labels and survive a
        # save/load round trip
        p = ktrain.get_predictor(learner.model, preproc)
        predicted_label = p.predict(train_df)[0]
        self.assertIn(predicted_label, preproc.get_classes())
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(p.predict(train_df)[0], predicted_label)

    def test_regression(self):
        """Train an MLP regressor to predict `age` from the adults CSV and
        exercise the same learner/predictor API as the classification test.
        """
        trn, val, preproc = tabular.tabular_from_csv('tabular_data/adults.csv', label_columns=['age'], is_regression=True, random_state=42)
        model = tabular.tabular_regression_model('mlp', trn)
        learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=128)
        lr = 0.001
        hist = learner.autofit(lr, 5)

        # test training results: peak LR matches and val MAE is plausibly low
        self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertLess(min(hist.history['val_mae']), 8.)

        # test top losses
        obs = learner.top_losses(n=1, val_data=val)
        self.assertIn(obs[0][0], list(range(val.df.shape[0])))
        learner.view_top_losses(preproc=preproc, n=1, val_data=val)

        # test weight decay
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test validate (regression: just ensure evaluate runs)
        cm = learner.evaluate(val)

        # test predictor: predicted age is in a sane range and stable across
        # a predictor save/load round trip
        p = ktrain.get_predictor(learner.model, preproc)
        train_df = pd.read_csv('tabular_data/adults.csv')
        age = p.predict(train_df)[0][0]
        self.assertLess(age, 100)
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertAlmostEqual(p.predict(train_df)[0][0], age)
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    main()