Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
72 lines (56 sloc) 3.14 KB
import unittest
from TICC_solver import TICC
import numpy as np
import sys
class TestStringMethods(unittest.TestCase):
def test_example(self):
fname = "example_data.txt"
ticc = TICC(window_size = 1,number_of_clusters = 8, lambda_parameter = 11e-2, beta = 600, maxIters = 100,
threshold = 2e-5, write_out_file = False, prefix_string = "output_folder/", num_proc=1)
(cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)
assign = np.loadtxt("UnitTest_Data/Results.txt")
val = abs(assign - cluster_assignment)
self.assertEqual(sum(val), 0)
# Test prediction works with batch of data outside of `fit` method. Perhaps there is a better way
# to test this in parallel so these are more like unit tests rather than integration tests?
test_batch = ticc.predict_clusters(ticc.trained_model['complete_D_train'][0:1000, ])
batch_val = abs(test_batch - cluster_assignment[0:1000])
self.assertEqual(sum(batch_val), 0)
# Test streaming by passing in 5 row blocks at a time (current timestamp and previous 4)
# I am causing data leakage by training on the whole set and then using the trained model while streaming,
# but this is for testing the code, so it is ok
# TODO: figure out why larger blocks don't improve predictions more. Reference:
# https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
def test_streaming(block_size):
test_stream = np.zeros(1000)
test_stream[0:block_size] = cluster_assignment[0:block_size]
for i in range(block_size, 1000):
point = ticc.trained_model['complete_D_train'][i - block_size:i, ]
test_stream[i] = ticc.predict_clusters(point)[block_size - 1]
percent_correct_streaming = 100 * sum(cluster_assignment[0:1000] == test_stream) / 1000.0
self.assertGreater(percent_correct_streaming, 0.9)
test_streaming(5)
for i in range(8):
mrf = np.loadtxt("UnitTest_Data/cluster_"+str(i)+".txt",delimiter=',')
try:
np.testing.assert_array_almost_equal(mrf, cluster_MRFs[i], decimal=3)
except AssertionError:
#Test failed
self.assertTrue(1==0)
def test_multiExample(self):
fname = "example_data.txt"
ticc = TICC(window_size = 5,number_of_clusters = 5, lambda_parameter = 11e-2, beta = 600, maxIters = 100,
threshold = 2e-5, write_out_file = False, prefix_string = "output_folder/", num_proc=1)
(cluster_assignment, cluster_MRFs) = ticc.fit(input_file=fname)
assign = np.loadtxt("UnitTest_Data/multiResults.txt")
val = abs(assign - cluster_assignment)
self.assertEqual(sum(val), 0)
for i in range(5):
mrf = np.loadtxt("UnitTest_Data/multiCluster_"+str(i)+".txt",delimiter=',')
try:
np.testing.assert_array_almost_equal(mrf, cluster_MRFs[i], decimal=3)
except AssertionError:
#Test failed
self.assertTrue(1==0)
if __name__ == '__main__':
unittest.main()