Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
TICC/UnitTest.py
Go to file. This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
96 lines (76 sloc)
4.67 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
from TICC_solver import TICC | |
import numpy as np | |
import sys | |
class TestStringMethods(unittest.TestCase):
    """Regression tests that fit TICC on ``example_data.txt``.

    Every test trains a model on the bundled example data and compares the
    outcome with reference files stored under ``UnitTest_Data/``.
    """

    INPUT_FILE = "example_data.txt"
    # Number of leading rows used by the batch and streaming prediction checks.
    PREDICTION_ROWS = 1000
    # Lower bound for the streaming accuracy. The accuracy is computed on a
    # 0-100 scale, so the bound is expressed in percent as well.
    MIN_STREAMING_PERCENT_CORRECT = 90.0

    def _build_ticc(self, window_size, number_of_clusters, **extra):
        """Return a TICC solver configured with the settings shared by all tests.

        Args:
            window_size: value forwarded as ``window_size``.
            number_of_clusters: value forwarded as ``number_of_clusters``.
            **extra: additional keyword arguments forwarded to ``TICC``
                unchanged (for example ``biased=True``).
        """
        return TICC(window_size=window_size, number_of_clusters=number_of_clusters,
                    lambda_parameter=11e-2, beta=600, maxIters=100, threshold=2e-5,
                    write_out_file=False, prefix_string="output_folder/", num_proc=1,
                    **extra)

    def _assert_mrfs_match(self, cluster_MRFs, file_prefix, number_of_clusters):
        """Compare each fitted MRF with its reference file to 3 decimals.

        The reference for cluster ``i`` is read from
        ``UnitTest_Data/<file_prefix><i>.txt``. The numpy assertion is left to
        propagate so a failure reports which elements differ.
        """
        for i in range(number_of_clusters):
            path = "UnitTest_Data/" + file_prefix + str(i) + ".txt"
            mrf = np.loadtxt(path, delimiter=',')
            np.testing.assert_array_almost_equal(
                mrf, cluster_MRFs[i], decimal=3,
                err_msg="MRF of cluster {} does not match {}".format(i, path))

    def _streaming_percent_correct(self, ticc, cluster_assignment, block_size):
        """Return the percentage (0-100) of streamed predictions matching ``fit``.

        For every row ``i`` the model is given a block of ``block_size`` rows
        that ends at row ``i`` (the current row and the ``block_size - 1``
        rows before it); the prediction for the last row of the block is kept.
        The first ``block_size`` entries are copied from ``cluster_assignment``
        and therefore always count as correct.
        """
        data = ticc.trained_model['complete_D_train']
        expected = np.asarray(cluster_assignment[0:self.PREDICTION_ROWS])
        streamed = np.zeros(self.PREDICTION_ROWS)
        streamed[0:block_size] = expected[0:block_size]
        for i in range(block_size, self.PREDICTION_ROWS):
            # The block must end at row i so that its last prediction really
            # belongs to row i.
            block = data[i - block_size + 1:i + 1, ]
            streamed[i] = ticc.predict_clusters(block)[block_size - 1]
        return 100.0 * np.mean(expected == streamed)

    def test_example(self):
        """Fit 8 clusters with window size 1 and check assignments, predictions and MRFs."""
        ticc = self._build_ticc(window_size=1, number_of_clusters=8)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=self.INPUT_FILE)
        assign = np.loadtxt("UnitTest_Data/Results.txt")
        np.testing.assert_array_equal(cluster_assignment, assign)

        # Test prediction works with batch of data outside of `fit` method. Perhaps there is a
        # better way to test this in parallel so these are more like unit tests rather than
        # integration tests?
        train_rows = ticc.trained_model['complete_D_train'][0:self.PREDICTION_ROWS, ]
        test_batch = ticc.predict_clusters(train_rows)
        np.testing.assert_array_equal(test_batch, cluster_assignment[0:self.PREDICTION_ROWS])

        # Test streaming by passing in 5 row blocks at a time (current timestamp and previous 4).
        # This causes data leakage by training on the whole set and then using the trained model
        # while streaming, but this is for testing the code, so it is ok.
        # TODO: figure out why larger blocks don't improve predictions more. Reference:
        # https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
        percent_correct_streaming = self._streaming_percent_correct(
            ticc, cluster_assignment, block_size=5)
        self.assertGreater(percent_correct_streaming, self.MIN_STREAMING_PERCENT_CORRECT)

        self._assert_mrfs_match(cluster_MRFs, "cluster_", 8)

    def test_multiExample(self):
        """Fit 5 clusters with window size 5 and check assignments and MRFs."""
        ticc = self._build_ticc(window_size=5, number_of_clusters=5)
        (cluster_assignment, cluster_MRFs) = ticc.fit(input_file=self.INPUT_FILE)
        assign = np.loadtxt("UnitTest_Data/multiResults.txt")
        np.testing.assert_array_equal(cluster_assignment, assign)
        self._assert_mrfs_match(cluster_MRFs, "multiCluster_", 5)

    def test_biased_vs_unbiased(self):
        """Check that ``biased=True`` yields the same assignment as the default."""
        unbiased_ticc = self._build_ticc(window_size=1, number_of_clusters=8)
        (unbiased_cluster_assignment, unbiased_cluster_MRFs) = unbiased_ticc.fit(
            input_file=self.INPUT_FILE)
        biased_ticc = self._build_ticc(window_size=1, number_of_clusters=8, biased=True)
        (biased_cluster_assignment, biased_cluster_MRFs) = biased_ticc.fit(
            input_file=self.INPUT_FILE)
        np.testing.assert_array_equal(
            np.array(biased_cluster_assignment), np.array(unbiased_cluster_assignment),
            "Biased assignment is not equel to unbiased assignment!")

    def test_failed_unbiased(self):
        """Check that fitting 50 clusters on the example data raises.

        ``assertRaises`` already fails the test when nothing is raised, so no
        further assertion on the captured exception is needed.
        """
        with self.assertRaises(Exception):
            # TICC will fail in Iteration 2, because cluster 9 has only one observation.
            ticc = self._build_ticc(window_size=1, number_of_clusters=50)
            ticc.fit(input_file=self.INPUT_FILE)
if __name__ == '__main__':
    # Run every test case in this module when the file is executed as a script.
    unittest.main()