"""Regression tests for the TICC solver.

The tests fit TICC models on ``example_data.txt``; several of them compare
the fitted results with reference files stored in ``UnitTest_Data/``.
"""
import unittest
from TICC_solver import TICC
import numpy as np
import sys
class TestStringMethods(unittest.TestCase):
    """End-to-end regression tests for the TICC solver.

    The tests fit ``TICC`` models on ``example_data.txt`` and, where reference
    results exist, compare against files under ``UnitTest_Data/``.  Both paths
    are relative, so the tests must be run from the directory containing them.
    """

    # Input file shared by every test.
    DATA_FILE = "example_data.txt"

    # Number of leading rows used by the batch and streaming prediction checks.
    STREAM_LENGTH = 1000

    # Lower bound for the streaming agreement check.
    # NOTE(review): the measured value is a percentage (0-100), so 0.9 only
    # demands 0.9% agreement; 90 was probably intended.  Confirm the accuracy
    # that is actually achievable before tightening this bound.
    MIN_STREAMING_PERCENT_CORRECT = 0.9

    @staticmethod
    def _make_ticc(**overrides):
        """Build a TICC solver from the shared baseline, applying *overrides*."""
        params = dict(
            window_size=1,
            number_of_clusters=8,
            lambda_parameter=11e-2,
            beta=600,
            maxIters=100,
            threshold=2e-5,
            write_out_file=False,
            prefix_string="output_folder/",
            num_proc=1,
        )
        params.update(overrides)
        return TICC(**params)

    def _assert_mrfs_match(self, cluster_MRFs, file_stem, number_of_clusters):
        """Compare each fitted MRF with ``UnitTest_Data/<file_stem>_<i>.txt``.

        The numpy assertion is left to propagate so that a failure reports
        which entries differ instead of a bare "False is not true".
        """
        for cluster in range(number_of_clusters):
            reference_path = "UnitTest_Data/{}_{}.txt".format(file_stem, cluster)
            expected = np.loadtxt(reference_path, delimiter=',')
            np.testing.assert_array_almost_equal(
                expected,
                cluster_MRFs[cluster],
                decimal=3,
                err_msg="MRF mismatch for {}".format(reference_path),
            )

    def _streaming_percent_correct(self, ticc, cluster_assignment, block_size):
        """Return the percentage of streamed predictions matching the fit.

        Each timestamp ``i`` is predicted from a block holding row ``i`` and
        the ``block_size - 1`` rows before it; the last entry of the returned
        prediction is taken as the label for row ``i`` (this assumes
        ``predict_clusters`` returns one label per input row -- TODO confirm).
        The first ``block_size`` labels are copied from *cluster_assignment*
        because no full block is formed for them here.
        """
        length = self.STREAM_LENGTH
        train_data = ticc.trained_model['complete_D_train']
        streamed = np.zeros(length)
        streamed[0:block_size] = cluster_assignment[0:block_size]
        for i in range(block_size, length):
            # The block must END at row i (inclusive) so that its last
            # prediction belongs to timestamp i rather than i - 1.
            block = train_data[i - block_size + 1:i + 1]
            streamed[i] = ticc.predict_clusters(block)[block_size - 1]
        matches = np.asarray(cluster_assignment[0:length]) == streamed
        return 100.0 * np.count_nonzero(matches) / length

    def test_example(self):
        """Fit with window size 1 and check assignments, predictions and MRFs."""
        ticc = self._make_ticc()
        cluster_assignment, cluster_MRFs = ticc.fit(input_file=self.DATA_FILE)

        expected_assignment = np.loadtxt("UnitTest_Data/Results.txt")
        np.testing.assert_array_equal(expected_assignment, cluster_assignment)

        # Test prediction works with batch of data outside of `fit` method.
        # Perhaps there is a better way to test this in parallel so these are
        # more like unit tests rather than integration tests?
        batch = ticc.trained_model['complete_D_train'][0:self.STREAM_LENGTH]
        np.testing.assert_array_equal(
            ticc.predict_clusters(batch),
            cluster_assignment[0:self.STREAM_LENGTH],
        )

        # Test streaming by passing in 5 row blocks at a time (current
        # timestamp and previous 4).  Training on the whole set and then
        # streaming over it leaks data, which is acceptable here because the
        # goal is to exercise the code, not to measure generalisation.
        # TODO: figure out why larger blocks don't improve predictions more.
        # Reference:
        # https://github.com/davidhallac/TICC/issues/18#issuecomment-384514116
        percent_correct = self._streaming_percent_correct(
            ticc, cluster_assignment, block_size=5)
        self.assertGreater(percent_correct, self.MIN_STREAMING_PERCENT_CORRECT)

        self._assert_mrfs_match(cluster_MRFs, "cluster", 8)

    def test_multiExample(self):
        """Fit with window size 5 and compare against the stored multi results."""
        ticc = self._make_ticc(window_size=5, number_of_clusters=5)
        cluster_assignment, cluster_MRFs = ticc.fit(input_file=self.DATA_FILE)

        expected_assignment = np.loadtxt("UnitTest_Data/multiResults.txt")
        np.testing.assert_array_equal(expected_assignment, cluster_assignment)

        self._assert_mrfs_match(cluster_MRFs, "multiCluster", 5)

    def test_biased_vs_unbiased(self):
        """Biased and unbiased fits must yield the same cluster assignment."""
        unbiased_assignment, _ = self._make_ticc().fit(input_file=self.DATA_FILE)
        biased_assignment, _ = self._make_ticc(biased=True).fit(
            input_file=self.DATA_FILE)
        np.testing.assert_array_equal(
            np.array(biased_assignment),
            np.array(unbiased_assignment),
            "Biased assignment is not equel to unbiased assignment!",
        )

    def test_failed_unbiased(self):
        """Fitting with 50 clusters is expected to raise."""
        # Construct outside the ``with`` block so that only a failure inside
        # ``fit`` can satisfy the assertion.
        ticc = self._make_ticc(number_of_clusters=50)
        with self.assertRaises(Exception):
            # TICC will fail in Iteration 2, because cluster 9 has only one
            # observation.
            ticc.fit(input_file=self.DATA_FILE)
# Run the whole suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()