diff --git a/autosklearn/data/competition_c_functions.pyx b/autosklearn/data/competition_c_functions.pyx index f6403d4b34..1ed7e0c896 100644 --- a/autosklearn/data/competition_c_functions.pyx +++ b/autosklearn/data/competition_c_functions.pyx @@ -23,7 +23,7 @@ def log_function(*args): So far, only ascii #32 (space) is recognized as a whitespace. If the entries are tab-separated (or any other chararcter), this could easily be implemented here. ''' -def read_sparse_file(char *filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576): +def read_sparse_file(filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576): #cdef np.ndarray[float, ndim=1] data = np.zeros(initial_length,dtype=np.float32) @@ -50,7 +50,9 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia filename_byte_string = filename.encode("UTF-8") fname = filename_byte_string cfile = fopen(fname, "r") - + if cfile == NULL: + raise RuntimeError("Couldn't find file {}".format(filename)) + while True: # read the column and the value and store it read =fscanf(cfile, "%i:%f",&j,&v) @@ -103,7 +105,7 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia see read_sparse_file, only difference: the value of every index present is 1, so there are no index:value pairs, but just indices. ''' -def read_sparse_binary_file(char *filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576): +def read_sparse_binary_file(filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576): data = np.zeros(initial_length,dtype=np.bool) indices = np.zeros(initial_length, dtype=np.int32) @@ -124,7 +126,9 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in filename_byte_string = filename.encode("UTF-8") fname = filename_byte_string cfile = fopen(fname, "r") - + if cfile == NULL: + raise RuntimeError("Couldn't find file {}".format(filename)) + while True: # read the column and the value and store it read =fscanf(cfile, "%d",&j) @@ -182,7 +186,7 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in The function does not check for EOF or missing values, so be cautious! ''' -def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 1048576): +def read_dense_file(filename, int num_points, int num_features,double max_memory_in_mb = 1048576): nbits = np.finfo(np.float32).nexp + np.finfo(np.float32).nmant+1 num_points = long(min(num_points,max_memory_in_mb*1024*1024*8/nbits/num_features)) @@ -200,7 +204,9 @@ def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 10485 filename_byte_string = filename.encode("UTF-8") fname = filename_byte_string cfile = fopen(fname, "r") - + if cfile == NULL: + raise RuntimeError("Couldn't find file {}".format(filename)) + for i in range(num_points): for j in range(num_features): fscanf(cfile, "%f",&v) @@ -222,6 +228,9 @@ def read_dense_file_unknown_width(filename, num_points, max_memory_in_mb = 10485 filename_byte_string = filename.encode("UTF-8") fname = filename_byte_string cfile = fopen(fname, "r") + if cfile == NULL: + raise RuntimeError("Couldn't find file {}".format(filename)) + #count the number of columns in the first line rc = fgetc(cfile) diff --git a/test/data/test_competition_c_functions.py b/test/data/test_competition_c_functions.py index 7d3a3f166e..66807d4d99 100644 --- a/test/data/test_competition_c_functions.py +++ b/test/data/test_competition_c_functions.py @@ -3,6 +3,7 @@ import os import unittest + import autosklearn.data.competition_c_functions as competition_c_functions class CHelperFunctionTest(unittest.TestCase): @@ -35,3 +36,5 @@ def test_read_dense(self): filename, 670, 20, 0.01) self.assertEqual(data.shape, (131, 20)) +if __name__ == "__main__": + unittest.main()