automl · mfeurer · Dec 15, 2015 · Nov 27, 2015 · Nov 27, 2015
diff --git a/autosklearn/data/competition_c_functions.pyx b/autosklearn/data/competition_c_functions.pyx
@@ -23,7 +23,7 @@ def log_function(*args):
     So far, only ASCII #32 (space) is recognized as whitespace. If the entries are tab-separated (or separated by any other character), this could easily be implemented here.
 
 '''
-def read_sparse_file(char *filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576):
+def read_sparse_file(filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576):
 
     #cdef np.ndarray[float, ndim=1] 
     data = np.zeros(initial_length,dtype=np.float32)
@@ -50,7 +50,9 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia
     filename_byte_string = filename.encode("UTF-8")
     fname = filename_byte_string
     cfile = fopen(fname, "r")
-
+    if cfile == NULL:
+        raise RuntimeError("Couldn't find file {}".format(filename))
+
     while True:
         # read the column and the value and store it
         read =fscanf(cfile, "%i:%f",&j,&v)
@@ -103,7 +105,7 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia
     see read_sparse_file, only difference: the value of every index present is 1, so there are no index:value pairs, but just indices.
 
 '''
-def read_sparse_binary_file(char *filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576):
+def read_sparse_binary_file(filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576):
 
     data = np.zeros(initial_length,dtype=np.bool)
     indices = np.zeros(initial_length, dtype=np.int32)
@@ -124,7 +126,9 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in
     filename_byte_string = filename.encode("UTF-8")
     fname = filename_byte_string
     cfile = fopen(fname, "r")
-
+    if cfile == NULL:
+        raise RuntimeError("Couldn't find file {}".format(filename))
+
     while True:
         # read the column and the value and store it
         read =fscanf(cfile, "%d",&j)
@@ -182,7 +186,7 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in
 
     The function does not check for EOF or missing values, so be cautious!
 '''
-def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 1048576):
+def read_dense_file(filename, int num_points, int num_features,double max_memory_in_mb = 1048576):
 
     nbits = np.finfo(np.float32).nexp + np.finfo(np.float32).nmant+1
     num_points = long(min(num_points,max_memory_in_mb*1024*1024*8/nbits/num_features))
@@ -200,7 +204,9 @@ def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 10485
     filename_byte_string = filename.encode("UTF-8")
     fname = filename_byte_string
     cfile = fopen(fname, "r")
-
+    if cfile == NULL:
+        raise RuntimeError("Couldn't find file {}".format(filename))
+
     for i in range(num_points):
         for j in range(num_features):
             fscanf(cfile, "%f",&v)
@@ -222,6 +228,9 @@ def read_dense_file_unknown_width(filename, num_points, max_memory_in_mb = 10485
     filename_byte_string = filename.encode("UTF-8")
     fname = filename_byte_string
     cfile = fopen(fname, "r")
+    if cfile == NULL:
+        raise RuntimeError("Couldn't find file {}".format(filename))
+
 
     #count the number of columns in the first line
     rc = fgetc(cfile)

diff --git a/test/data/test_competition_c_functions.py b/test/data/test_competition_c_functions.py
@@ -3,6 +3,7 @@
 import os
 import unittest
 
+
 import autosklearn.data.competition_c_functions as competition_c_functions
 
 class CHelperFunctionTest(unittest.TestCase):
@@ -35,3 +36,5 @@ def test_read_dense(self):
             filename, 670, 20, 0.01)
         self.assertEqual(data.shape, (131, 20))
 
+if __name__ == "__main__":
+    unittest.main()