Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions autosklearn/data/competition_c_functions.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def log_function(*args):
So far, only ASCII #32 (space) is recognized as whitespace. If the entries are tab-separated (or separated by any other character), this could easily be implemented here.

'''
def read_sparse_file(char *filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576):
def read_sparse_file(filename, int num_points,int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576):

#cdef np.ndarray[float, ndim=1]
data = np.zeros(initial_length,dtype=np.float32)
Expand All @@ -50,7 +50,9 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia
filename_byte_string = filename.encode("UTF-8")
fname = filename_byte_string
cfile = fopen(fname, "r")

if cfile == NULL:
raise RuntimeError("Couldn't find file {}".format(filename))

while True:
# read the column and the value and store it
read =fscanf(cfile, "%i:%f",&j,&v)
Expand Down Expand Up @@ -103,7 +105,7 @@ def read_sparse_file(char *filename, int num_points,int num_features, int initia
See read_sparse_file; the only difference is that the value of every index present is 1, so there are no index:value pairs, just indices.

'''
def read_sparse_binary_file(char *filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, long max_memory_in_mb = 1048576):
def read_sparse_binary_file(filename, int num_points, int num_features, int initial_length = 8192, int offset = -1, double max_memory_in_mb = 1048576):

data = np.zeros(initial_length,dtype=np.bool)
indices = np.zeros(initial_length, dtype=np.int32)
Expand All @@ -124,7 +126,9 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in
filename_byte_string = filename.encode("UTF-8")
fname = filename_byte_string
cfile = fopen(fname, "r")

if cfile == NULL:
raise RuntimeError("Couldn't find file {}".format(filename))

while True:
# read the column and the value and store it
read =fscanf(cfile, "%d",&j)
Expand Down Expand Up @@ -182,7 +186,7 @@ def read_sparse_binary_file(char *filename, int num_points, int num_features, in

The function does not check for EOF or missing values, so be cautious!
'''
def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 1048576):
def read_dense_file(filename, int num_points, int num_features,double max_memory_in_mb = 1048576):

nbits = np.finfo(np.float32).nexp + np.finfo(np.float32).nmant+1
num_points = long(min(num_points,max_memory_in_mb*1024*1024*8/nbits/num_features))
Expand All @@ -200,7 +204,9 @@ def read_dense_file(filename, num_points, num_features, max_memory_in_mb = 10485
filename_byte_string = filename.encode("UTF-8")
fname = filename_byte_string
cfile = fopen(fname, "r")

if cfile == NULL:
raise RuntimeError("Couldn't find file {}".format(filename))

for i in range(num_points):
for j in range(num_features):
fscanf(cfile, "%f",&v)
Expand All @@ -222,6 +228,9 @@ def read_dense_file_unknown_width(filename, num_points, max_memory_in_mb = 10485
filename_byte_string = filename.encode("UTF-8")
fname = filename_byte_string
cfile = fopen(fname, "r")
if cfile == NULL:
raise RuntimeError("Couldn't find file {}".format(filename))


#count the number of columns in the first line
rc = fgetc(cfile)
Expand Down
3 changes: 3 additions & 0 deletions test/data/test_competition_c_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import unittest


import autosklearn.data.competition_c_functions as competition_c_functions

class CHelperFunctionTest(unittest.TestCase):
Expand Down Expand Up @@ -35,3 +36,5 @@ def test_read_dense(self):
filename, 670, 20, 0.01)
self.assertEqual(data.shape, (131, 20))

if __name__ == "__main__":
unittest.main()