Skip to content

Commit

Permalink
Merging in test_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
dimazest committed Oct 20, 2013
2 parents 214bfdf + 62e9f0d commit 83eef0d
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 144 deletions.
49 changes: 27 additions & 22 deletions src/composes/utils/io_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@
from composes.utils.gen_utils import assert_valid_kwargs
import struct


def save(object_, file_name):
    """Pickle ``object_`` into ``file_name``.

    Missing parent directories of ``file_name`` are created first. Pickle
    protocol 2 is tried first; if that raises ``struct.error`` a warning is
    issued and the file is written again with protocol 0.

    The file is opened in binary mode because protocol 2 is a binary format:
    text mode rewrites newline bytes on some platforms and corrupts the
    stream.
    """
    create_parent_directories(file_name)
    try:
        with open(file_name, 'wb') as f:
            pickle.dump(object_, f, 2)
    except struct.error:
        warn("object is too big, using pickle with protocol 0")
        # Re-opening with 'wb' truncates whatever the failed attempt wrote.
        with open(file_name, 'wb') as f:
            pickle.dump(object_, f, 0)


def load(file_name, data_type=None):
with open(file_name) as f:
result = pickle.load(f)
Expand All @@ -35,17 +37,18 @@ def load(file_name, data_type=None):

return result


def create_directories(directory):
    """Create ``directory`` and any missing intermediate directories.

    A path that already exists is left untouched. Creation is attempted
    directly instead of checking for existence first, so another process
    creating the same path at the same time does not make this call fail.

    Raises:
        OSError: if the path cannot be created for any reason other than
            already existing.
    """
    import errno

    try:
        os.makedirs(directory)
    except OSError as err:
        # "Already exists" is the outcome the caller wanted; anything else
        # (permissions, empty path, ...) is a real failure.
        if err.errno != errno.EEXIST:
            raise


def create_parent_directories(file_name):
    """Create the directory that will contain ``file_name`` if it is missing.

    Only the parent directory is created; ``file_name`` itself is never
    touched. A bare file name with no directory component needs nothing
    created.
    """
    parent_dir = os.path.dirname(file_name)

    # os.path.dirname() returns "" for a bare file name, and os.makedirs("")
    # raises, so an empty parent must be skipped explicitly.
    if parent_dir and not os.path.exists(parent_dir):
        os.makedirs(parent_dir)


def extract_indexing_structs(filename, field_list):
str2id = {}
Expand Down Expand Up @@ -127,6 +130,7 @@ def read_list(file_name, **kwargs):
result.append(line)
return result


def read_sparse_space_data(matrix_file, row2id, column2id, **kwargs):

if matrix_file.endswith(".gz"):
Expand All @@ -137,8 +141,8 @@ def read_sparse_space_data(matrix_file, row2id, column2id, **kwargs):
no_lines = sum(1 for line in f if line.strip() != "")
f.close()

row = np.zeros(no_lines, dtype = np.int32)
col = np.zeros(no_lines, dtype = np.int32)
row = np.zeros(no_lines, dtype=np.int32)
col = np.zeros(no_lines, dtype=np.int32)

assert_valid_kwargs(kwargs, ["dtype"])

Expand Down Expand Up @@ -179,12 +183,13 @@ def read_sparse_space_data(matrix_file, row2id, column2id, **kwargs):
row = row[0:i]
col = col[0:i]

m = SparseMatrix(csr_matrix( (data,(row,col)), shape = (len(row2id), len(column2id))))
m = SparseMatrix(csr_matrix((data, (row, col)), shape=(len(row2id), len(column2id))))
if m.mat.nnz != i:
warn("Found 0-counts or duplicate row,column pairs. (Duplicate entries are summed up.)")

return m


def read_dense_space_data(matrix_file, row2id, **kwargs):
#get number of rows and columns
if matrix_file.endswith(".gz"):
Expand All @@ -195,8 +200,7 @@ def read_dense_space_data(matrix_file, row2id, **kwargs):
first_line = f.next()
no_cols = len(first_line.strip().split()) - 1
if no_cols <= 0:
raise ValueError("Invalid row: %s, expected at least %d fields"
% (first_line.strip(), 2))
raise ValueError("Invalid row: %s, expected at least %d fields" % (first_line.strip(), 2))
f.close()

no_rows = len(row2id)
Expand All @@ -209,7 +213,7 @@ def read_dense_space_data(matrix_file, row2id, **kwargs):
else:
element_type = np.double

m = np.mat(np.zeros(shape=(no_rows, no_cols), dtype = element_type))
m = np.mat(np.zeros(shape=(no_rows, no_cols), dtype=element_type))

if matrix_file.endswith(".gz"):
f = gzip.open(matrix_file, "rb")
Expand All @@ -228,21 +232,22 @@ def read_dense_space_data(matrix_file, row2id, **kwargs):
if word in row_string_set != 0:
warn("Found duplicate row: %s. Ignoring it." % word)
else:
m[i,:] = elements[1:]
m[i, :] = elements[1:]
row_string_set.add(word)

f.close()

return DenseMatrix(m)


def print_list(list_, file_name):
    """Write every item of ``list_`` to ``file_name``, one item per line.

    Any existing content of ``file_name`` is overwritten. Items are
    formatted with ``%s``, so non-string items (e.g. numbers) are accepted
    as well as strings.
    """
    with open(file_name, 'w') as f:
        for item in list_:
            # The one-element tuple keeps tuple items from being unpacked
            # as multiple format arguments.
            f.write("%s\n" % (item,))


def print_cooc_mat_sparse_format(matrix_, id2row, id2column, file_prefix):
matrix_file = "%s.%s" %(file_prefix, "sm")
matrix_file = "%s.%s" % (file_prefix, "sm")
if not id2column:
raise ValueError("Cannot print matrix with no column info in sparse format!")

Expand All @@ -259,20 +264,20 @@ def print_cooc_mat_sparse_format(matrix_, id2row, id2column, file_prefix):
row = id2row[0]
for i in xrange(len(data)):
while i == next_row:
row_index +=1
row_index += 1
next_row = row_indices[row_index + 1]
row = id2row[row_index]
col = id2column[col_indices[i]]
f.write("%s\t%s\t%f\n" %(row,col,data[i]))
f.write("%s\t%s\t%f\n" % (row, col, data[i]))
else:
for i in range(mat.shape[0]):
for j in range(mat.shape[1]):
if mat[i,j] != 0:
f.write("%s\t%s\t%f\n" %(id2row[i], id2column[j], mat[i,j]))
if mat[i, j] != 0:
f.write("%s\t%s\t%f\n" % (id2row[i], id2column[j], mat[i, j]))


def print_cooc_mat_dense_format(matrix_, id2row, file_prefix):
matrix_file = "%s.%s" %(file_prefix, "dm")
matrix_file = "%s.%s" % (file_prefix, "dm")

with open(matrix_file, 'w') as f:
for i, row in enumerate(id2row):
Expand Down
9 changes: 5 additions & 4 deletions src/pipelines/build_peripheral_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
logger = logging.getLogger("test vector space construction pipeline")



def usage(errno=0):
print >>sys.stderr,\
"""Usage:
Expand Down Expand Up @@ -88,6 +87,7 @@ def build_space(in_file_prefix, in_format, out_dir, out_format, core_space_file,
raw_per_space = build_raw_per_space(in_file_prefix, in_format, is_gz)
transform_raw_per_space(raw_per_space, in_file_prefix, out_dir, out_format, core_space_file)


def transform_raw_per_space(raw_per_space, in_file_prefix, out_dir, out_format, core_space_file):

in_file_descr = "PER_SS." + in_file_prefix.split("/")[-1]
Expand All @@ -103,6 +103,7 @@ def transform_raw_per_space(raw_per_space, in_file_prefix, out_dir, out_format,
if not out_format is None:
space.export(out_file_prefix, format=out_format)


def build_space_batch(in_file_prefix, in_format, out_dir, out_format,
core_in_dir, core_filter, is_gz):

Expand Down Expand Up @@ -135,7 +136,7 @@ def main(sys_argv):
out_dir = None
in_file_prefix = None
core_space_file = None
log_file = None
log_file = './build_core_space.log'
in_format = None
out_format = None
core_in_dir = None
Expand All @@ -144,7 +145,7 @@ def main(sys_argv):

section = "build_peripheral_space"

if (len(argv) == 1):
if len(argv) == 1:
config_file = argv[0]
config = ConfigParser()
config.read(config_file)
Expand All @@ -153,7 +154,7 @@ def main(sys_argv):
core_space_file = utils.config_get(section, config, "core", None)
core_in_dir = utils.config_get(section, config, "core_in_dir", None)
core_filter = utils.config_get(section, config, "core_filter", "")
log_file = utils.config_get(section, config, "log", None)
log_file = utils.config_get(section, config, "log", './build_core_space.log')
in_format = utils.config_get(section, config, "input_format", None)
out_format = utils.config_get(section, config, "output_format", None)
gz = utils.config_get(section, config, "gz", gz)
Expand Down
44 changes: 21 additions & 23 deletions src/unitest/bps_pipeline_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@
@author: Georgiana Dinu, Pham The Nghia
'''
import unittest

import numpy as np
from unitest import data_dir

from pipelines import build_peripheral_space as bps
from pipelines import build_core_space as bcs
from composes.semantic_space.space import Space

class Test(unittest.TestCase):
from unitest import data_dir
import pytest


class Test(unittest.TestCase):

def setUp(self):
    """Point ``self.dir_`` at the pipeline test resources under ``data_dir``."""
    resources_subdir = "pipelines_test_resources/"
    self.dir_ = data_dir + resources_subdir

def tearDown(self):
    """Nothing to clean up after a test."""

def _test_equal_spaces_structs(self, sp, new_sp):
self.assertListEqual(sp.id2row, new_sp.id2row)
self.assertListEqual(sp.id2column, new_sp.id2column)
Expand All @@ -28,19 +29,23 @@ def _test_equal_spaces_structs(self, sp, new_sp):
def _test_equal_spaces_dense(self, sp, new_sp):
    """Check that two spaces agree on their structures and matrix values.

    The ``cooccurrence_matrix.mat`` attributes of both spaces are compared
    to 6 decimal places.
    """
    self._test_equal_spaces_structs(sp, new_sp)

    expected_mat = sp.cooccurrence_matrix.mat
    actual_mat = new_sp.cooccurrence_matrix.mat
    np.testing.assert_array_almost_equal(expected_mat, actual_mat, 6)

def _test_equal_spaces_sparse(self, sp, new_sp):
    """Check that two spaces agree on their structures and matrix values.

    Both ``cooccurrence_matrix.mat`` attributes are converted with
    ``todense()`` and compared to 6 decimal places.
    """
    self._test_equal_spaces_structs(sp, new_sp)

    expected_dense = sp.cooccurrence_matrix.mat.todense()
    actual_dense = new_sp.cooccurrence_matrix.mat.todense()
    np.testing.assert_array_almost_equal(expected_dense, actual_dense, 6)

def test_raises(self):
    """``bps.main`` must exit with ``SystemExit`` when ``-h`` is passed.

    Checked both with ``-h`` alone and with ``-h`` following a ``-l`` log
    file option.
    """
    import os
    import tempfile

    with pytest.raises(SystemExit):
        bps.main(["build_peripheral_space.py", "-h"])

    # Use the platform's temporary directory instead of a hard-coded /tmp
    # so the test also runs where /tmp does not exist.
    log_file = os.path.join(tempfile.gettempdir(),
                            "test_build_peripheral_space.log")

    with pytest.raises(SystemExit):
        bps.main([
            "build_peripheral_space.py",
            "-l", log_file,
            "-h",
        ])

def tttest_simple_sparse_batch(self):

Expand All @@ -56,7 +61,7 @@ def tttest_simple_sparse_batch(self):

s1 = Space.build(data=self.dir_ + "mat1.sm",
cols=self.dir_ + "mat1.cols",
format = "sm")
format="sm")
s2 = Space.build(data=self.dir_ + "PER_SS.mat1.CORE_SS.mat1.sm",
cols=self.dir_ + "PER_SS.mat1.CORE_SS.mat1.cols",
format="sm")
Expand All @@ -80,7 +85,7 @@ def test_simple_sparse(self):

s1 = Space.build(data=self.dir_ + "mat1.sm",
cols=self.dir_ + "mat1.cols",
format = "sm")
format="sm")
s2 = Space.build(data=self.dir_ + "PER_SS.mat1.CORE_SS.mat1.sm",
cols=self.dir_ + "PER_SS.mat1.CORE_SS.mat1.cols",
format="sm")
Expand All @@ -96,7 +101,7 @@ def test_simple_dense(self):
"--input_format", "dm",
"--output_format", "dm"
])
s1 = Space.build(data=self.dir_ + "mat2.dm", format = "dm")
s1 = Space.build(data=self.dir_ + "mat2.dm", format="dm")
s2 = Space.build(data=self.dir_ + "PER_SS.mat2.CORE_SS.mat2.dm", format="dm")

self._test_equal_spaces_dense(s1, s2)
Expand Down Expand Up @@ -128,7 +133,7 @@ def test_simple_ops(self):
bps.main(["build_peripheral_space.py",
"-l", self.dir_ + "log1.txt",
"-i", self.dir_ + "mat3",
"-o", self.dir_ ,
"-o", self.dir_,
"-c", self.dir_ + core_mat + ".pkl",
"--input_format", "dm",
"--output_format", "dm"
Expand All @@ -142,7 +147,7 @@ def test_simple_ops(self):
bps.main(["build_peripheral_space.py",
"-l", self.dir_ + "log1.txt",
"-i", self.dir_ + "mat3",
"-o", self.dir_ ,
"-o", self.dir_,
"-c", self.dir_ + core_mat + ".pkl",
"--input_format", "sm",
"--output_format", "dm"
Expand All @@ -153,10 +158,3 @@ def test_simple_ops(self):
s2 = Space.build(data=data_file, format="dm")

self._test_equal_spaces_dense(s1, s2)



if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()

29 changes: 29 additions & 0 deletions src/unitest/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

import py
import pytest


@pytest.fixture
def toolkit_dir():
    """Path two levels above the directory that holds this file."""
    this_dir = py.path.local(__file__).dirpath()
    return this_dir.join('..', '..')


@pytest.fixture
def data_dir(toolkit_dir):
    """The ``resource/unittest`` path below ``toolkit_dir``."""
    unittest_resources = toolkit_dir.join('resource', 'unittest')
    return unittest_resources


@pytest.fixture
def config_dir(tmpdir):
    """Create a ``config`` subdirectory inside ``tmpdir`` and return it."""
    created_dir = tmpdir.mkdir('config')
    return created_dir


@pytest.fixture
def pipelines_test_resources(data_dir):
    """The ``pipelines_test_resources`` path below ``data_dir``."""
    resources_dir = data_dir.join('pipelines_test_resources')
    return resources_dir


@pytest.fixture
def sim_input(pipelines_test_resources):
    """String path of ``sim_input.txt`` inside ``pipelines_test_resources``."""
    input_path = pipelines_test_resources.join('sim_input.txt')
    return str(input_path)

0 comments on commit 83eef0d

Please sign in to comment.