Merge ebb00e5 into 316f25f

DOI-USGS · Nov 6, 2018 · 04428a0 · 04428a0
2 parents 316f25f + ebb00e5
commit 04428a0
Show file tree

Hide file tree

Showing 4 changed files with 171 additions and 9 deletions.
diff --git a/plio/examples/SocetGXP/InSightE08_string_id.gpf b/plio/examples/SocetGXP/InSightE08_string_id.gpf
@@ -0,0 +1,18 @@
+GROUND POINT FILE
+3
+point_id,stat,known,lat_Y_North,long_X_East,ht,sig(3),res(3)
+1 1 1
+0.08538133869187         2.38013146162178         -2697.23744694649990    
+0.000000 0.000000 1.707214
+16.348041 -13.917623 -0.151001
+
+02 1 1
+0.08508542650082         2.38081358557147         -2702.00000000000000    
+0.000000 0.000000 1.000000
+15.983286 -16.869508 0.103341
+
+3 1 1
+0.08626480095809         2.38708383926110         -2714.16003756809000    
+0.000000 0.000000 3.828854
+12.831839 -34.360639 -2.205846
+
diff --git a/plio/examples/SocetSet/example_string_id_ipf.ipf b/plio/examples/SocetSet/example_string_id_ipf.ipf
@@ -0,0 +1,21 @@
+IMAGE POINT FILE
+3
+pt_id,val,fid_val,no_obs,l.,s.,sig_l,sig_s,res_l,res_s,fid_x,fid_y
+1 1 0 0
+-4058.982422  -2318.010742
+0.000000  0.000000
+-0.062556  -0.214713
+0.000000  0.000000
+
+02 1 0 0
+-3969.065186  -606.849243
+0.000000  0.000000
+0.228660  0.105249
+0.000000  0.000000
+
+3 1 0 0
+-1019.739014  -2300.877197
+0.000000  0.000000
+-0.025129  -0.002447
+0.000000  0.000000
+
diff --git a/plio/io/io_bae.py b/plio/io/io_bae.py
@@ -110,8 +110,14 @@ def read_ipf_str(input_data):
 
     assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))
 
-    # Soft conversion of numeric types to numerics, allows str in first col for point_id
-    df = df.apply(pd.to_numeric, errors='ignore')
+    # List of data types for columns in Socet set IPF file
+    col_dtype = ['str','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64']
+
+    # Build dict of column names and their data types
+    dtype_dict = dict(zip(columns, col_dtype))
+
+    # Hard conversion of data types to ensure 'pt_id' is treated as a string and the 'val', 'fid_val', and 'no_obs' flags are treated as ints
+    df = df.astype(dtype_dict)
 
     return df
 
@@ -141,7 +147,7 @@ def read_ipf_list(input_data_list):
 
 def save_ipf(df, output_path):
     """
-    Write a socet gpf file from a gpf-defined pandas dataframe
+    Write a socet ipf file from an ipf-defined pandas dataframe
 
     Parameters
     ----------
@@ -235,8 +241,14 @@ def read_gpf(input_data):
 
     df = pd.DataFrame(d, columns=columns)
 
-    # Soft conversion of numeric types to numerics, allows str in first col for point_id
-    df = df.apply(pd.to_numeric, errors='ignore')
+    # List of data types for columns in Socet set GPF file
+    col_dtype = ['str','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64']
+
+    # Build dict of column names and their data types
+    dtype_dict = dict(zip(columns, col_dtype))
+
+    # Hard conversion of data types to ensure 'point_id' is treated as string and 'stat' and 'known' flags treated as int
+    df = df.astype(dtype_dict)
 
     # Validate the read data with the header point count
     assert int(cnt) == len(df), 'Dataframe length {} does not match point length {}.'.format(int(cnt), len(df))

diff --git a/plio/io/tests/test_io_bae.py b/plio/io/tests/test_io_bae.py
@@ -14,25 +14,57 @@
 def insight_gpf():
     return get_path('InSightE08_XW.gpf')
 
+@pytest.fixture
+def example_str_id_gpf():
+    return get_path('InSightE08_string_id.gpf')
+
 @pytest.fixture()
 def insight_expected_gpf():
-    return pd.read_csv(get_path('InSightE08_XW.csv'))
+    dtype_dict = {'point_id': 'str',
+               'stat': 'int32',
+               'known': 'int32',
+               'lat_Y_North': 'float64',
+               'long_X_East': 'float64',
+               'ht': 'float64',
+               'sig0': 'float64',
+               'sig1': 'float64',
+               'sig2': 'float64',
+               'res0': 'float64',
+               'res1': 'float64',
+               'res2': 'float64'}
+    return pd.read_csv(get_path('InSightE08_XW.csv'), dtype=dtype_dict)
 
 @pytest.fixture
 def insight_ipf():
     return get_path('P20_008845_1894_XN_09N203W.ipf')
 
+@pytest.fixture
+def example_str_id_ipf():
+    return get_path('example_string_id_ipf.ipf')
+
 @pytest.fixture()
 def insight_expected_ipf():
-    return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv'))
+    dtype_dict = {'pt_id': 'str',
+                  'val': 'int32',
+                  'fid_val': 'int32',
+                  'no_obs': 'int32',
+                  'l.': 'float64',
+                  's.': 'float64',
+                  'sig_l': 'float64',
+                  'sig_s': 'float64',
+                  'res_l': 'float64',
+                  'res_s': 'float64',
+                  'fid_x': 'float64',
+                  'fid_y': 'float64'}
+    return pd.read_csv(get_path('P20_008845_1894_XN_09N203W.csv'), dtype=dtype_dict)
 
 @pytest.mark.parametrize('ipf, expected', [([insight_ipf()],insight_expected_ipf())])
-def test_read_ifp(ipf, expected):
+def test_read_ipf(ipf, expected):
     df = read_ipf(ipf)
     assert_frame_equal(df, expected)
 
 @pytest.mark.parametrize('gpf, expected', [(insight_gpf(),insight_expected_gpf())])
-def test_read_gfp(gpf, expected):
+def test_read_gpf(gpf, expected):
     df = read_gpf(gpf)
     assert_frame_equal(df, expected)
 
@@ -63,6 +95,26 @@ def test_write_ipf(ipf, file):
 
     assert (truth_arr == test_arr).all()
 
+@pytest.mark.parametrize('ipf, file', [(example_str_id_ipf(), 'plio/io/tests/temp')])
+def test_write_str_id_ipf(ipf, file):
+    df = read_ipf(ipf)
+    save_ipf(df, file)
+    file = os.path.join(file, 'example_string_id_ipf.ipf')
+
+    with open(ipf) as f:
+        fl = f.readlines()
+
+    with open(file) as f:
+        fs = f.readlines()
+
+    # Quick check to make sure that length of IPF files matches
+    #  otherwise, the test that follows will be invalid
+    assert len(fl) == len(fs)
+
+    # Test that every 6th line, starting at index 3 (the lines containing the point ID and integer flags), matches
+    for i in range(3,len(fs),6):
+        assert fs[i] == fl[i]
+
 @pytest.mark.parametrize('gpf, file', [(insight_gpf(), 'out.gpf')])
 def test_write_gpf(gpf, file):
     """
@@ -89,6 +141,65 @@ def test_write_gpf(gpf, file):
 
     # np.testing.assert_array_almost_equal(truth_arr, test_arr)
 
+@pytest.mark.parametrize('gpf, file', [(example_str_id_gpf(), 'out.gpf')])
+def test_write_str_id_gpf(gpf, file):
+    """
+    This test makes sure that the point IDs of a GPF, when they contain only numbers,
+    are written correctly when saving to disk
+    """
+    df = read_gpf(gpf)
+    save_gpf(df, file)
+
+    with open(gpf) as f:
+        fl = f.readlines()
+
+    with open(file) as f:
+        fs = f.readlines()
+
+    # Quick check to make sure that length of GPF files matches
+    #  otherwise, the test that follows will be invalid
+    assert len(fl) == len(fs)
+
+    # Test that every 5th line (the lines containing the point ID and integer flags) matches
+    for i in range(3,len(fs),5):
+        assert fs[i] == fl[i]
+
+@pytest.mark.parametrize('gpf', [(example_str_id_gpf())])
+def test_gpf_dtypes(gpf):
+    """
+    This test makes sure that the point IDs of a GPF that contain only numbers
+    are always treated as strings after they're read in.
+    """
+    # Read the GPF file under test into a pandas dataframe
+    df = read_gpf(gpf)
+
+    # Truth list of column data types
+    truth_dtypes = ['O','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64','float64']
+
+    # Test list of column data types
+    test_dtypes = list(df.dtypes)
+
+    # Check that the type of each column matches the truth list
+    assert truth_dtypes == test_dtypes
+
+@pytest.mark.parametrize('ipf', [(example_str_id_ipf())])
+def test_ipf_dtypes(ipf):
+    """
+    This test makes sure that the point IDs of an IPF that contain only numbers
+    are always treated as strings after they're read in.
+    """
+    # Read the IPF file under test into a pandas dataframe
+    df = read_ipf(ipf)
+
+    # Truth list of column data types
+    truth_dtypes = ['O','int32','int32','int32','float64','float64','float64','float64','float64','float64','float64','float64', 'O']
+
+    # Test list of column data types
+    test_dtypes = list(df.dtypes)
+
+    # Check that the type of each column matches the truth list
+    assert truth_dtypes == test_dtypes
+
 class TestISDFromSocetLis():
 
     def test_parse_with_empty_newlines(self):