Stricter NA handling in CSV parser

alubbock · Jul 5, 2019 · e173081
1 parent 415bb85
commit e173081
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 1 deletion.
diff --git a/thunor/io.py b/thunor/io.py
@@ -595,6 +595,16 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,
     pm = PlateMap(width=plate_width, height=plate_height)
 
     # Sanity checks
+    columns_with_na = set(df.columns[df.isnull().any()])
+    columns_with_na = columns_with_na.difference({'drug1', 'drug2', 'drug3',
+                                                  'expt.id', 'expt.date'})
+
+    if len(columns_with_na) > 0:
+        raise PlateFileParseException(
+            'The following column(s) contain blank, NA, or NaN values: {}'
+            .format(', '.join(columns_with_na))
+        )
+
     try:
         if (df['cell.count'] < 0).any():
             raise PlateFileParseException('cell.count contains negative '

diff --git a/thunor/tests/test_io.py b/thunor/tests/test_io.py
@@ -104,7 +104,7 @@ def test_csv_two_drugs_drug2_blank_conc_specified(self):
 
     def test_csv_two_drugs_drug2_blank(self):
         csv = _check_csv(CSV_HEADER + ',drug2,drug2.units,drug2.conc'
-                         '\ncl1,0.00013,drug1,plate1,12,1234,A1,M,,M,')
+                         '\ncl1,0.00013,drug1,plate1,12,1234,A1,M,,M,0')
         # Second drug should get dropped, since it's empty
         assert len(csv.doses.index.get_level_values('drug')[0]) == 1
         assert len(csv.doses.index.get_level_values('dose')[0]) == 1