Add html format support in Detector Parameter Comparison (#582)

### Briefly, what does this PR introduce? A new table with some typo fixes is available at https://eic.jlab.org/Geometry/Detector/Detector-20231013155913.html, but not in csv format. Since we expect more fixes to come, adding support for extracting tables from html makes it easier to update the comparison reports. Related to issue #567 ### What kind of change does this PR introduce? - [ ] Bug fix (issue #__) - [ ] New feature (issue #__) - [ ] Documentation update - [x] Other: __ ### Please check if this PR fulfills the following: - [ ] Tests for the changes have been added - [ ] Documentation has been added / updated - [ ] Changes have been communicated to collaborators ### Does this PR introduce breaking changes? What changes might users need to make to their code? No ### Does this PR change default behavior? No --------- Co-authored-by: Chao Peng <cpeng@anl.gov>
eic · Oct 19, 2023 · a10c91e · a10c91e
1 parent 737f84f
commit a10c91e
Showing 1 changed file with 17 additions and 5 deletions.
diff --git a/bin/compare_detector_parameter_table b/bin/compare_detector_parameter_table
@@ -33,13 +33,16 @@ value_cols = [
 # read parameter table and do a little bit formatting
 def read_par_table(path, **kwargs):
     # read and format
-    dft = pd.read_csv(path, **kwargs)
+    if path.endswith('html') or path.endswith('htm'):
+        dft = pd.read_html(path, **kwargs)[0]
+    else:
+        dft = pd.read_csv(path, **kwargs)
     dft.columns = [i.replace(' \n', ' ').replace('\n', ' ').strip() for i in dft.columns.astype(str)]
     unnamed_cols = [i for i in dft.columns if i.startswith('Unnamed')]
     dft = dft.drop(columns=unnamed_cols)
     for col in dft.columns:
         if pd.api.types.is_string_dtype(dft[col].dtype):
-            dft.loc[:, col] = dft[col].str.replace(' \n', ' ').str.replace('\n', ' ').str.strip()
+            dft.loc[:, col] = dft[col].str.replace('\n', ' ').str.replace('  ', ' ').str.strip()
 
     missing_required_cols = [c for c in key_cols + value_cols if c not in dft.columns]
     if len(missing_required_cols) > 0:
@@ -66,11 +69,11 @@ if __name__ == '__main__':
             )
     parser.add_argument(
             'det_table',
-            help='Path or url to the DETECTOR parameter table.'
+            help='Path or url to the DETECTOR parameter table (csv or html).'
             )
     parser.add_argument(
             'sim_table',
-            help='Path or url to the SIMULATION parameter table.'
+            help='Path or url to the SIMULATION parameter table (csv or html).'
             )
     parser.add_argument(
             '--template',
@@ -170,8 +173,17 @@ if __name__ == '__main__':
         print(df_ext)
 
         print('These components are checked:')
-        print(json.dumps(comps, indent=4))
 
+        for comp in comps:
+            subdet = pd.DataFrame(columns=['det', 'sim', 'template_var', 'stat'], index=value_cols)
+            print('{}, {}, {}: '.format(comp['Region'], comp['Component'], comp['Sub-Component']))
+            for stat in ['Mismatched', 'Missing', 'Correct']:
+                for key, val in comp[stat].items():
+                    subdet.loc[key, 'stat'] = stat
+                    for k, v in val.items():
+                        subdet.loc[key, k] = v
+            print(subdet)
+            print(' ')
 
     # save reports to a json file
     report['stats'] = dict(