Merge pull request #75 from fgnt/viz-orc

Add orc and tcorc alignment to visualization
fgnt · Apr 16, 2024 · 92d6b45 · 92d6b45
2 parents e294aed + de85842
commit 92d6b45
Show file tree

Hide file tree

Showing 5 changed files with 85 additions and 33 deletions.
diff --git a/meeteval/viz/__main__.py b/meeteval/viz/__main__.py
@@ -135,6 +135,10 @@ def get_wer(v):
                                 with tag('td'):
                                     with tag('a', href=f'{session_id}.html'):
                                         doc.text('View SideBySide')
+                                with tag('td'):
+                                    tags = '&'.join(f'{session_id}_{i}_{a}' for i, a in v.keys())
+                                    with tag('a', href=f'side_by_side_sync.html?{tags}'):
+                                        doc.text('View SydeBySide Synced')
             doc.asis('''
 <script>
     $(document).ready(function() {
@@ -144,6 +148,9 @@ def get_wer(v):
 </script>
             ''')
 
+    import shutil
+    shutil.copy(Path(__file__).parent / 'side_by_side_sync.html', out / 'side_by_side_sync.html')
+
     with open(out / "index.html", "w") as text_file:
         text_file.write(indent(doc.getvalue()))
     print(f'Open {(out / "index.html").absolute()}')
@@ -196,7 +203,7 @@ def add_argument(self, command_parser, name, p):
             if name == 'alignment':
                 command_parser.add_argument(
                     '--alignment',
-                    choices=['tcp', 'cp', 'tcp,cp', 'cp,tcp'],
+                    choices=['tcp', 'cp', 'tcp,cp', 'cp,tcp', 'tcorc', 'orc'],
                     help='Specifies which alignment is used.\n'
                          '- cp: Find the permutation that minimizes the cpWER and use the "classical" alignment.\n'
                          '- tcp: Find the permutation that minimizes the tcpWER and use a time constraint alignment.'

diff --git a/meeteval/viz/visualize.css b/meeteval/viz/visualize.css
@@ -218,7 +218,7 @@ visible area.
     white-space: pre-wrap;
     background-color: #555;
     color: #fff;
-    text-align: center;
+    text-align: left;
     border-radius: 6px;
     padding: 5px;
     position: absolute;

diff --git a/meeteval/viz/visualize.js b/meeteval/viz/visualize.js
@@ -268,13 +268,14 @@ function alignment_visualization(
         match_width: 10,
         audio_server: 'http://localhost:7777',
         syncID: null,
+        encodeURL: true,
     }
 ) {
     var urlParams = new URLSearchParams(window.location.search);
-    if (urlParams.has('minimaps')) {
+    if (settings.encodeURL && urlParams.has('minimaps')) {
         settings.minimaps.number = urlParams.get('minimaps')
     }
-    if (urlParams.has('regex')) {
+    if (settings.encodeURL && urlParams.has('regex')) {
         settings.search_bar.initial_query = urlParams.get('regex');
     }
 
@@ -362,7 +363,7 @@ function alignment_visualization(
         state.viewAreas = [];
         var urlParams = new URLSearchParams(window.location.search);
         if (finalViewArea === null) {
-            if (urlParams.has('selection')) {
+            if (settings.encodeURL && urlParams.has('selection')) {
                 console.log("Setting selection from URL", urlParams.get('selection'));
                 finalViewArea = parseSelection(urlParams.get('selection'));
                 if (!finalViewArea) {
@@ -375,11 +376,14 @@ function alignment_visualization(
         }
 
         for (let i = 0; i < settings.minimaps.number; i++) {
-            const viewArea = interpolate(domain, finalViewArea, i, settings.minimaps.number);
+            const viewArea = interpolate([...domain], [...finalViewArea], i, settings.minimaps.number);
             state.viewAreas.push(viewArea);
         }
-        state.viewAreas.push(finalViewArea);
+        state.viewAreas.push([...finalViewArea]);
         state.dirty = state.viewAreas.map(() => true);
+
+        // Reset filtered words after initialization
+        state.filteredWords = [];
     }
 
     initializeViewAreas(time_domain);
@@ -430,7 +434,7 @@ function alignment_visualization(
         }, drawTracker, 20);
 
          // Update URL
-        call_delayed_throttled(
+        if (settings.encodeURL) call_delayed_throttled(
             () => {
                 const selection = state.viewAreas[state.viewAreas.length - 1];
                 set_url_param('selection', `${selection[0].toFixed(1)}-${selection[1].toFixed(1)}`)
@@ -1025,7 +1029,16 @@ class CanvasPlot {
                 hr1.append("th");
                 hr1.append("th");
                 hr1.append("th");
-                hr1.append("th").text("Counts by Speaker").attr("colspan", Object.keys(wer_by_speakers).length).style("border-bottom", "1px solid white");
+
+                // Determine the header from the alignment type: if it contains "orc", label counts by stream; otherwise, by speaker
+                let breakdownHeader;
+                if (info.alignment_type.includes("orc")) {
+                    breakdownHeader = "Counts by Stream";
+                } else {
+                    breakdownHeader = "Counts by Speaker";
+                }
+
+                hr1.append("th").text(breakdownHeader).attr("colspan", Object.keys(wer_by_speakers).length).style("border-bottom", "1px solid white");
 
                 const hr = head.append("tr")
                 hr.append("th").text("");
@@ -1068,11 +1081,20 @@ class CanvasPlot {
             }
         });
         label("Alignment:", info.alignment_type, null,
-            c => c.append('div').classed('wrap-40', true).text("The alignment algorithm used to generate this visualization. Available are:\n" +
-            "cp: concatenated minimum-permutation\n" +
-            "tcp: time-constrained minimum permutation\n\n" +
-            "(This setting cannot be changed interactively, but has to be selected when generating the visualization)\n" +
-            "Check the documentation for details")
+            c => c.append('div').classed('wrap-60', true).html("The alignment algorithm used to generate this visualization. Available are:" +
+            "<ul>" +
+            "<li><code>cp</code>: concatenated minimum-permutation</li>" +
+            "<li><code>tcp</code>: time-constrained minimum permutation</li>" +
+            "<li><code>orc</code>: (speaker-agnostic) optimal reference combination</li>" +
+            "<li><code>tcorc</code>: (speaker-agnostic) time-constrained optimal reference combination</li>" +
+            "</ul>" +
+            "<p>All visualizations are generated with <code>reference_sort='segment'</code> and <code>hypothesis_sort='segment'</code>. " +
+            "Time-constrained alignments are generated with <code>collar=5</code>, <code>ref_pseudo_word_level_timing='character_based'</code> and " +
+            "<code>hyp_pseudo_word_level_timing='character_based_points'</code>. " +
+            "Word lengths for the visualization are determined with the <code>'character_based'</code> strategy.</p>" +
+            "<p>This setting has to be selected when generating the visualization. " +
+            "Check the documentation for details.</p>"
+        )
         )
         if (info.wer.reference_self_overlap?.overlap_rate) label(
             "Reference self-overlap:",
@@ -2074,8 +2096,8 @@ class CanvasPlot {
             // Note: Deleting and adding an audio with the same content doesn't
             //       trigger a load. Hence, no optimization necessary.
             audio_div.selectAll("*").remove();
-            let lower = rangeSelector.selection[0];
-            let upper = rangeSelector.selection[1];
+            let lower = state.viewAreas[state.viewAreas.length - 1][0];
+            let upper = state.viewAreas[state.viewAreas.length - 1][1];
             let range = lower + " - " + upper;
             let path = settings.audio_server + "/" + file_path.node().value + "?start=" + lower + "&stop=" + upper;
             if ( parseFloat(lower) < parseFloat(upper) ){
@@ -2089,9 +2111,9 @@ class CanvasPlot {
             }
         };
         let maybe_remove_audio = function (){
-            if (rangeSelector.selection){
-                let lower = rangeSelector.selection[0];
-                let upper = rangeSelector.selection[1];
+            if (state.viewAreas[state.viewAreas.length - 1]){
+                let lower = state.viewAreas[state.viewAreas.length - 1][0];
+                let upper = state.viewAreas[state.viewAreas.length - 1][1];
                 let path = settings.audio_server + "/" + file_path.node().value + "?start=" + lower + "&stop=" + upper;
                 let audio = audio_div.select("audio")
                 if ( ! audio.empty() ){

diff --git a/meeteval/viz/visualize.py b/meeteval/viz/visualize.py
@@ -241,6 +241,38 @@ def get_visualization_data(ref: SegLST, *hyp: SegLST, assignment='tcp', alignmen
     ref = asseglst(ref)
     hyp = [asseglst(h) for h in hyp]
 
+    data = {
+        'info': {
+            'filename': ref[0]['session_id'],
+            'alignment_type': assignment,
+            'length': max([e['end_time'] for e in hyp[0] + ref]) - min([e['start_time'] for e in hyp[0] + ref]),
+        }
+    }
+
+    # Solve assignment when assignment is tcorc or orc
+    if assignment == 'tcorc':
+        assert len(hyp) == 1, len(hyp)
+        from meeteval.wer.wer.time_constrained_orc import time_constrained_orc_wer
+        # The visualization looks wrong if we don't sort segments
+        wer = time_constrained_orc_wer(
+            ref, *hyp,
+            collar=5,
+            reference_sort='segment',
+            hypothesis_sort='segment',
+            reference_pseudo_word_level_timing='character_based',
+            hypothesis_pseudo_word_level_timing='character_based_points',
+        )
+        ref, hyp = wer.apply_assignment(ref, *hyp)
+        hyp = (hyp,)
+        assignment = 'tcp'
+    elif assignment == 'orc':
+        assert len(hyp) == 1, len(hyp)
+        from meeteval.wer.wer.orc import orc_word_error_rate
+        wer = orc_word_error_rate(ref, *hyp)
+        ref, hyp = wer.apply_assignment(ref, *hyp)
+        hyp = (hyp,)
+        assignment = 'cp'
+
     assert len(hyp) > 0, hyp
     if alignment_transform is None:
         alignment_transform = lambda x: x
@@ -265,16 +297,6 @@ def get_visualization_data(ref: SegLST, *hyp: SegLST, assignment='tcp', alignmen
 
     u = ref + hyp
 
-    data = {
-        'info': {
-            'filename': ref[0]['session_id'],
-            'speakers': list(ref.unique('speaker')),
-            'alignment_type': assignment,
-            'length': max([e['end_time'] for e in u]) - min([e['start_time'] for e in u]),
-            'num_hypotheses': len(hyp),
-        }
-    }
-
     # Sort by begin time. Otherwise, the alignment will be unintuitive and likely not what the user wanted
     u = u.sorted('start_time')
 
@@ -379,7 +401,7 @@ def wer_by_speaker(hypothesis_key, speaker):
     data['info']['wer_by_speakers'] = {
         k: {
             speaker: wer_by_speaker(k, speaker)
-            for speaker in data['info']['speakers']
+            for speaker in list(ref.unique('speaker'))
         }
         for k in hypothesis_keys
     }
@@ -497,7 +519,7 @@ def _iypnb_html_(self):
                     height: 80vh; /* 80% of the window height roughly aligns with the visible height in a typical notebook setup */
                 }}
             </style>
-            {self.html()}
+            {self.html(encode_url=False)}
             </html>
             '''
 
@@ -545,7 +567,7 @@ def func(session_id, alignment):
         else:
             display(HTML(self._iypnb_html_()))
 
-    def html(self):
+    def html(self, encode_url=True):
         """
         Creates a visualization in HTML format.
 
@@ -623,6 +645,7 @@ def load_cdn(name, url):
                             match_width: 0.1,
                             syncID: {dumps_json(self.sync_id, default='null')},
                             audio_server: 'http://localhost:7777',
+                            encodeURL: {'true' if encode_url else 'false'},
                         }}
                     );
                     else setTimeout(exec, 100);

diff --git a/setup.py b/setup.py
@@ -124,7 +124,7 @@
         'Cython'
     ],
     extras_require=extras_require,
-    package_data={'meeteval': ['**/*.pyx', '**/*.h', '**/*.js', '**/*.css']},  # https://stackoverflow.com/a/60751886
+    package_data={'meeteval': ['**/*.pyx', '**/*.h', '**/*.js', '**/*.css', '**/*.html']},  # https://stackoverflow.com/a/60751886
     entry_points={
         'console_scripts': [
             'meeteval-wer=meeteval.wer.__main__:cli',