Skip to content

Commit

Permalink
Remove manual override keys for uttid in ChatReader
Browse files (browse the repository at this point in the history)
Resolves #30
  • Loading branch information
JeltevanBoheemen committed May 15, 2024
1 parent 46b3af6 commit b9ac270
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 13 deletions.
13 changes: 2 additions & 11 deletions corpus2alpino/readers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@
"""
Module for reading CHAT cha files to parsable utterances.
"""
from typing import cast, Dict, Iterable, List, Tuple
from typing import cast, Dict, Iterable, List
from chamd import ChatReader as ChatParser, ChatLine, ChatTier

import os
import re

from corpus2alpino.abstracts import Reader
from corpus2alpino.models import CollectedFile, Document, MetadataValue, Utterance

MANUAL_IDS = ['xsid', 'xuid']
UTTERANCE_NUMBER_ID = 'uttno'


class ChatReader(Reader):
"""
Class for converting a CHAT file to document.
Expand All @@ -31,13 +29,6 @@ def parse_utterances(self, chat_lines: List[ChatLine]):
number = 0
for line in chat_lines:
number += 1 # start numbering utterances from 1
for id_override_key in MANUAL_IDS:
try:
line.uttid = line.tiers[id_override_key].text
line.metadata['uttid'].text = line.uttid
break
except KeyError:
pass

yield Utterance(line.text,
str(line.uttid),
Expand Down
4 changes: 2 additions & 2 deletions tests/example_chat_expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
##META text origutt = toen <ging zij &-eh haar &+ma ging zij> [//] wou zij de auto maken . 21792_23475
##META text parsefile = PRELAN_example_chat_u00000000002.xml
##META int uttendlineno = 10
##META int uttid = 42
##META int uttid = 2
##META int uttno = 2
##META int uttstartlineno = 9
##META text xsid = 42
42|toen wou zij de auto maken .
2|toen wou zij de auto maken .

##META text origutt = maar toen reed de auto er vandoor .
##META text parsefile = PRELAN_example_chat_u00000000003.xml
Expand Down

0 comments on commit b9ac270

Please sign in to comment.