Skip to content

Commit

Permalink
Merge pull request #267 from allenai/v2-pii-tagging
Browse files Browse the repository at this point in the history
Stack v4
  • Loading branch information
AkshitaB committed Sep 13, 2023
2 parents 5b4c68e + bff2981 commit d2abecd
Show file tree
Hide file tree
Showing 60 changed files with 17,445 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class InputSpec(Struct):
text: str
source: str
version: Optional[str] = None
metadata: Optional[Dict] = None


class OutputSpec(Struct):
Expand All @@ -41,32 +42,34 @@ class OutputSpec(Struct):


class Document:
    """A text document carrying its source, id, text, optional version and metadata.

    Instances can be converted to and from ``InputSpec`` objects and plain
    dictionaries. ``__slots__`` is declared, so only the five attributes listed
    there can be set on an instance.
    """

    __slots__ = "source", "version", "id", "text", "metadata"

    def __init__(
        self,
        source: str,
        id: str,
        text: str,
        version: Optional[str] = None,
        metadata: Optional[Dict] = None,
    ) -> None:
        """Store the given fields; ``version`` and ``metadata`` default to ``None``."""
        self.source = source
        self.version = version
        self.id = id
        self.text = text
        self.metadata = metadata

    @classmethod
    def from_spec(cls, spec: "InputSpec") -> "Document":
        """Build a ``Document`` from the matching attributes of ``spec``."""
        return Document(
            source=spec.source,
            version=spec.version,
            id=spec.id,
            text=spec.text,
            metadata=spec.metadata,
        )

    def to_spec(self) -> "InputSpec":
        """Return an ``InputSpec`` populated with this document's fields."""
        return InputSpec(
            source=self.source,
            version=self.version,
            id=self.id,
            text=self.text,
            metadata=self.metadata,
        )

    @classmethod
    def from_json(cls, d: Dict) -> "Document":
        """Build a ``Document`` from a dictionary such as the one ``to_json`` returns.

        ``source``, ``id`` and ``text`` are required and raise ``KeyError`` when
        absent. ``version`` and ``metadata`` are optional and fall back to
        ``None``, so dictionaries written before ``metadata`` was introduced
        still load.
        """
        return Document(
            source=d["source"],
            version=d.get("version"),
            id=d["id"],
            text=d["text"],
            metadata=d.get("metadata"),
        )

    def to_json(self) -> Dict:
        """Return the document's fields as a plain dictionary."""
        return {
            "source": self.source,
            "version": self.version,
            "id": self.id,
            "text": self.text,
            "metadata": self.metadata,
        }

    def __str__(self) -> str:
        """Return ``ClassName(field=repr,...)`` with every field in one group."""
        # All fields share a single parenthesised list; metadata is no longer
        # emitted as a separate trailing "(metadata=...)" group.
        return (
            str(self.__class__.__name__)
            + f"(source={repr(self.source)},version={repr(self.version)},id={repr(self.id)},"
            + f"text={repr(self.text)},metadata={repr(self.metadata)})"
        )


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def predict(self, doc: Document) -> DocResult:

def tag(self, row: InputSpec) -> Dict[str, List[List[Union[int, float]]]]:
"""Internal function that is used by the tagger to get data"""
doc = Document(source=row.source, version=row.version, id=row.id, text=row.text)
doc = Document(source=row.source, version=row.version, id=row.id, text=row.text, metadata=row.metadata)
doc_result = self.predict(doc)

tagger_output: Dict[str, list] = {}
Expand Down
Loading

0 comments on commit d2abecd

Please sign in to comment.