In [1]:
import random
import typer
from pathlib import Path
import spacy
from spacy.tokens import DocBin, Doc
from spacy.training.example import Example
from rel_component.scripts.rel_pipe import make_relation_extractor, score_relations
from rel_component.scripts.rel_model import create_relation_model, create_classification_layer, create_instances, create_tensors

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the two trained pipelines used throughout the notebook:
#   nlp  -> entity recognition model (Component / Status labels in the outputs below)
#   nlp2 -> relation extraction model
ner_model_dir = "./ner_component/output/model-best"
rel_model_dir = "./rel_component/training/model-best"

nlp = spacy.load(ner_model_dir)
nlp2 = spacy.load(rel_model_dir)

# The REL cells iterate over `doc.sents` while running `nlp.pipe` with the
# parser disabled, so a sentencizer is added to nlp2 to supply sentence
# boundaries when nlp2's components are applied to each doc.
nlp2.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x236a318c2c8>

In [19]:
# Sample maintenance comments used to spot-check the pipelines in the cells below.
# Long entries are split with implicit string concatenation; each parenthesised
# group is still a single list element.
text = [
    "Trolley cars on pendant carrier damaged",
    "Replaced hook with new style latch and thrust bearing.",
    (
        "Replaced pendant festune C-Track and checked functions "
        "of the new c-track and it was good."
    ),
    "Jib is drifting with and without a load. Recommend leveling jib.",
    "Path blocked by pipe",
    "Radio controlled crane requires warning device.",
    (
        "Load block is full of coal dust, recommend to disassemble, "
        "clean, lubricate. Reassemble, test run."
    ),
    "Chain is worn and needs replaced.",
]

In [16]:
# For NER
# Print every recognised entity of each sample text as a (text, label) pair.
# The tagger and parser are switched off because only `doc.ents` is read here.
for doc in nlp.pipe(text, disable=["tagger", "parser"]):
    labelled_entities = []
    for ent in doc.ents:
        labelled_entities.append((ent.text, ent.label_))
    print(labelled_entities)

[('Trolley cars', 'Component'), ('pendant carrier', 'Component'), ('damaged', 'Status')]
[('Replaced', 'Status'), ('hook', 'Component'), ('style latch', 'Component'), ('thrust bearing', 'Component')]
[('Replaced', 'Status'), ('pendant', 'Component'), ('-', 'Component'), ('Track', 'Component'), ('c-', 'Component'), ('track', 'Component')]
[('Jib', 'Component'), ('drifting with and without a load', 'Status'), ('jib', 'Component')]
[('blocked by pipe', 'Status')]


In [35]:
# For SpanCat
# Load the span categorizer under its own name. Rebinding `nlp` here would
# replace the NER pipeline that the REL cells further down still call through
# `nlp.pipe(...)` (they read `sent.ents`), so a top-to-bottom re-run of the
# notebook would hand them docs produced by the wrong model.
nlp_spancat = spacy.load("./spancat_component/spancat_output/model-best")

# Print the predicted spans stored under the "sc" key as (text, label) pairs.
for doc in nlp_spancat.pipe(text, disable=["tagger", "parser"]):
    print([(span.text, span.label_) for span in doc.spans["sc"]])

[('damaged', 'Status'), ('Panel door', 'Component')]
[('leaking', 'Status'), ('Gear case', 'Component'), ('leaking.', 'Status')]
[('not functioning', 'Status')]
[('bent', 'Status'), ('Bridge', 'Component'), ('runway', 'Component'), ('end stop', 'Component'), ('missing bumpers', 'Status')]
[('Trolley bumpers', 'Component')]


In [22]:
# For REL
# For each sample text: run NER with `nlp`, push the resulting doc through every
# component of `nlp2`, then print "<component> -> <status>" for each entity pair
# whose 'status_of' score is at least 0.5.
for doc in nlp.pipe(text, disable=["tagger", "parser"]):
    print("\n")
    print(doc.text)

    # Apply nlp2's components one after another to the already-annotated doc.
    for _, component in nlp2.pipeline:
        doc = component(doc)

    # `doc._.rel` is keyed by a pair of entity start offsets (compared against
    # `ent.start` below) and holds a dict of per-label scores.
    for offsets, scores in doc._.rel.items():
        for sentence in doc.sents:
            # Only pairs whose two entities sit in the same sentence are reported.
            status_ents = [ent for ent in sentence.ents if ent.start == offsets[0]]
            component_ents = [ent for ent in sentence.ents if ent.start == offsets[1]]
            for status_ent in status_ents:
                for component_ent in component_ents:
                    if scores['status_of'] >= 0.5:
                        print(f"{component_ent.text} -> {status_ent.text}")




Trolley cars on pendant carrier damaged
Trolley cars -> damaged
pendant carrier -> damaged


Replaced hook with new style latch and thrust bearing.
hook -> Replaced
thrust bearing -> Replaced


Replaced pendant festune C-Track and checked functions of the new c-track and it was good.
pendant -> Replaced
- -> Replaced
Track -> Replaced


Jib is drifting with and without a load. Recommend leveling jib.
Jib -> drifting with and without a load


Path blocked by pipe
ℹ Could not determine any instances in doc - returning doc as is.




Load block is full of coal dust, recommend to disassemble, clean, lubricate. Reassemble, test run.
Load block -> full of coal dust


Chain is worn and needs replaced.
Chain -> worn
Chain -> needs replaced


In [5]:
import pandas as pd

# Build the path with pathlib rather than the literal "data\sp21_...": "\s" is
# an invalid escape sequence (Python warns about it) and a backslash separator
# only resolves on Windows.
comments_path = Path("data") / "sp21_us_comments_2022.csv"
df = pd.read_csv(comments_path)
df = df[["CommentsLong"]]

# Empty CSV cells are read as NaN (a float); drop them so only real comment
# text is handed to the spaCy pipeline.
comments = df["CommentsLong"].dropna().tolist()

# One record per (comment, component, status) relation scoring at least 0.5.
results = []
for doc in nlp.pipe(comments, disable=["tagger", "parser"]):
    # Apply nlp2's components one after another to the NER-annotated doc.
    for _, component in nlp2.pipeline:
        doc = component(doc)

    # `doc._.rel` is keyed by a pair of entity start offsets (compared against
    # `ent.start` below) and holds a dict of per-label scores.
    for offsets, scores in doc._.rel.items():
        for sentence in doc.sents:
            # Only pairs whose two entities sit in the same sentence are kept.
            status_ents = [ent for ent in sentence.ents if ent.start == offsets[0]]
            component_ents = [ent for ent in sentence.ents if ent.start == offsets[1]]
            for status_ent in status_ents:
                for component_ent in component_ents:
                    if scores['status_of'] >= 0.5:
                        results.append(
                            {
                                "text": doc.text,
                                "component": component_ent.text,
                                "status": status_ent.text,
                            }
                        )

ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could not determine any instances in doc - returning doc as is.
ℹ Could no

In [57]:
# Turn the collected relation records into a table. The columns are named
# explicitly so the CSV still gets its header row when `results` is empty.
df_res = pd.DataFrame(results, columns=["text", "component", "status"])

# Build the path with pathlib rather than the literal "data\sp21_...": "\s" is
# an invalid escape sequence (Python warns about it) and a backslash separator
# only resolves on Windows.
relations_path = Path("data") / "sp21_us_comments_2022_relations.csv"
df_res.to_csv(relations_path, index=False)