In [1]:
import spacy
from spacy.matcher import Matcher
from spacy.matcher import DependencyMatcher
from spacy import displacy


In [2]:
nlp = spacy.load("en_core_web_sm")

In [3]:
matcher = DependencyMatcher(nlp.vocab)

In [6]:
doc = nlp("Experience programming in Spark (ML, Mllib, Spark SQL), R, Python")
displacy.render(doc, jupyter=True, style="dep")

In [69]:
doc = nlp("Experience effectively communicating and presenting data to a variety of audiences required.")
displacy.render(doc, jupyter=True, style="dep")

In [71]:
matcher = DependencyMatcher(nlp.vocab)
patterns = [
  {
      "RIGHT_ID": "anchor_experience",       # unique name
      "RIGHT_ATTRS": {"LOWER": "experience"}  # token pattern for "founded"
  },
  {
      "LEFT_ID": "anchor_experience",
      "REL_OP": ">>",
      "RIGHT_ID": "skills_1", 
      "RIGHT_ATTRS": {"DEP":{"IN": ["acl", "conj"]}}
  },
  {
      "LEFT_ID": "skills_1",
      "REL_OP": ">",
      "RIGHT_ID": "skills_2", 
      "RIGHT_ATTRS": {"DEP":{"IN": ["conj"]}}
  },
  {
      "LEFT_ID": "skills_2",
      "REL_OP": ">>",
      "RIGHT_ID": "object",
      "RIGHT_ATTRS": {"POS": {"IN": ["NOUN", "PROPN"]}}
  }
    
]

matcher.add("EXPERIENCE", [pattern])
matches = None
matches = matcher(doc)
print(matches)

match_id, token_ids = matches[0]
for i in range(len(token_ids)):
    print(pattern[i]["RIGHT_ID"] + ":", doc[token_ids[i]].text)

[(7324372616739864093, [0, 2, 4, 5]), (7324372616739864093, [0, 2, 4, 8]), (7324372616739864093, [0, 2, 4, 10])]
anchor_experience: Experience
skills_1: communicating
skills_2: presenting
object: data


In [30]:
len(matches)

5

In [44]:
def compounds_finder(doc):

    com_range = []
    max_ind = len(doc)
    for idx, tok in enumerate(doc):
        if((tok.dep_ == "compound") and (idx < max_ind)):
            com_range.append((idx, idx+1))

    result = []
    for i in range(0, len(com_range)-1):
        if(com_range[i][1] == com_range[i+1][0]):
            result.append((com_range[i][0], com_range[i+1][1]))
        else:
            result.append(com_range[i])


    return result

In [101]:
doc = nlp("leadership experience in Product Management; demonstrated success launching and driving.")
#doc = nlp("Experience developing product roadmaps for data and/or technical solutions in a B2B environment.")
displacy.render(doc, jupyter=True, style="dep")

In [20]:
doc = nlp("leadership experience in Product Management; demonstrated success launching and driving.")
#doc = nlp("Significant experience with the full software development lifecycle. and software development methodologies (Agile, Iterative).")
matcher = DependencyMatcher(nlp.vocab)
pattern = [
  {
      "RIGHT_ID": "anchor",       # unique name
      "RIGHT_ATTRS": {"LOWER": "experience", "POS": "NOUN"}  # token pattern for "founded"
  },
  {
      "LEFT_ID": "anchor",
      "REL_OP": ">>",
      "RIGHT_ID": "experience",
      "RIGHT_ATTRS": {"POS": {"IN": ["NOUN", "PROPN","VERB"]}}
  },
  {
       "LEFT_ID": "experience",
        "REL_OP": ".*",
        "RIGHT_ID": "punctuation",
        "RIGHT_ATTRS": {"IS_PUNCT": True}
  }
    
]

matcher.add("EXPERIENCE", [pattern])
matches = None
matches = matcher(doc)
print(matches)

for match in matches:
    match_id, token_ids = match
    for i in range(len(token_ids)):
        print(pattern[i]["RIGHT_ID"] + ":", doc[token_ids[i]].text)

[(7324372616739864093, [1, 0, 5]), (7324372616739864093, [1, 0, 11]), (7324372616739864093, [1, 3, 5]), (7324372616739864093, [1, 3, 11]), (7324372616739864093, [1, 4, 5]), (7324372616739864093, [1, 4, 11])]
anchor: experience
experience: leadership
punctuation: ;
anchor: experience
experience: leadership
punctuation: .
anchor: experience
experience: Product
punctuation: ;
anchor: experience
experience: Product
punctuation: .
anchor: experience
experience: Management
punctuation: ;
anchor: experience
experience: Management
punctuation: .


In [123]:
def matches_location(matches, doc):
    anchors_list = []
    matches_list = []
    for match in matches:
        match_id, token_ids = match
        anchors_list.append(token_ids[0])
        matches_list.append(token_ids[1])
    print(min(anchors_list))
    print(max(matches_list))

        

matches_location(matches, doc)

1
15


In [12]:
with open('../data/soft_skills.txt','r') as file:
    text = file.read()
    
    
print(text)

leadership experience in Product Management; demonstrated success launching and driving.
Strategic thinker.
presenting.
Problem-solving, design-thinking, customer-centric mindset; highly proactive with a positive ""can do"" attitude.
Experience developing product roadmaps for data and/or technical solutions in a B2B environment.
Strong communicator.
demonstrated ability.
collaborator.
leading through influence.
developing and growing talent.
managing teams.
Defines multiple products.
creating vision, strategy and .
own and manage end-to-end architecture.
define the roadmap and strategy.
understand the big picture.
strategy, drive design and technical implementation, and architect innovative solutions.
This position requires excellent client services and technical skills; expertise with architecture, design, and implementation of information security software, identity and access management system, or enterprise software; and documentation experience.
candidates should have a high energ

In [13]:
doc = nlp(text)

In [14]:
sents = list(doc.sents)
displacy.render(sents[0], jupyter=True, style="dep")

In [35]:
for t in sents[0]:
    print(t.text + " " + t.dep_ + " " + t.pos_ + " " + str(t.morph))

leadership compound NOUN Number=Sing
experience nsubj NOUN Number=Sing
in prep ADP 
Product compound PROPN Number=Sing
Management pobj PROPN Number=Sing
; punct PUNCT 
demonstrated ROOT VERB Tense=Past|VerbForm=Fin
success dobj NOUN Number=Sing
launching acl VERB Aspect=Prog|Tense=Pres|VerbForm=Part
and cc CCONJ ConjType=Cmp
driving conj NOUN Number=Sing
. punct PUNCT PunctType=Peri


In [46]:
print(sents[4])
for t in sents[4]:
    print(t.text + " " + t.dep_ + " " + t.pos_)


Experience developing product roadmaps for data and/or technical solutions in a B2B environment.

 dep SPACE
Experience dep NOUN
developing amod VERB
product compound NOUN
roadmaps ROOT NOUN
for prep ADP
data nmod NOUN
and/or cc CCONJ
technical conj ADJ
solutions pobj NOUN
in prep ADP
a det DET
B2B compound PROPN
environment pobj NOUN
. punct PUNCT


In [50]:
componds = compounds_finder(sents[0])
print(componds)


[(0, 1)]


In [13]:
text = 'Lead a team through the ideation, technical development, and launch of innovative products'
text2 = "Product Management AWS opportunity collaborate engineering engineer , design , designer, business development teams . looking entrepreneurial product leader passionate delivering solutions customers excited growing new AWS S3 business "

In [14]:
doc = nlp(text)
displacy.render(doc, jupyter=True, style="dep")

In [75]:
text2 = "Drive product development with a team of world-class engineers and designers"
doc2 = nlp(text2)
displacy.render(doc2, jupyter=True, style="ent")



In [3]:
ruler = nlp.add_pipe("entity_ruler", before="ner") 

In [16]:
patterns = [
    {"label": "SKILL", "pattern": [{"POS": "VERB", "IS_SENT_START": True},  
                                   {"SPACY": True, "OP": "*"}, {"POS": "NOUN"}]},
    {"label": "PROFESSION", "pattern": [{"POS": "NOUN", "LEMMA": {"IN" : ["engineer", "design", "Product Management"]}}]},
    {"label": "PROFESSION", "pattern": [{"LOWER": "product management"}]},
    {"label": "PRODUCT", "pattern": [{"TEXT": "S3"}, {"TEXT": "AWS"}]}
]

ruler.add_patterns(patterns)

doc3 = nlp("Product Management AWS opportunity S3 collaborate engineering engineer , design , designer, business development teams . looking entrepreneurial product leader passionate delivering solutions customers excited growing new AWS S3 business ")
print([(ent.text, ent.label_) for ent in doc3.ents])

displacy.render(doc3, style='ent', jupyter=True)

[('Product Management', 'PROFESSION'), ('AWS', 'PRODUCT'), ('S3', 'PRODUCT'), ('engineer', 'PROFESSION'), ('design', 'PROFESSION'), ('looking entrepreneurial product leader passionate delivering solutions customers excited growing new AWS S3 business', 'SKILL')]
