# NLP model for Microsoft db

In [1]:
# Import the English language class
from spacy.lang.en import English

# Create the nlp object. The cells below call it on a string to obtain a doc
# whose tokens are then inspected. Note that `nlp` is rebound later in this
# notebook to a model loaded with spacy.load('en_core_web_sm').
nlp = English()

In [2]:
# Sample requirements text used as input for the rest of the notebook: one
# requirement per line. The next cell splits it on '\n' into a list.
# The leading/trailing spaces on some lines are part of the data and are
# carried into the list entries unchanged.
text = """Masters or PhD in Materials Engineering or equivalent 
10 years’ experience in a professional engineering laboratory environment and background in reliability testing and analysis.  
Deep understanding of statistical sampling methods. 
 Deep understanding of root cause/corrective action methods to identify contributing factors of production defects. 
Understands server microprocessor architectures, including the operation of related subsystems, interfaces, and components. 
In Depth knowledge of reliability test methods. 
In Depth knowledge in the physics of failure for ASIC, PCB assembled materials 
In Depth knowledge of AC/DC, DC/DC power architectures.   
In Depth knowledge of industry reliability test methods and controls.   
In depth knowledge of server components. 
Able to communicate effectively in written and oral manner, so that all stakeholders have clear understanding of issues and metrics 
Can use and apply complex statistics to evaluate product reliability and highlight potential product risks 
Has strong communication and project management skills"""

In [3]:
# Split the requirements into a list with one entry per line.
# This cell rebinds `text` from str to list, so running it a second time
# would call .split on a list and raise AttributeError. The isinstance guard
# makes the cell safe to re-run while leaving `text` as the list that the
# later cells index (text[0], text[6]).
if isinstance(text, str):
    text = text.split('\n')
text

['Masters or PhD in Materials Engineering or equivalent ',
 '10 years’ experience in a professional engineering laboratory environment and background in reliability testing and analysis.  ',
 'Deep understanding of statistical sampling methods. ',
 ' Deep understanding of root cause/corrective action methods to identify contributing factors of production defects. ',
 'Understands server microprocessor architectures, including the operation of related subsystems, interfaces, and components. ',
 'In Depth knowledge of reliability test methods. ',
 'In Depth knowledge in the physics of failure for ASIC, PCB assembled materials ',
 'In Depth knowledge of AC/DC, DC/DC power architectures.   ',
 'In Depth knowledge of industry reliability test methods and controls.   ',
 'In depth knowledge of server components. ',
 'Able to communicate effectively in written and oral manner, so that all stakeholders have clear understanding of issues and metrics ',
 'Can use and apply complex statistics to 

In [4]:
# Tokenize the first requirement line
doc = nlp(text[0])

# Position and raw text of every token
print('Index:   ', [tok.i for tok in doc])
print('Text:    ', [tok.text for tok in doc])

# Lexical flags, one printed row per flag; each row label is the attribute
# name followed by a colon
for flag in ('is_alpha', 'is_punct', 'like_num'):
    print(flag + ':', [getattr(tok, flag) for tok in doc])

Index:    [0, 1, 2, 3, 4, 5, 6, 7]
Text:     ['Masters', 'or', 'PhD', 'in', 'Materials', 'Engineering', 'or', 'equivalent']
is_alpha: [True, True, True, True, True, True, True, True]
is_punct: [False, False, False, False, False, False, False, False]
like_num: [False, False, False, False, False, False, False, False]


Predicting Part-of-speech Tags

In [5]:
import spacy

# Rebind `nlp` to the small English model
nlp = spacy.load('en_core_web_sm')

# Run the model on the first requirement line
doc = nlp(text[0])

# One output line per token: its text, a space, then the predicted
# part-of-speech tag
for token in doc:
    print(' '.join((token.text, token.pos_)))

Masters NOUN
or CCONJ
PhD NOUN
in ADP
Materials PROPN
Engineering PROPN
or CCONJ
equivalent ADJ


Predicting Syntactic Dependencies

In [6]:
# One output line per token of the current `doc`: token text, part-of-speech
# tag, dependency label, and the text of the token's head, space-separated
for token in doc:
    print(' '.join([token.text, token.pos_, token.dep_, token.head.text]))

Masters NOUN ROOT Masters
or CCONJ cc Masters
PhD NOUN conj Masters
in ADP prep Masters
Materials PROPN compound Engineering
Engineering PROPN pobj in
or CCONJ cc Masters
equivalent ADJ conj Masters


Predicting Named Entities

In [7]:
# Echo the raw line first so the entities printed below can be read against it
print(text[6])

# Run the model on that same line
doc = nlp(text[6])

# One output line per predicted entity: its text, a space, then its label
for ent in doc.ents:
    print(' '.join((ent.text, ent.label_)))

In Depth knowledge in the physics of failure for ASIC, PCB assembled materials 
Depth GPE
ASIC ORG
PCB ORG


In [8]:
# Look up the meaning of the 'GPE' label that the entity output above assigned to "Depth"
spacy.explain('GPE')

'Countries, cities, states'

In [9]:
# Look up the description spaCy gives for the tag 'NNP'
spacy.explain('NNP')

'noun, proper singular'

In [10]:
# Look up the description spaCy gives for the label 'dobj'
spacy.explain('dobj')

'direct object'

In [11]:
# Look up the meaning of the 'ORG' label that the entity output above assigned to "ASIC" and "PCB"
spacy.explain('ORG')

'Companies, agencies, institutions, etc.'

In [15]:
# NOTE(review): spaCy was already imported and this same model already loaded
# in an earlier cell of this notebook; on a top-to-bottom run the two
# statements below repeat that work.
import spacy

nlp = spacy.load("en_core_web_sm")

# Run the model on the first requirement line
doc = nlp(text[0])

for token in doc:
    # Left-justify token text, part-of-speech tag and dependency label into
    # fixed-width columns (12, 10 and 10 characters) so the rows line up
    row = token.text.ljust(12) + token.pos_.ljust(10) + token.dep_.ljust(10)
    print(row)

Masters     NOUN      ROOT      
or          CCONJ     cc        
PhD         NOUN      conj      
in          ADP       prep      
Materials   PROPN     compound  
Engineering PROPN     pobj      
or          CCONJ     cc        
equivalent  ADJ       conj      
