In [1]:

import spacy

# Load the small English pipeline.
nlp = spacy.load("en_core_web_sm")

# Parse one sample sentence; the cells below reuse `doc`.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Show where the tokenizer split the sentence: one token text per line.
for tok in doc:
    print(tok.text)


Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


In [2]:
# Coarse-grained part-of-speech label of every token, in sentence order.
for pos_label in (tok.pos_ for tok in doc):
    print(pos_label)

PROPN
AUX
VERB
ADP
VERB
PROPN
VERB
ADP
SYM
NUM
NUM


In [3]:
# Syntactic dependency label of every token (relation to its head).
for dep_label in (tok.dep_ for tok in doc):
    print(dep_label)

nsubj
aux
ROOT
prep
pcomp
nsubj
ccomp
prep
quantmod
compound
pobj


# Tokenization and token attributes

In [4]:
# Verbatim text of each token; the attribute listings in the following
# cells line up row-for-row with this output.
for tok in doc:
    print(tok.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


In [5]:
# Base form (lemma) of each token, e.g. "is" -> "be", "looking" -> "look".
for lemma in (tok.lemma_ for tok in doc):
    print(lemma)

Apple
be
look
at
buy
U.K.
startup
for
$
1
billion


In [6]:
# Coarse-grained part-of-speech label per token (PROPN, AUX, VERB, ...).
for tok in doc:
    print(tok.pos_)

PROPN
AUX
VERB
ADP
VERB
PROPN
VERB
ADP
SYM
NUM
NUM


In [7]:
# Fine-grained part-of-speech tag per token (NNP, VBZ, VBG, ...).
for fine_tag in (tok.tag_ for tok in doc):
    print(fine_tag)

NNP
VBZ
VBG
IN
VBG
NNP
VBD
IN
$
CD
CD


In [8]:
# Dependency relation label per token; the sentence root prints as ROOT.
for tok in doc:
    print(tok.dep_)

nsubj
aux
ROOT
prep
pcomp
nsubj
ccomp
prep
quantmod
compound
pobj


In [9]:
# Word shape of each token: letters are rendered as X/x by case and digits
# as d, while punctuation and symbols are kept (compare "U.K." -> "X.X.").
for shape in (tok.shape_ for tok in doc):
    print(shape)

Xxxxx
xx
xxxx
xx
xxxx
X.X.
xxxx
xxx
$
d
xxxx


In [10]:
# Flag per token: does it consist of alphabetic characters only?
for alpha_flag in (tok.is_alpha for tok in doc):
    print(alpha_flag)

True
True
True
True
True
False
True
True
False
False
True


In [11]:
# Flag per token: is it on the language's stop-word list?
for tok in doc:
    print(tok.is_stop)

False
True
False
True
False
False
False
True
False
False
False


# Named Entities

Text: The original entity text.
Start: Character offset of the start of the entity in the Doc's text (`ent.start_char`).
End: Character offset just past the end of the entity in the Doc's text (`ent.end_char`).
Label: Entity label, i.e. type.

In [12]:
# Surface text of every named entity found in the sentence.
for entity in doc.ents:
    print("text: ", entity.text)

text:  Apple
text:  U.K.
text:  $1 billion


In [13]:
# Character offset in the original text at which each entity begins.
for entity in doc.ents:
    print("start_char: ", entity.start_char)

start_char:  0
start_char:  27
start_char:  44


In [14]:
# Character offset just past each entity's last character
# (exclusive end: "Apple" spans 0..5).
for entity in doc.ents:
    print("end_char: ", entity.end_char)

end_char:  5
end_char:  31
end_char:  54


In [15]:
# Entity type assigned to each entity (ORG, GPE, MONEY, ...).
for entity in doc.ents:
    print("label: ", entity.label_)

label:  ORG
label:  GPE
label:  MONEY


# Word vectors and similarity

In [16]:
# Token texts once more, as the row labels for the vector attributes below.
for tok in doc:
    print(tok.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


In [17]:
# Flag per token: does it have a vector representation available?
for vector_flag in (tok.has_vector for tok in doc):
    print(vector_flag)

True
True
True
True
True
True
True
True
True
True
True


In [18]:
# L2 norm of each token's vector.
for norm in (tok.vector_norm for tok in doc):
    print(norm)

6.7813044
7.497386
6.7997518
6.765654
8.256932
7.4931884
5.2585325
6.5701003
8.750212
9.714703
7.9830933


In [19]:
# Flag per token: is it out-of-vocabulary, i.e. lacking a word vector?
# NOTE(review): every token prints True here, including common words. The
# loaded model is "en_core_web_sm"; presumably the small package ships without
# static word vectors -- confirm, and consider a md/lg model for this section.
for tok in doc:
    print(tok.is_oov)

True
True
True
True
True
True
True
True
True
True
True
