# Practical NLP with spaCy

[spaCy](https://spacy.io/) is an industrial-strength Natural Language Processing (NLP) ecosystem with many great features that can simplify common or complicated Natural Language Understanding (NLU) tasks.

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn as sk
import spacy

In [2]:
# Load NLP models (both English and Japanese).
# `spacy.load` resolves each pipeline by its package name, so both packages
# must already be installed in the kernel's environment
# (e.g. `python -m spacy download en_core_web_trf`).
# `enlp` is the English pipeline and `jnlp` the Japanese one; the cells below
# call them directly on raw text to obtain a parsed document.
enlp = spacy.load('en_core_web_trf')
jnlp = spacy.load('ja_core_news_lg')

In [3]:
# Left-align all columns of a given `dataFrm` DataFrame
def left_align(dataFrm: pd.DataFrame):
  dataFrm = dataFrm.style.set_properties(**{'text-align': 'left'})
  dataFrm = dataFrm.set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])])
  return dataFrm

# Column labels shared by the token-attribute tables in this notebook
headers = [
    "Text", "Lemma", "POS", "Tag",
    "Dep", "Shape", "Is Alpha", "Is Stop",
]

# Smoke-test the English pipeline on a short sentence
edoc = enlp("This is a sentence.")

# One row per token; attributes are listed in the same order as `headers`
e_tokens_info = [
    [
        token.text, token.lemma_, token.pos_, token.tag_,
        token.dep_, token.shape_, token.is_alpha, token.is_stop,
    ]
    for token in edoc
]

# Build the frame and wrap it in a left-aligned Styler for display
e_table = left_align(pd.DataFrame(e_tokens_info, columns=headers))
e_table

Unnamed: 0,Text,Lemma,POS,Tag,Dep,Shape,Is Alpha,Is Stop
0,This,this,DET,DT,nsubj,Xxxx,True,True
1,is,be,AUX,VBZ,ROOT,xx,True,True
2,a,a,DET,DT,det,x,True,True
3,sentence,sentence,NOUN,NN,attr,xxxx,True,False
4,.,.,PUNCT,.,punct,.,False,False


In [4]:
# Smoke-test the Japanese pipeline on a short sample text
jdoc = jnlp("こんにちは！日本語の例文を自然言語処理で解釈します。")

# One row per token; attributes are listed in the same order as `headers`
j_tokens_info = [
    [
        token.text, token.lemma_, token.pos_, token.tag_,
        token.dep_, token.shape_, token.is_alpha, token.is_stop,
    ]
    for token in jdoc
]

# Build the frame and wrap it in a left-aligned Styler for display
j_table = left_align(pd.DataFrame(j_tokens_info, columns=headers))
j_table

Unnamed: 0,Text,Lemma,POS,Tag,Dep,Shape,Is Alpha,Is Stop
0,こんにちは,こんにちは,INTJ,感動詞-一般,advcl,xxxx,True,False
1,！,!,PUNCT,補助記号-句点,aux,！,False,False
2,日本,日本,PROPN,名詞-固有名詞-地名-国,compound,xx,True,False
3,語,語,NOUN,名詞-普通名詞-一般,nmod,x,True,False
4,の,の,ADP,助詞-格助詞,case,x,True,True
5,例文,例文,NOUN,名詞-普通名詞-一般,obj,xx,True,False
6,を,を,ADP,助詞-格助詞,case,x,True,True
7,自然,自然,NOUN,名詞-普通名詞-一般,compound,xx,True,False
8,言語,言語,NOUN,名詞-普通名詞-一般,compound,xx,True,False
9,処理,処理,NOUN,名詞-普通名詞-サ変可能,obl,xx,True,False
