# Import Important Libraries

In [10]:
import requests
from bs4 import BeautifulSoup
import gensim

In [6]:
from gensim.summarization import summarize

In [11]:
url = 'https://www.npr.org/2021/04/28/991683886/frightened-to-death-cheerleader-speech-case-gives-supreme-court-pause'

In [12]:
page = requests.get(url).text

In [13]:
soup = BeautifulSoup(page)

In [14]:
header = soup.find('h1').get_text()

In [15]:
date = soup.find('span', attrs = {'class': 'date'}).text
date

'April 28, 2021'

In [16]:
p_tags = soup.find_all('p')

In [17]:
text = [tags.get_text().strip() for tags in p_tags]

In [18]:
print (text)

['Nina Totenberg', 'Brandi Levy wears her former cheerleading outfit as she looks at her mobile phone while sitting outside Mahanoy Area High School in Mahanoy City, Pa., on April 4.\n                \n                \n                    \n                    Danna Singer/ACLU/AP\n                    \n                \nhide caption', 'Brandi Levy wears her former cheerleading outfit as she looks at her mobile phone while sitting outside Mahanoy Area High School in Mahanoy City, Pa., on April 4.', 'Facing its biggest student speech case in a half-century, the Supreme Court seemed to be looking for a narrow exit door on Wednesday.', 'At issue was whether schools may punish students for speech that occurs online and off-campus but that may affect school order.', 'The case has been billed as the most important student speech case since 1969. That landmark ruling came at the height of the Vietnam War. Mary Beth Tinker and four other students went to court after they were suspended for we

In [19]:
sentence_list = [sentence for sentence in text if not '\n' in sentence]

In [20]:
sentence_list = [sentence for sentence in sentence_list if '.' in sentence]

In [21]:
article = ''.join(sentence_list)
article

'Brandi Levy wears her former cheerleading outfit as she looks at her mobile phone while sitting outside Mahanoy Area High School in Mahanoy City, Pa., on April 4.Facing its biggest student speech case in a half-century, the Supreme Court seemed to be looking for a narrow exit door on Wednesday.At issue was whether schools may punish students for speech that occurs online and off-campus but that may affect school order.The case has been billed as the most important student speech case since 1969. That landmark ruling came at the height of the Vietnam War. Mary Beth Tinker and four other students went to court after they were suspended for wearing black armbands to school to protest the war.By a vote of 7 to 2, the high court ruled at the time for the first time that kids do have First Amendment free speech rights at school, unless school officials reasonably forecast it will cause disruptions.Wednesday\'s case did not involve such serious speech. It was brought by Brandi Levy, a 14-yea

In [49]:
summary = summarize(article, ratio=0.5)

In [41]:
import docx

In [42]:
doc = docx.Document()

In [43]:
doc.add_heading(header)

<docx.text.paragraph.Paragraph at 0x7fbcd12ad460>

In [44]:
doc.add_picture('img.png')

<docx.shape.InlineShape at 0x7fbcd12b5b20>

In [45]:
doc.add_paragraph(date)

<docx.text.paragraph.Paragraph at 0x7fbcd12ad6a0>

In [46]:
doc.add_paragraph(summary)

<docx.text.paragraph.Paragraph at 0x7fbcd12ad190>

In [47]:
doc.save('document.docx')

In [50]:
summary

'Brandi Levy wears her former cheerleading outfit as she looks at her mobile phone while sitting outside Mahanoy Area High School in Mahanoy City, Pa., on April 4.Facing its biggest student speech case in a half-century, the Supreme Court seemed to be looking for a narrow exit door on Wednesday.At issue was whether schools may punish students for speech that occurs online and off-campus but that may affect school order.The case has been billed as the most important student speech case since 1969.\nThat landmark ruling came at the height of the Vietnam War. Mary Beth Tinker and four other students went to court after they were suspended for wearing black armbands to school to protest the war.By a vote of 7 to 2, the high court ruled at the time for the first time that kids do have First Amendment free speech rights at school, unless school officials reasonably forecast it will cause disruptions.Wednesday\'s case did not involve such serious speech.\nAnd on Wednesday, the Supreme Court f

# Tokenization

In [51]:
import spacy

In [52]:
nlp = spacy.load('en_core_web_sm')

In [54]:
doc = nlp(summary)

In [55]:
for token in doc:
    print(token.text, '--->', spacy.explain(token.text))

Brandi ---> None
Levy ---> None
wears ---> None
her ---> None
former ---> None
cheerleading ---> None
outfit ---> None
as ---> None
she ---> None
looks ---> None
at ---> None
her ---> None
mobile ---> None
phone ---> None
while ---> None
sitting ---> None
outside ---> None
Mahanoy ---> None
Area ---> None
High ---> None
School ---> None
in ---> None
Mahanoy ---> None
City ---> None
, ---> punctuation mark, comma
Pa. ---> None
, ---> punctuation mark, comma
on ---> None
April ---> None
4.Facing ---> None
its ---> None
biggest ---> None
student ---> None
speech ---> None
case ---> case marking
in ---> None
a ---> None
half ---> None
- ---> None
century ---> None
, ---> punctuation mark, comma
the ---> None
Supreme ---> None
Court ---> None
seemed ---> None
to ---> None
be ---> None
looking ---> None
for ---> None
a ---> None
narrow ---> None
exit ---> None
door ---> None
on ---> None
Wednesday ---> None
. ---> punctuation mark, sentence closer
At ---> None
issue ---> None
was ---> None
w

school ---> None
, ---> punctuation mark, comma
the ---> None
priority ---> None
is ---> None
not ---> None
to ---> None
give ---> None
the ---> None
school ---> None
discretion ---> None
to ---> None
regulate ---> None
kids ---> None
' ---> None
speech ---> None
, ---> punctuation mark, comma
" ---> None
he ---> None
said ---> None
. ---> punctuation mark, sentence closer
But ---> None
the ---> None
school ---> None
district ---> None
's ---> None
lawyer ---> None
, ---> punctuation mark, comma
Lisa ---> None
Blatt ---> None
said ---> None
that ---> None
standard ---> None
would ---> None
be ---> None
a ---> None
" ---> None
nightmare ---> None
. ---> punctuation mark, sentence closer
" ---> None
It ---> None
would ---> None
, ---> punctuation mark, comma
she ---> None
said ---> None
, ---> punctuation mark, comma
mean ---> None
" ---> None
open ---> None
season ---> None
" ---> None
on ---> None
schools ---> None
and ---> None
produce ---> None
" ---> None
chaos ---> None
" ---> None

# POS Tagging

In [56]:
for token in doc:
    print(token.text, '--->', token.pos_, spacy.explain(token.pos_))

Brandi ---> PROPN proper noun
Levy ---> PROPN proper noun
wears ---> VERB verb
her ---> PRON pronoun
former ---> ADJ adjective
cheerleading ---> NOUN noun
outfit ---> NOUN noun
as ---> ADP adposition
she ---> PRON pronoun
looks ---> VERB verb
at ---> ADP adposition
her ---> PRON pronoun
mobile ---> ADJ adjective
phone ---> NOUN noun
while ---> SCONJ subordinating conjunction
sitting ---> VERB verb
outside ---> ADP adposition
Mahanoy ---> PROPN proper noun
Area ---> PROPN proper noun
High ---> PROPN proper noun
School ---> PROPN proper noun
in ---> ADP adposition
Mahanoy ---> PROPN proper noun
City ---> PROPN proper noun
, ---> PUNCT punctuation
Pa. ---> PROPN proper noun
, ---> PUNCT punctuation
on ---> ADP adposition
April ---> PROPN proper noun
4.Facing ---> NUM numeral
its ---> PRON pronoun
biggest ---> ADJ adjective
student ---> NOUN noun
speech ---> NOUN noun
case ---> NOUN noun
in ---> ADP adposition
a ---> DET determiner
half ---> ADJ adjective
- ---> PUNCT punctuation
century -

few ---> ADJ adjective
hypotheticals ---> NOUN noun
, ---> PUNCT punctuation
and ---> CCONJ coordinating conjunction
you ---> PRON pronoun
just ---> ADV adverb
tell ---> VERB verb
me ---> PRON pronoun
school ---> NOUN noun
speech ---> NOUN noun
or ---> CCONJ coordinating conjunction
not ---> PART particle
school ---> NOUN noun
speech?"Each ---> NOUN noun
of ---> ADP adposition
these ---> DET determiner
, ---> PUNCT punctuation
conceded ---> VERB verb
Stewart ---> PROPN proper noun
, ---> PUNCT punctuation
would ---> AUX auxiliary
be ---> AUX auxiliary
considered ---> VERB verb
school ---> NOUN noun
speech ---> NOUN noun
under ---> ADP adposition
the ---> DET determiner
standard ---> NOUN noun
proposed ---> VERB verb
by ---> ADP adposition
the ---> DET determiner
Justice ---> PROPN proper noun
Department ---> PROPN proper noun
. ---> PUNCT punctuation

 ---> SPACE space
In ---> ADP adposition
other ---> ADJ adjective
words ---> NOUN noun
, ---> PUNCT punctuation
all ---> DET determiner


# Depenency Parsing

In [58]:
for token in doc:
    print (token.text, '--->', token.dep_, spacy.explain(token.dep_))

Brandi ---> compound compound
Levy ---> nsubj nominal subject
wears ---> ROOT None
her ---> poss possession modifier
former ---> amod adjectival modifier
cheerleading ---> compound compound
outfit ---> dobj direct object
as ---> mark marker
she ---> nsubj nominal subject
looks ---> advcl adverbial clause modifier
at ---> prep prepositional modifier
her ---> poss possession modifier
mobile ---> amod adjectival modifier
phone ---> pobj object of preposition
while ---> mark marker
sitting ---> advcl adverbial clause modifier
outside ---> prep prepositional modifier
Mahanoy ---> compound compound
Area ---> compound compound
High ---> compound compound
School ---> pobj object of preposition
in ---> prep prepositional modifier
Mahanoy ---> compound compound
City ---> pobj object of preposition
, ---> punct punctuation
Pa. ---> appos appositional modifier
, ---> punct punctuation
on ---> prep prepositional modifier
April ---> pobj object of preposition
4.Facing ---> nummod numeric modifier
it

, ---> punct punctuation
" ---> punct punctuation
Can ---> aux auxiliary
I ---> nsubj nominal subject
give ---> ccomp clausal complement
you ---> dative dative
a ---> quantmod modifier of quantifier
few ---> amod adjectival modifier
hypotheticals ---> dobj direct object
, ---> punct punctuation
and ---> cc coordinating conjunction
you ---> nsubj nominal subject
just ---> advmod adverbial modifier
tell ---> conj conjunct
me ---> dative dative
school ---> compound compound
speech ---> dobj direct object
or ---> cc coordinating conjunction
not ---> neg negation modifier
school ---> conj conjunct
speech?"Each ---> appos appositional modifier
of ---> prep prepositional modifier
these ---> pobj object of preposition
, ---> punct punctuation
conceded ---> xcomp open clausal complement
Stewart ---> dobj direct object
, ---> punct punctuation
would ---> aux auxiliary
be ---> auxpass auxiliary (passive)
considered ---> ccomp clausal complement
school ---> compound compound
speech ---> oprd objec

# Lemmatization

In [59]:
for token in doc:
    print (token.text, '--->', token.lemma_, spacy.explain(token.lemma_))

Brandi ---> Brandi None
Levy ---> Levy None
wears ---> wear None
her ---> her None
former ---> former None
cheerleading ---> cheerleading None
outfit ---> outfit None
as ---> as None
she ---> she None
looks ---> look None
at ---> at None
her ---> her None
mobile ---> mobile None
phone ---> phone None
while ---> while None
sitting ---> sit None
outside ---> outside None
Mahanoy ---> Mahanoy None
Area ---> Area None
High ---> High None
School ---> School None
in ---> in None
Mahanoy ---> Mahanoy None
City ---> City None
, ---> , punctuation mark, comma
Pa. ---> Pa. None
, ---> , punctuation mark, comma
on ---> on None
April ---> April None
4.Facing ---> 4.facing None
its ---> its None
biggest ---> big None
student ---> student None
speech ---> speech None
case ---> case case marking
in ---> in None
a ---> a None
half ---> half None
- ---> - None
century ---> century None
, ---> , punctuation mark, comma
the ---> the None
Supreme ---> Supreme None
Court ---> Court None
seemed ---> seem No

or ---> or None
not ---> not None
school ---> school None
speech?"Each ---> speech?"each None
of ---> of None
these ---> these None
, ---> , punctuation mark, comma
conceded ---> concede None
Stewart ---> Stewart None
, ---> , punctuation mark, comma
would ---> would None
be ---> be None
considered ---> consider None
school ---> school None
speech ---> speech None
under ---> under None
the ---> the None
standard ---> standard None
proposed ---> propose None
by ---> by None
the ---> the None
Justice ---> Justice None
Department ---> Department None
. ---> . punctuation mark, sentence closer

 ---> 
 None
In ---> in None
other ---> other None
words ---> word None
, ---> , punctuation mark, comma
all ---> all None
of ---> of None
these ---> these None
messages ---> message None
sent ---> send None
from ---> from None
off ---> off None
campus ---> campus None
might ---> might None
well ---> well None
be ---> be None
punishable ---> punishable None
school ---> school None
speech ---> speech

# Sentence Boundary Detection

In [60]:
doc.sents

<generator at 0x7fbcb6b705e0>

In [61]:
sentences = list(doc.sents)

In [63]:
for sentence in sentences:
    print (sentence)

Brandi Levy wears her former cheerleading outfit as she looks at her mobile phone while sitting outside Mahanoy Area High School in Mahanoy City, Pa., on April 4.Facing its biggest student speech case in a half-century, the Supreme Court seemed to be looking for a narrow exit door on Wednesday.
At issue was whether schools may punish students for speech that occurs online and off-campus but that may affect school order.
The case has been billed as the most important student speech case since 1969.


That landmark ruling came at the height of the Vietnam War.
Mary Beth Tinker and four other students went to court after they were suspended for wearing black armbands to school to protest the war.
By a vote of 7 to 2, the high court ruled at the time for the first time that kids do have First Amendment free speech rights at school, unless school officials reasonably forecast it will cause disruptions.
Wednesday's case did not involve such serious speech.


And on Wednesday, the Supreme Cou

# Named Entity

In [64]:
for ent in doc.ents:
    print(ent.text, '--->', ent.label_, '--->', spacy.explain(ent.label_))

Brandi Levy ---> PERSON ---> People, including fictional
Mahanoy Area High School ---> ORG ---> Companies, agencies, institutions, etc.
Mahanoy City ---> GPE ---> Countries, cities, states
Pa. ---> GPE ---> Countries, cities, states
April 4.Facing ---> DATE ---> Absolute or relative dates or periods
a half-century ---> DATE ---> Absolute or relative dates or periods
the Supreme Court ---> ORG ---> Companies, agencies, institutions, etc.
Wednesday ---> DATE ---> Absolute or relative dates or periods
1969 ---> DATE ---> Absolute or relative dates or periods
the Vietnam War ---> EVENT ---> Named hurricanes, battles, wars, sports events, etc.
Mary Beth Tinker ---> PERSON ---> People, including fictional
four ---> CARDINAL ---> Numerals that do not fall under another type
7 to 2 ---> CARDINAL ---> Numerals that do not fall under another type
first ---> ORDINAL ---> "first", "second", etc.
First Amendment ---> LAW ---> Named documents made into laws.
Wednesday ---> DATE ---> Absolute or rela

# Visualization

In [65]:
from spacy import displacy

In [66]:
displacy.render(doc, style='ent', jupyter=True)

# Dependency Parsing

In [67]:
displacy.render(doc, style='dep', jupyter=True)

# Putting it all together

In [69]:
lis = []

In [71]:
for token in doc:
    dic ={}
    dic['Token'] = token.text
    dic['POS'] = token.pos_
    dic['Tags'] = token.tag_
    dic['Dep'] = token.dep_
    dic['Explanation']= spacy.explain(token.tag_)
    lis.append(dic)

In [74]:
import pandas as pd

In [78]:
data = pd.DataFrame(lis)

In [79]:
data

Unnamed: 0,Token,POS,Tags,Dep,Explanation
0,Brandi,PROPN,NNP,compound,"noun, proper singular"
1,Levy,PROPN,NNP,nsubj,"noun, proper singular"
2,wears,VERB,VBZ,ROOT,"verb, 3rd person singular present"
3,her,PRON,PRP$,poss,"pronoun, possessive"
4,former,ADJ,JJ,amod,adjective
...,...,...,...,...,...
748,in,ADP,IN,prep,"conjunction, subordinating or preposition"
749,the,DET,DT,det,determiner
750,lower,ADJ,JJR,amod,"adjective, comparative"
751,courts,NOUN,NNS,pobj,"noun, plural"


In [84]:
data.to_csv('nlptasks1.csv', index = False)

In [86]:
l = []
for ent in doc.ents:
    d = {}
    d['Entities'] = ent.text
    d['Labels'] = ent.label_
    d['Explanation'] = spacy.explain(ent.label_)
    l.append(d)

In [87]:
entities = pd.DataFrame(l)

In [88]:
entities

Unnamed: 0,Entities,Labels,Explanation
0,Brandi Levy,PERSON,"People, including fictional"
1,Mahanoy Area High School,ORG,"Companies, agencies, institutions, etc."
2,Mahanoy City,GPE,"Countries, cities, states"
3,Pa.,GPE,"Countries, cities, states"
4,April 4.Facing,DATE,Absolute or relative dates or periods
5,a half-century,DATE,Absolute or relative dates or periods
6,the Supreme Court,ORG,"Companies, agencies, institutions, etc."
7,Wednesday,DATE,Absolute or relative dates or periods
8,1969,DATE,Absolute or relative dates or periods
9,the Vietnam War,EVENT,"Named hurricanes, battles, wars, sports events..."


In [89]:
entities.to_csv('nlptasks2.csv', index = False)