## **Using Scapy**

In [49]:
import spacy

In [50]:
from spacy.lang.en.stop_words import STOP_WORDS

print(STOP_WORDS)  # stopwords list in spacy.language.english
print("\n",len(STOP_WORDS))

{'fifteen', 'into', 'therefore', 'move', 'should', "'ve", 'nothing', 'whither', 'forty', 'however', 'very', 'six', 'or', 'what', 'the', 'seemed', 'three', 'why', 'him', 'ten', 'no', 'whole', 'than', 'did', 'again', 'none', 'see', 'elsewhere', '’ve', 'get', 'am', 'used', 'please', 'how', 'last', 'when', 'such', 'because', 'as', 'yours', 'fifty', 'cannot', 'since', 'off', '‘m', 'namely', 'they', 'his', 'with', 'ever', 'now', 'moreover', 'else', 'afterwards', 'a', 'your', 'meanwhile', 'already', 'really', 'make', 'other', 'everywhere', 'who', 'sometimes', 'must', 'unless', 'almost', 'of', 'twenty', 'hereupon', 'put', '‘re', 'therein', 'thence', 'former', 'while', 'onto', 'sometime', 'an', 'everyone', 'much', '‘d', 'five', 'hence', 'well', 'and', 'between', 'once', 'regarding', 'another', 'toward', '’ll', 'whereby', 'even', 'yourself', 'eight', 'them', 'least', 'up', 'her', 'he', 'from', 'formerly', 'both', 'ours', 'whereas', 'my', 'whom', 'besides', 'itself', 'may', 'you', 'at', 'becoming

In [51]:
nlp = spacy.load("en_core_web_sm")

text = "India, officially the Republic of India,[j][20] is a country in South Asia. It is the seventh-largest country in the world by area and the most populous country. Bounded by the Indian Ocean on the south, the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to the west;[k] China, Nepal, and Bhutan to the north; and Bangladesh and Myanmar to the east."
doc = nlp(text)

stopword_lst_txt = []
for token in doc:
    if token.is_stop:   # if token is stopwords then print that token
        stopword_lst_txt.append(token.text)

print(stopword_lst_txt)

['the', 'of', 'is', 'a', 'in', 'It', 'is', 'the', 'in', 'the', 'by', 'and', 'the', 'most', 'by', 'the', 'on', 'the', 'the', 'on', 'the', 'and', 'the', 'of', 'on', 'the', 'it', 'with', 'to', 'the', 'and', 'to', 'the', 'and', 'and', 'to', 'the']


# <b>```Remove Stopwords from given Text```</b>

In [52]:
def remove_stopwords(text):
    doc = nlp(text)

    # remove stopwords, punctuation, digit and take only alphabetic token and join that token
    no_stop_word = [token.text for token in doc if not token.is_stop and not token.is_punct and not token.is_digit and token.is_alpha]

    return " ".join(no_stop_word)

remove_stopwords(text)

'India officially Republic country South Asia seventh largest country world area populous country Bounded Indian Ocean south Arabian Sea southwest Bay Bengal southeast shares land borders Pakistan China Nepal Bhutan north Bangladesh Myanmar east'

-  **Apply these function for removing stopwords on real datasets**

In [53]:
import pandas as pd
df = pd.read_csv("/content/drugsComTrain_raw.tsv",sep="\t")
df_rows = df.iloc[:10,:]

In [54]:
df_rows

Unnamed: 0,drugName,condition,review,rating,date,usefulCount
206461,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9.0,"May 20, 2012",27
95260,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8.0,"April 27, 2010",192
92703,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5.0,"December 14, 2009",17
138000,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8.0,"November 3, 2015",10
35696,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9.0,"November 27, 2016",37
155963,Cialis,Benign Prostatic Hyperplasia,"""2nd day on 5mg started to work with rock hard...",2.0,"November 28, 2015",43
165907,Levonorgestrel,Emergency Contraception,"""He pulled out, but he cummed a bit in me. I t...",1.0,"March 7, 2017",5
102654,Aripiprazole,Bipolar Disorde,"""Abilify changed my life. There is hope. I was...",10.0,"March 14, 2015",32
74811,Keppra,Epilepsy,""" I Ve had nothing but problems with the Kepp...",1.0,"August 9, 2016",11
48928,Ethinyl estradiol / levonorgestrel,Birth Control,"""I had been on the pill for many years. When m...",8.0,"December 8, 2016",1


In [55]:
df_rows['review'] = df_rows['review'].apply(remove_stopwords)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rows['review'] = df_rows['review'].apply(remove_stopwords)


In [56]:
df_rows

Unnamed: 0,drugName,condition,review,rating,date,usefulCount
206461,Valsartan,Left Ventricular Dysfunction,effect combination Bystolic Mg Fish Oil,9.0,"May 20, 2012",27
95260,Guanfacine,ADHD,son halfway fourth week Intuniv concerned bega...,8.0,"April 27, 2010",192
92703,Lybrel,Birth Control,oral contraceptive pill cycle light periods ma...,5.0,"December 14, 2009",17
138000,Ortho Evra,Birth Control,time form birth control glad went patch months...,8.0,"November 3, 2015",10
35696,Buprenorphine / naloxone,Opiate Dependence,Suboxone completely turned life feel healthier...,9.0,"November 27, 2016",37
155963,Cialis,Benign Prostatic Hyperplasia,day mg started work rock hard erections experi...,2.0,"November 28, 2015",43
165907,Levonorgestrel,Emergency Contraception,pulled cummed bit took Plan B hours later took...,1.0,"March 7, 2017",5
102654,Aripiprazole,Bipolar Disorde,Abilify changed life hope Zoloft Clonidine sta...,10.0,"March 14, 2015",32
74811,Keppra,Epilepsy,Ve problems Keppera constant shaking arms amp ...,1.0,"August 9, 2016",11
48928,Ethinyl estradiol / levonorgestrel,Birth Control,pill years doctor changed RX chateal effective...,8.0,"December 8, 2016",1


## **Using NLTK**

In [57]:
import nltk
from nltk.tokenize import word_tokenize
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [58]:
from nltk.corpus import stopwords

stopword_lst_txt = stopwords.words('english')    ## stopwords lst of nltk
print(stopword_lst_txt)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

# <b>```Remove Stopwords from given Text```</b>

In [59]:
'''text = "India, officially the Republic of India,[j][20] is a country in South Asia.
It is the seventh-largest country in the world by area and the most populous country. Bounded by the Indian Ocean on the south,
the Arabian Sea on the southwest, and the Bay of Bengal on the southeast, it shares land borders with Pakistan to the west;[k] China,
Nepal, and Bhutan to the north; and Bangladesh and Myanmar to the east.'''

def remove_stopwords(text):

    tokens = word_tokenize(text)

    # remove stopwords and take only alphabetic token and join that token
    no_stop_word = [token for token in tokens if not token in stopword_lst_txt and token.isalpha()]

    return " ".join(no_stop_word)

remove_stopwords(text)

'India officially Republic India j country South Asia It country world area populous country Bounded Indian Ocean south Arabian Sea southwest Bay Bengal southeast shares land borders Pakistan west k China Nepal Bhutan north Bangladesh Myanmar east'