# NLP using TextBlob + NLTK

> Forked from https://seowithpython.com/how-to-do-text-analysis-using-textblob-and-nltk/

In [31]:
from textblob import TextBlob

In [55]:
import nltk
# Fetch the NLTK resources this notebook relies on into the local nltk_data
# directory; packages that are already present are reported as up-to-date.
# NOTE(review): presumably the tagger backs `blob.tags`, `movie_reviews` feeds
# the NaiveBayesAnalyzer and `wordnet` / `omw-1.4` back `Word.lemmatize` — confirm.
nltk.download('averaged_perceptron_tagger')
nltk.download('movie_reviews')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/nelth/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/nelth/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package wordnet to /home/nelth/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/nelth/nltk_data...
[nltk_data]   Unzipping corpora/omw-1.4.zip.


True

In [35]:
# Sample passage used by every English-language example in this section.
text = """Your mind is tossing on the ocean,
       There where your argosies, with portly sail
       Like signiors and rich burghers on the flood,
       Or as it were the pageants of the sea,
       Do overpeer the petty traffickers
       That curtsy to them, do them reverence,
       As they fly by them with their woven wings."""
# Wrap the passage in a TextBlob (no explicit analyzer or tagger is passed).
blob = TextBlob(text)
# Display the blob's repr as the cell output.
blob

TextBlob("Your mind is tossing on the ocean,
       There where your argosies, with portly sail
       Like signiors and rich burghers on the flood,
       Or as it were the pageants of the sea,
       Do overpeer the petty traffickers
       That curtsy to them, do them reverence,
       As they fly by them with their woven wings.")

## Pre-processing

In [36]:
# Word-level tokens of the passage (the output shows punctuation is not included).
tokens = blob.words
tokens

WordList(['Your', 'mind', 'is', 'tossing', 'on', 'the', 'ocean', 'There', 'where', 'your', 'argosies', 'with', 'portly', 'sail', 'Like', 'signiors', 'and', 'rich', 'burghers', 'on', 'the', 'flood', 'Or', 'as', 'it', 'were', 'the', 'pageants', 'of', 'the', 'sea', 'Do', 'overpeer', 'the', 'petty', 'traffickers', 'That', 'curtsy', 'to', 'them', 'do', 'them', 'reverence', 'As', 'they', 'fly', 'by', 'them', 'with', 'their', 'woven', 'wings'])

In [39]:
# (word, part-of-speech tag) pairs for every token of the passage.
pos_tagging = blob.tags
pos_tagging

[('Your', 'PRP$'),
 ('mind', 'NN'),
 ('is', 'VBZ'),
 ('tossing', 'VBG'),
 ('on', 'IN'),
 ('the', 'DT'),
 ('ocean', 'NN'),
 ('There', 'EX'),
 ('where', 'WRB'),
 ('your', 'PRP$'),
 ('argosies', 'NNS'),
 ('with', 'IN'),
 ('portly', 'RB'),
 ('sail', 'VBP'),
 ('Like', 'IN'),
 ('signiors', 'NNS'),
 ('and', 'CC'),
 ('rich', 'JJ'),
 ('burghers', 'NNS'),
 ('on', 'IN'),
 ('the', 'DT'),
 ('flood', 'NN'),
 ('Or', 'CC'),
 ('as', 'IN'),
 ('it', 'PRP'),
 ('were', 'VBD'),
 ('the', 'DT'),
 ('pageants', 'NNS'),
 ('of', 'IN'),
 ('the', 'DT'),
 ('sea', 'NN'),
 ('Do', 'NNP'),
 ('overpeer', 'VB'),
 ('the', 'DT'),
 ('petty', 'JJ'),
 ('traffickers', 'NNS'),
 ('That', 'WDT'),
 ('curtsy', 'VBP'),
 ('to', 'TO'),
 ('them', 'PRP'),
 ('do', 'VB'),
 ('them', 'PRP'),
 ('reverence', 'VB'),
 ('As', 'IN'),
 ('they', 'PRP'),
 ('fly', 'VBP'),
 ('by', 'IN'),
 ('them', 'PRP'),
 ('with', 'IN'),
 ('their', 'PRP$'),
 ('woven', 'JJ'),
 ('wings', 'NNS')]

## Analysis

### TextBlob default

In [41]:
# Print the sentiment of each sentence TextBlob detects in the passage, using
# the analyzer `blob` was created with (none was passed explicitly).
for sentence in blob.sentences:
    print(sentence.sentiment)

Sentiment(polarity=0.5875, subjectivity=0.825)


---

### Another analyser

In [46]:
from textblob.sentiments import NaiveBayesAnalyzer

# Wrap the same passage again, this time with the NaiveBayesAnalyzer.
# NOTE(review): `blog` (not `blob`) is a separate variable; the earlier `blob`
# is left untouched.
blog = TextBlob(text, analyzer=NaiveBayesAnalyzer())
# With this analyzer the sentiment is a classification plus p_pos / p_neg.
blog.sentiment

Sentiment(classification='pos', p_pos=0.9989437404713962, p_neg=0.0010562595286025896)

In [51]:
# blob.detect_language()
# Disabled: this call needs access to the Google API.

In [58]:
from textblob import Word

# Compare stemming and lemmatization on a single plural word.
word = Word('varieties')

# Stemming result (see the printed output for how it differs from the lemma).
print('stem ->', word.stem())

# Lemmatization result.
print('lemmatize ->', word.lemmatize())

step -> varieti
lemmatize -> variety


# Testing TextBlob with the French language

> Forked from https://www.kaggle.com/fedi1996/french-sentiment-analysis-using-textblob

In [60]:
import os
import json
import random
from pathlib import Path
from zipfile import ZipFile

In [61]:
# Read the Kaggle API credentials from `kaggle.json` in the user's home
# directory. The context manager closes the file handle as soon as the JSON
# has been parsed instead of leaving it open for the garbage collector.
with open(Path.home() / 'kaggle.json', 'r') as credentials_file:
    kaggle_data = json.load(credentials_file)
KAGGLE_USERNAME = kaggle_data['username']
KAGGLE_KEY = kaggle_data['key']

In [62]:
# Report success only when both credential values are non-empty.
if KAGGLE_USERNAME and KAGGLE_KEY:
    print('Successfully imported access to kaggle')

Successfully imported access to kaggle


In [63]:
# Export both credentials as environment variables of this process.
os.environ.update({
    'KAGGLE_USERNAME': KAGGLE_USERNAME,
    'KAGGLE_KEY': KAGGLE_KEY,
})

In [66]:
# Download the `fedi1996/insurance-reviews-france` dataset with the Kaggle CLI.
# NOTE(review): presumably the CLI authenticates through the KAGGLE_USERNAME /
# KAGGLE_KEY environment variables — confirm.
print('Dataset will be downloaded as zip under the current directory')
!kaggle datasets download -d fedi1996/insurance-reviews-france

Dataset will be downloaded as zip under the current directory
Downloading insurance-reviews-france.zip to /home/nelth/WORKSPACE/datascience-playground/notebooks/machine learning
  0%|                                               | 0.00/1.41M [00:00<?, ?B/s]
100%|██████████████████████████████████████| 1.41M/1.41M [00:00<00:00, 25.4MB/s]


In [67]:
# Remove any existing data/ directory, then extract the downloaded archive into ./data.
# NOTE(review): `*.zip` assumes exactly one zip file sits in the current
# directory — confirm, since extra archives would change what unzip does.
!rm -rf data/ && unzip *.zip -d ./data

Archive:  insurance-reviews-france.zip
  inflating: ./data/Comments.csv     


In [68]:
# List the extracted files to confirm the archive contents.
!ls data/

Comments.csv


In [71]:
import pandas as pd 
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import re
import spacy
from spacy.lang.fr.stop_words import STOP_WORDS
import string
from textblob import Blobber
from textblob_fr import PatternTagger, PatternAnalyzer
# Shared Blobber configured with the PatternTagger and PatternAnalyzer imported
# from textblob_fr; `tb(...)` is called later to score each comment.
tb = Blobber(pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
import plotly.graph_objects as go
import plotly.express as px

In [97]:
# Load the extracted reviews and preview the first rows.
df = pd.read_csv('./data/Comments.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Comment,Month,Year
0,0,Réassurez-moi,Ravi ! La recherche et la comparaison des mutu...,8,2019
1,1,Réassurez-moi,Très satisfaite ! Un conseiller à l'écoute et ...,8,2019
2,2,Réassurez-moi,Je vous avais contacté pour la mise en place e...,8,2019
3,3,Réassurez-moi,Quelques mots sur Réassurez-moi. Je suis très ...,8,2019
4,4,Réassurez-moi,Votre cabinet m'a permis de pouvoir bénéficier...,8,2019


In [98]:
# Work on a copy without the `Unnamed: 0` column
# (presumably a row index saved into the CSV — verify).
data = df.drop(columns=['Unnamed: 0'])

In [99]:
# (rows, columns) before removing rows with missing values.
data.shape

(11017, 4)

In [100]:
# Percentage of missing values per column, shown with the highest first.
NAN = (
    data.isna()
    .mean()
    .mul(100)
    .rename_axis("column_name")
    .reset_index(name="percentage")
)
NAN.sort_values("percentage", ascending=False)

Unnamed: 0,column_name,percentage
1,Comment,0.14523
0,Name,0.0
2,Month,0.0
3,Year,0.0


In [101]:
# Drop every row that has at least one missing value; per the table above,
# only `Comment` contains any.
data = data.dropna()

In [102]:
# (rows, columns) after removing rows with missing values.
data.shape

(11001, 4)

In [103]:
# Recompute the per-column missing-value percentage to confirm nothing is left.
NAN = pd.DataFrame(
    {
        "column_name": list(data.columns),
        "percentage": [data[col].isna().mean() * 100 for col in data.columns],
    }
)
NAN.sort_values("percentage", ascending=False)

Unnamed: 0,column_name,percentage
0,Name,0.0
1,Comment,0.0
2,Month,0.0
3,Year,0.0


In [105]:
# Lower-case every comment so later token comparisons ignore case.
data["Comment"] = data["Comment"].str.lower()

In [106]:
# Tokenize each comment: every non-word character is replaced by a space and
# the result is split on whitespace, giving one token list per comment.
# The pattern is a raw string so that `\W` reaches the regex engine without
# Python flagging it as an invalid string escape sequence.
AComment = [
    re.sub(r"\W", " ", comment).split()
    for comment in data["Comment"].apply(str)
]

In [107]:
# Store the per-comment token lists in a new `Word_Tok` column and preview the frame.
data["Word_Tok"] = AComment
data.head()

Unnamed: 0,Name,Comment,Month,Year,Word_Tok
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des..."
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout..."
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en..."
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis..."
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé..."


In [108]:
# Words taken out of the stop-word set so the filtering step keeps them.
# NOTE(review): these look like French negation words, presumably kept because
# they matter for sentiment — confirm.
deselect_stop_words = ['n\'', 'ne','pas','plus','personne','aucun','ni','aucune','rien']

# spaCy's French stop words minus the entries above (entries missing from the
# set are simply ignored).
stop_words = set(STOP_WORDS).difference(deselect_stop_words)

In [109]:
# Rebuild each comment as a space-joined string of the tokens that are longer
# than one character and are not stop words.
AllfilteredComment = [
    ' '.join(
        token
        for token in token_list
        if len(token) != 1 and token not in stop_words
    )
    for token_list in data["Word_Tok"]
]

In [110]:
# Store the filtered comments in a new `CommentAferPreproc` column and preview the frame.
data["CommentAferPreproc"]=AllfilteredComment
data.head()

Unnamed: 0,Name,Comment,Month,Year,Word_Tok,CommentAferPreproc
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des...",ravi recherche comparaison mutuelles simples r...
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout...",satisfaite conseiller écoute sympathique bon p...
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en...",contacté mise place janvier dernier contrat mu...
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis...",mots réassurez satisfait services cabinet trou...
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé...",cabinet permis pouvoir bénéficier contrat comp...


In [113]:
def _label_for_score(score):
    """Map a numeric score to 'Positive' (> 0), 'Negative' (< 0) or 'Neutral' (otherwise)."""
    if score > 0:
        return 'Positive'
    if score < 0:
        return 'Negative'
    return 'Neutral'


# Label every pre-processed comment from the first element of the sentiment
# returned by `tb` (presumably the polarity — confirm against textblob_fr).
senti_list = [
    _label_for_score(tb(cleaned_comment).sentiment[0])
    for cleaned_comment in data["CommentAferPreproc"]
]

> `tb` is the `textblob.Blobber` instance created earlier with the French `PatternTagger` and `PatternAnalyzer`.

In [114]:
# Store the Positive / Negative / Neutral labels in a new `sentiment` column and preview the frame.
data["sentiment"] = senti_list
data.head()

Unnamed: 0,Name,Comment,Month,Year,Word_Tok,CommentAferPreproc,sentiment
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des...",ravi recherche comparaison mutuelles simples r...,Positive
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout...",satisfaite conseiller écoute sympathique bon p...,Positive
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en...",contacté mise place janvier dernier contrat mu...,Positive
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis...",mots réassurez satisfait services cabinet trou...,Positive
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé...",cabinet permis pouvoir bénéficier contrat comp...,Positive
