
#  <font color='red'> French Sentiment Analysis Using TextBlob </font>

# I hope you find this kernel useful
# Your <font color='blue'> UPVOTES </font> would be highly appreciated

In [1]:
!pip install textblob_fr     #for installation


Collecting textblob_fr
  Downloading textblob_fr-0.2.0-py2.py3-none-any.whl (561 kB)
[K     |████████████████████████████████| 561 kB 1.3 MB/s 
Installing collected packages: textblob-fr
Successfully installed textblob-fr-0.2.0
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import pandas as pd 
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
import re
import spacy
from spacy.lang.fr.stop_words import STOP_WORDS
import string
from textblob import Blobber
from textblob_fr import PatternTagger, PatternAnalyzer
tb = Blobber(pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
import plotly.graph_objects as go
import plotly.express as px


In [3]:
# Read the raw review comments and preview the first five rows.
data = pd.read_csv(
    '../input/insurance-reviews-france/Comments.csv'
)
data.head(5)

Unnamed: 0.1,Unnamed: 0,Name,Comment,Month,Year
0,0,Réassurez-moi,Ravi ! La recherche et la comparaison des mutu...,8,2019
1,1,Réassurez-moi,Très satisfaite ! Un conseiller à l'écoute et ...,8,2019
2,2,Réassurez-moi,Je vous avais contacté pour la mise en place e...,8,2019
3,3,Réassurez-moi,Quelques mots sur Réassurez-moi. Je suis très ...,8,2019
4,4,Réassurez-moi,Votre cabinet m'a permis de pouvoir bénéficier...,8,2019


 <font color='gray'> **Drop the unnamed index column** </font>

In [4]:
# 'Unnamed: 0' is not used by any later cell; remove it.
data = data.drop(columns=['Unnamed: 0'])


 <font color='gray'> **Check NaN values** </font>

In [5]:
# One row per column of `data`: its name and the percentage of missing values.
NAN = pd.DataFrame(
    [(col, data[col].isna().mean() * 100) for col in data],
    columns=["column_name", "percentage"],
)
# Show the columns with the most missing values first.
NAN.sort_values(by="percentage", ascending=False)

Unnamed: 0,column_name,percentage
1,Comment,0.14523
0,Name,0.0
2,Month,0.0
3,Year,0.0


 <font color='gray'> **Drop NaN values** </font>

In [6]:
# Discard every row that has at least one missing value.
data = data.dropna(axis=0, how="any")

 <font color='gray'> **Verify that no NaN values remain** </font>

In [7]:
# Recompute the per-column missing-value percentages after the cleanup.
NAN = pd.DataFrame(
    [(col, data[col].isna().mean() * 100) for col in data],
    columns=["column_name", "percentage"],
)
# Show the columns with the most missing values first.
NAN.sort_values(by="percentage", ascending=False)

Unnamed: 0,column_name,percentage
0,Name,0.0
1,Comment,0.0
2,Month,0.0
3,Year,0.0



 <font color='gray'> **Convert the comments to lowercase**</font>

In [8]:
# Lowercase the comment text so later token comparisons are case-insensitive.
data = data.assign(Comment=data["Comment"].str.lower())


 <font color='gray'> **Word tokenization and punctuation removal** </font>

In [9]:
# Build one token list per comment: every non-word character (punctuation,
# apostrophes, hyphens, ...) is replaced by a space, then the text is split on
# whitespace.
# The pattern is written as a raw string: "\W" inside a normal string literal
# is an invalid escape sequence (DeprecationWarning, and a SyntaxWarning from
# Python 3.12 on).
AComment = [
    re.sub(r"\W", " ", comment).split()
    for comment in data["Comment"].apply(str)
]


 <font color='gray'> **Add the token lists as a new column** </font>

In [10]:
# Attach the token lists (one list per row) as the `Word_Tok` column.
data = data.assign(Word_Tok=AComment)
data.head(5)

Unnamed: 0,Name,Comment,Month,Year,Word_Tok
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des..."
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout..."
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en..."
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis..."
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé..."


<font color='gray'> **Build the stop-word set from spaCy's French defaults, excluding negation words so that they are kept in the comments** </font>

In [11]:
# Words that must NOT be treated as stop words, so they survive the
# stop-word filtering applied to the comments.
deselect_stop_words = ['n\'', 'ne','pas','plus','personne','aucun','ni','aucune','rien']

# spaCy's French stop words minus the words listed above; entries of the list
# that are not in STOP_WORDS are simply ignored by the set difference.
stop_words = set(STOP_WORDS).difference(deselect_stop_words)

<font color='gray'> **Remove stop words and single-character tokens** </font>

In [12]:
# For each tokenized comment keep only the tokens that are neither stop words
# nor a single character, and join the survivors back into one string.
AllfilteredComment = [
    ' '.join(
        token for token in tokens
        if token not in stop_words and len(token) != 1
    )
    for tokens in data["Word_Tok"]
]

<font color='gray'> **Add new column: comment after preprocessing** </font>

In [13]:
# Store the filtered, re-joined comment text next to the original columns.
data = data.assign(CommentAferPreproc=AllfilteredComment)
data.head(5)

Unnamed: 0,Name,Comment,Month,Year,Word_Tok,CommentAferPreproc
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des...",ravi recherche comparaison mutuelles simples r...
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout...",satisfaite conseiller écoute sympathique bon p...
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en...",contacté mise place janvier contrat mutuelle s...
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis...",mots réassurez satisfait services cabinet trou...
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé...",cabinet permis pouvoir bénéficier contrat comp...


<font color='gray'> **Sentiment Analysis with TextBlob** </font>

In [14]:
# Score each preprocessed comment with the French TextBlob pipeline `tb` and
# map the first component of `.sentiment` to a label:
# > 0 -> 'Positive', < 0 -> 'Negative', anything else -> 'Neutral'.
senti_list = [
    'Positive' if score > 0 else 'Negative' if score < 0 else 'Neutral'
    for score in (tb(text).sentiment[0] for text in data["CommentAferPreproc"])
]



<font color='gray'> **Add column: sentiment** </font>

In [15]:
# Attach the predicted label of every comment as the `sentiment` column.
data = data.assign(sentiment=senti_list)
data.head(5)

Unnamed: 0,Name,Comment,Month,Year,Word_Tok,CommentAferPreproc,sentiment
0,Réassurez-moi,ravi ! la recherche et la comparaison des mutu...,8,2019,"[ravi, la, recherche, et, la, comparaison, des...",ravi recherche comparaison mutuelles simples r...,Positive
1,Réassurez-moi,très satisfaite ! un conseiller à l'écoute et ...,8,2019,"[très, satisfaite, un, conseiller, à, l, écout...",satisfaite conseiller écoute sympathique bon p...,Positive
2,Réassurez-moi,je vous avais contacté pour la mise en place e...,8,2019,"[je, vous, avais, contacté, pour, la, mise, en...",contacté mise place janvier contrat mutuelle s...,Positive
3,Réassurez-moi,quelques mots sur réassurez-moi. je suis très ...,8,2019,"[quelques, mots, sur, réassurez, moi, je, suis...",mots réassurez satisfait services cabinet trou...,Positive
4,Réassurez-moi,votre cabinet m'a permis de pouvoir bénéficier...,8,2019,"[votre, cabinet, m, a, permis, de, pouvoir, bé...",cabinet permis pouvoir bénéficier contrat comp...,Positive


**Data Visualization**

In [16]:
# Number of rows per sentiment label, as a two-column frame
# (`sentiment`, `Name` = count) with a fresh 0..n-1 index.
Number_sentiment = (
    data.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)

In [17]:
# Count of reviews per sentiment label, one color per label.
fig = px.histogram(data, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [18]:
# Share of each sentiment label over all reviews.
fig = px.pie(
    Number_sentiment,
    values=Number_sentiment['Name'],
    names=Number_sentiment['sentiment'],
    color_discrete_sequence=px.colors.sequential.Emrld,
)
fig.show()

In [19]:
# Count of comments per `Name` value, one color per `Name`.
fig = px.histogram(data, x="Name", color="Name").update_layout(
    title_text='Number of Comments per Assurance',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [20]:
# Count of comments per year, one color per year.
fig = px.histogram(data, x="Year", color="Year").update_layout(
    title_text='Number of Comments per Year',
    xaxis_title_text='Year',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [21]:
# Count of comments per month, one color per month.
fig = px.histogram(data, x="Month", color="Month").update_layout(
    title_text='Number of Comments per Month',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [22]:
# Count of comments per `Name` value, split by sentiment label.
fig = px.histogram(data, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [23]:
# Count of comments per year, split by sentiment label.
fig = px.histogram(data, x="Year", color="sentiment").update_layout(
    title_text='Sentiments per Year',
    xaxis_title_text='Year',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [24]:
# Count of comments per month, split by sentiment label.
fig = px.histogram(data, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2015**

In [25]:
# Rows whose `Year` is 2015, re-indexed from 0.
Data_2015 = data.loc[data['Year'] == 2015].reset_index(drop=True)

In [26]:
# Number of 2015 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2015 = (
    Data_2015.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)

In [27]:
# 2015: count of reviews per sentiment label.
fig = px.histogram(Data_2015, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews in 2015',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [28]:
# 2015: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2015,
    values=Number_sentiment_2015['Name'],
    names=Number_sentiment_2015['sentiment'],
    color_discrete_sequence=px.colors.sequential.Darkmint,
)
fig.show()

In [29]:
# 2015: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2015, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2015',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [30]:
# 2015: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2015, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2015',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2016**


In [31]:
# Rows whose `Year` is 2016, re-indexed from 0.
Data_2016 = data.loc[data['Year'] == 2016].reset_index(drop=True)

In [32]:
# Number of 2016 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2016 = (
    Data_2016.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)

In [33]:
# 2016: count of reviews per sentiment label.
# `Data_2016` is already built by the 2016 filter cell earlier in this
# section, so it is reused here instead of filtering `data` a second time
# (this matches how the other per-year sections are written).
fig = px.histogram(Data_2016, x="sentiment", color="sentiment")
fig.update_layout(
    title_text='Sentiment of reviews in 2016',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [34]:
# 2016: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2016,
    values=Number_sentiment_2016['Name'],
    names=Number_sentiment_2016['sentiment'],
    color_discrete_sequence=px.colors.sequential.Darkmint,
)
fig.show()

In [35]:
# 2016: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2016, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2016',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [36]:
# 2016: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2016, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2016',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2017**


In [37]:
# Rows whose `Year` is 2017, re-indexed from 0.
Data_2017 = data.loc[data['Year'] == 2017].reset_index(drop=True)

In [38]:
# Number of 2017 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2017 = (
    Data_2017.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)

In [39]:
# 2017: count of reviews per sentiment label.
fig = px.histogram(Data_2017, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews in 2017',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [40]:
# 2017: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2017,
    values=Number_sentiment_2017['Name'],
    names=Number_sentiment_2017['sentiment'],
    color_discrete_sequence=px.colors.sequential.Emrld,
)
fig.show()

In [41]:
# 2017: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2017, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2017',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [42]:
# 2017: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2017, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2017',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2018**


In [43]:
# Rows whose `Year` is 2018, re-indexed from 0.
Data_2018 = data.loc[data['Year'] == 2018].reset_index(drop=True)

In [44]:
# Number of 2018 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2018 = (
    Data_2018.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)


In [45]:
# 2018: count of reviews per sentiment label.
fig = px.histogram(Data_2018, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews in 2018',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [46]:
# 2018: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2018,
    values=Number_sentiment_2018['Name'],
    names=Number_sentiment_2018['sentiment'],
    color_discrete_sequence=px.colors.sequential.Darkmint,
)
fig.show()

In [47]:
# 2018: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2018, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2018',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [48]:
# 2018: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2018, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2018',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2019**


In [49]:
# Rows whose `Year` is 2019, re-indexed from 0.
Data_2019 = data.loc[data['Year'] == 2019].reset_index(drop=True)

In [50]:
# Number of 2019 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2019 = (
    Data_2019.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)

In [51]:
# 2019: count of reviews per sentiment label.
fig = px.histogram(Data_2019, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews in 2019',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [52]:
# 2019: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2019,
    values=Number_sentiment_2019['Name'],
    names=Number_sentiment_2019['sentiment'],
    color_discrete_sequence=px.colors.sequential.Darkmint,
)
fig.show()

In [53]:
# 2019: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2019, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2019',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [54]:
# 2019: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2019, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2019',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

**Sentiment analysis in 2020**


In [55]:
# Rows whose `Year` is 2020, re-indexed from 0.
Data_2020 = data.loc[data['Year'] == 2020].reset_index(drop=True)

In [56]:
# Number of 2020 rows per sentiment label (`sentiment`, `Name` = count).
Number_sentiment_2020 = (
    Data_2020.groupby(["sentiment"])["Name"]
    .count()
    .reset_index()
)


In [57]:
# 2020: count of reviews per sentiment label.
fig = px.histogram(Data_2020, x="sentiment", color="sentiment").update_layout(
    title_text='Sentiment of reviews in 2020',
    xaxis_title_text='Sentiment',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [58]:
# 2020: share of each sentiment label.
fig = px.pie(
    Number_sentiment_2020,
    values=Number_sentiment_2020['Name'],
    names=Number_sentiment_2020['sentiment'],
    color_discrete_sequence=px.colors.sequential.Emrld,
)
fig.show()

In [59]:
# 2020: count of comments per `Name` value, split by sentiment label.
fig = px.histogram(Data_2020, x="Name", color="sentiment").update_layout(
    title_text='Sentiments per Assurance in 2020',
    xaxis_title_text='Assurance',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()

In [60]:
# 2020: count of comments per month, split by sentiment label.
fig = px.histogram(Data_2020, x="Month", color="sentiment").update_layout(
    title_text='Sentiments per Month in 2020',
    xaxis_title_text='Month',
    yaxis_title_text='Number of Comments',
    bargap=0.2,
    bargroupgap=0.1,
)
fig.show()