# WhatsApp Chat Sentiment Analysis Between Me and My Friend Using Python

In [2]:
pip install emoji

Collecting emoji
  Downloading emoji-2.2.0.tar.gz (240 kB)
     -------------------------------------- 240.9/240.9 kB 2.5 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py): started
  Building wheel for emoji (setup.py): finished with status 'done'
  Created wheel for emoji: filename=emoji-2.2.0-py3-none-any.whl size=234925 sha256=2f2a288e9f66fe14932e65e01fd6d6959677378414ed762913323988540bfe4f
  Stored in directory: c:\users\moade\appdata\local\pip\cache\wheels\9a\b8\0f\f580817231cbf59f6ade9fd132ff60ada1de9f7dc85521f857
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-2.2.0
Note: you may need to restart the kernel to use updated packages.


In [1]:
import re
import pandas as pd
import numpy as np
import emoji
from collections import Counter
import matplotlib.pyplot as plt
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Sentiment analysis part
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single VADER analyzer instance, created once and shared by the scoring code in this notebook.
# NOTE(review): NLTK presumably needs the 'vader_lexicon' resource to be downloaded
# before this constructor succeeds on a fresh environment — confirm.
sentiments = SentimentIntensityAnalyzer()

# NB: **The code below works for a group chat dataset or for a conversation with one person. All the functions defined below prepare your data for sentiment analysis as well as for any other data science task.**

# Extracting time from the chat

In [2]:
def date_time(s):
    """Tell whether a line starts with a chat-export timestamp header.

    A header looks like ``<n>/<n>/<n>, <h>:<m>`` optionally followed by an
    AM/PM marker (upper or lower case) and then `` -``.

    Parameters
    ----------
    s : str
        One line of the exported chat.

    Returns
    -------
    bool
        True when the line begins with such a header, False otherwise.
    """
    # Raw string: "\/" is not a valid Python string escape, so the non-raw
    # form triggers an invalid-escape warning on recent interpreters.
    pattern = r'^([0-9]+)(\/)([0-9]+)(\/)([0-9]+), ([0-9]+):([0-9]+)[ ]?(AM|PM|am|pm)? -'
    return re.match(pattern, s) is not None


# Find Authors or Contact

In [3]:
def find_author(s):
    """Tell whether `s` has the form ``"<author>: <text>"``.

    The check looks for the first ``": "`` separator and requires a non-empty
    part in front of it. Counting colons instead would reject any authored
    message whose text itself contains a colon (a URL, a clock time, ...).

    Parameters
    ----------
    s : str
        The part of a chat line that follows the timestamp header.

    Returns
    -------
    bool
        True when an author prefix is present, False otherwise.
    """
    author, separator, _ = s.partition(": ")
    return bool(separator) and bool(author)


# Finding Messages

In [4]:
def getDatapoint(line):
    """Split one timestamped chat line into its fields.

    Parameters
    ----------
    line : str
        A line of the form ``"<date>, <time> - <rest>"``.

    Returns
    -------
    tuple
        ``(date, time, author, message)``. ``author`` is None when
        ``find_author`` does not recognise an author prefix in ``<rest>``.

    Raises
    ------
    ValueError
        If the part before the first ``' - '`` does not split into exactly
        two pieces on ``", "``.
    """
    # Cut only at the FIRST ' - ' so a dash used inside the message text is
    # kept verbatim instead of being collapsed to a single space.
    dateTime, _, message = line.partition(' - ')
    date, time = dateTime.split(", ")
    if find_author(message):
        # Likewise cut only at the first ': ' so colons in the text survive.
        author, _, message = message.partition(": ")
    else:
        author = None
    return date, time, author, message

# Data Preparation

In [6]:
# Parse the exported chat into rows of [date, time, author, message].
# Lines without a timestamp header are treated as continuations of the
# message currently being buffered.
datawat = []
conversation = 'Turpsy_Cimav.txt'
with open(conversation, encoding="utf-8") as fp:
    # Discard the first line of the export.
    # NOTE(review): presumably the encryption notice WhatsApp puts first — confirm.
    fp.readline()
    messageBuffer = []
    date, time, author = None, None, None
    for line in fp:
        line = line.strip()
        if date_time(line):
            # A new header closes the previous message: store it first.
            if messageBuffer:
                datawat.append([date, time, author, ' '.join(messageBuffer)])
            messageBuffer.clear()
            date, time, author, message = getDatapoint(line)
            messageBuffer.append(message)
        else:
            messageBuffer.append(line)
    # Flush the message still buffered at end of file; without this the last
    # message of the chat is never recorded.
    if messageBuffer:
        datawat.append([date, time, author, ' '.join(messageBuffer)])

# The Sentiment Analysis Part

In [9]:
# Build a frame from the parsed rows and attach VADER scores to each message.
df = pd.DataFrame(datawat, columns=["Date", 'Time', 'Author', 'Message'])
# NOTE(review): no format/dayfirst is passed, so pandas infers the day/month
# order — confirm it matches the locale of the export.
df['Date'] = pd.to_datetime(df['Date'])

# Rows with any missing field (e.g. no author) are dropped. The explicit
# .copy() makes the result an independent frame, so the column assignments
# below cannot raise SettingWithCopyWarning.
datawat = df.dropna().copy()
# Score every message once and reuse the result for all three columns.
scores = [sentiments.polarity_scores(text) for text in datawat["Message"]]
datawat["Positive"] = [score["pos"] for score in scores]
datawat["Negative"] = [score["neg"] for score in scores]
datawat["Neutral"] = [score["neu"] for score in scores]
print(datawat.head())

        Date   Time                Author  \
0 2022-03-31  11:48  Sincerity_of_Purpose   
2 2022-03-31  11:48  Sincerity_of_Purpose   
3 2022-03-31  12:18          Turpsy_Cimav   
5 2022-04-03  15:54          Turpsy_Cimav   
6 2022-04-03  15:54  Sincerity_of_Purpose   

                                    Message  Positive  Negative  Neutral  
0                         Have you see this     0.000       0.0    1.000  
2  In case you have who might be interested     0.278       0.0    0.722  
3                             Okay,  thanks     1.000       0.0    0.000  
5   Na people in Mexico state qualifies sha     0.000       0.0    1.000  
6                                       Yes     1.000       0.0    0.000  


In [10]:
# Total each score column over all rows: x = positive, y = negative, z = neutral.
x, y, z = (sum(datawat[column]) for column in ("Positive", "Negative", "Neutral"))

def sentiment_score(a, b, c):
    """Print a label for whichever of the three totals is strictly largest.

    Parameters
    ----------
    a, b, c : numbers
        Positive, negative and neutral totals, in that order.

    Prints the positive label when `a` exceeds both others, the negative
    label when `b` exceeds both others, and the neutral label in every other
    case (including ties). Returns None.
    """
    if a > b and a > c:
        verdict = "Positive 😊 "
    elif b > a and b > c:
        verdict = "Negative 😠 "
    else:
        verdict = "Neutral 🙂 "
    print(verdict)
# Print the overall label for the chat from the summed positive (x),
# negative (y) and neutral (z) scores.
sentiment_score(x, y, z)

Neutral 🙂 


* The summed scores show that the neutral component dominates the messages between me and Turpsy Cimav, which means the conversation is, overall, neither clearly positive nor clearly negative.
