<a href="https://colab.research.google.com/github/janilles/sentimentAnalysis/blob/master/fb_messages.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sentiment analysis of Facebook Messenger data
Looking at sentiment change over time by chat participant.

# Libraries used

In [None]:
import json
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import altair as alt


# Import JSON file data

In [None]:
# location of the exported chat history
path_to_file = 'json_dump.json'

# JSON text is UTF-8; state the encoding explicitly so the read does not
# depend on the platform's default locale encoding
with open(path_to_file, encoding='utf-8') as file:
    chat_history = json.load(file)

In [None]:
# list the top-level keys of the loaded export to see what it contains
chat_history.keys()

# Load messages to dataframe

In [None]:
# one row per entry under the 'messages' key of the export
raw_messages = chat_history['messages']
messages = pd.DataFrame(raw_messages)

# preview the first five rows
messages.head(5)


## Converting timestamp format to date

In [None]:
# helper function
def convert_time(timestamp):
    """Convert an epoch timestamp given in milliseconds to a pandas datetime.

    The value is handed to ``pd.to_datetime`` with ``unit='ms'``, because
    that is the format the timestamps have in the json.
    """
    converted = pd.to_datetime(timestamp, unit='ms')
    return converted

In [None]:
# create new column
# convert_time wraps pd.to_datetime, which accepts a whole Series, so the
# column is converted in one vectorised call instead of once per row
messages['date'] = convert_time(messages['timestamp_ms'])

In [None]:
# preview the first two rows to check the new 'date' column
messages.head(2)

## Get month and year from date

In [None]:
# helper functions
def get_month(date):
    """Return the ``month`` attribute of a date-like object."""
    month = date.month
    return month

def get_year(date):
    """Return the ``year`` attribute of a date-like object."""
    year = date.year
    return year


In [None]:
# split each message date into separate month and year columns
message_dates = messages['date']
messages['month'] = message_dates.map(get_month)
messages['year'] = message_dates.map(get_year)


# Sentiment analysis

In [None]:
# download the VADER lexicon: a dictionary of words
# with positive/negative scores assigned
nltk.download('vader_lexicon')

## Create analyser object

In [None]:
# create analyser object (a single instance, reused by every scoring call below)
sentiment_analyser = SentimentIntensityAnalyzer()

## Get polarity scores

In [None]:
# get polarity scores for a sample sentence, to see what polarity_scores returns
sentiment_analyser.polarity_scores('Have you ever wondered about all the personal data that Facebook collects on its over 2 billion users? It’s time to harness the information Facebook has on you for your own good and discover some insights.')

In [None]:
# helper function
def get_polarity(text):
    """Return the compound polarity score of a message.

    Parameters
    ----------
    text : str
        Message text to score.

    Returns
    -------
    float
        The 'compound' entry of the analyser's polarity scores, or NaN when
        ``text`` is not a string.
    """
    # A row whose 'content' is missing holds NaN (a float) in the dataframe,
    # which the analyser cannot process. Presumably these are messages
    # without text (e.g. media-only) - confirm against the export.
    # NaN rather than 0.0, so pandas skips these rows when averaging instead
    # of pulling the mean towards neutral.
    if not isinstance(text, str):
        return float('nan')

    # we're only interested in the compound score
    return sentiment_analyser.polarity_scores(text)['compound']


In [None]:
# score every message and keep the result in a new 'sentiment' column
message_texts = messages['content']
messages['sentiment'] = message_texts.map(get_polarity)

# preview the first five rows
messages.head(5)


## Who's the more positive/negative participant?

In [None]:
# mean sentiment per participant, most positive first
(
    messages
    .groupby('sender_name')['sentiment']
    .mean()
    .reset_index()
    .sort_values('sentiment', ascending=False)
)


## Visualisation over time

In [None]:
# mean sentiment for every (month, year, sender) combination
group_keys = ['month', 'year', 'sender_name']
year_month = (
    messages
    .groupby(group_keys)['sentiment']
    .mean()
    .reset_index()
)

# preview the first five rows
year_month.head(5)


In [None]:
# One line per sender showing mean sentiment over time.
# 'month' alone repeats every year, so using it as the x axis would overlay
# all years on a single 1-12 scale. Build a real date (first day of each
# month) from the year and month columns and plot on a temporal axis instead.
monthly_sentiment = year_month.assign(
    period=pd.to_datetime(year_month[['year', 'month']].assign(day=1))
)

alt.Chart(monthly_sentiment).mark_line().encode(
    x=alt.X('period:T', title='month'),
    y='sentiment',
    color='sender_name'
)