### Imports

In [None]:
import pandas as pd
import json
import pandas as pd
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8*' and
# later removed the old names; prefer the new name and fall back to the old
# one so the notebook runs on both older and newer Matplotlib releases.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
import time
import numpy as np

import os

from datetime import datetime, timedelta

### Take in a filepath to a .json of Facebook chat data

In [None]:
# Ask the user which exported chat file to analyze.
file_path = input("enter filepath of FB chat .json data : ")

# JSON text is UTF-8 by specification, so do not rely on the platform's
# default encoding when reading it.
with open(file_path, encoding='utf-8') as f:
    raw = json.load(f)

# Keep only the messages that carry a 'content' field, walking the export in
# reverse order (presumably the export is newest-first, which would make the
# rows chronological -- confirm against a real export).
text_messages = [msg for msg in reversed(raw['messages']) if 'content' in msg]

# three columns I want in my Pandas DF
senders_list = [msg['sender_name'] for msg in text_messages]
timestamps_list = [msg['timestamp_ms'] for msg in text_messages]
contents_list = [msg['content'] for msg in text_messages]

# .json to dict
df = {'name': senders_list, 'timestamp': timestamps_list, 'message': contents_list}

# dict to Pandas DF
clean = pd.DataFrame(data=df)

# Save a .csv copy of the cleaned DF and read it back in.
# NOTE: the file name is the 8th '/'-separated component of file_path, so the
# path must contain at least 8 components or this raises IndexError.
csv_path = 'clean_csv_files/{}.csv'.format(file_path.split('/', 8)[7])

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs('clean_csv_files', exist_ok=True)
clean.to_csv(csv_path)

data = pd.read_csv(csv_path)

# Convert the millisecond timestamps (UTC) to California wall-clock time.
# Using the named time zone instead of a fixed 8-hour offset keeps the hours
# correct during daylight saving time; the zone info is dropped afterwards so
# 'date_time' stays a naive datetime column as before.
data['date_time'] = (
    pd.to_datetime(data['timestamp'], unit='ms', utc=True)
    .dt.tz_convert('America/Los_Angeles')
    .dt.tz_localize(None)
)

# Hour-of-day column, used to count messages per hour of the day.
hours = data['date_time'].dt.hour.to_list()

data['hour'] = hours

## Messages by Hour of Day

In [None]:
def messages_by_hour():
    """Draw a bar chart of the number of messages sent in each hour of the day.

    Reads the module-level ``data`` DataFrame (its ``hour`` column) and
    ``file_path`` (used for the chart title). The chart is drawn on a new
    figure; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(15, 5))

    all_hours = list(range(24))
    # value_counts() omits hours in which no message was sent; reindex over
    # 0..23 with zeros so the bar heights always line up with the 24 x positions.
    counts = data.hour.value_counts().reindex(all_hours, fill_value=0)

    ax.bar(all_hours, counts.to_list(), color='purple')
    ax.set_title("Messages by Hour of Day -- {}".format(file_path.split('/', 8)[7].split('_')[0].upper()))
    ax.set_ylabel("# of Messages")
    ax.set_xlabel("Hour of Day")

## Shares of Messages by User

In [None]:
def homemade_pie_with_name(dictionary, title, fig_w=13, fig_h=10):
    """Render a mapping of label -> value as a pie chart.

    Args:
        dictionary: mapping whose keys become the wedge labels and whose
            values become the wedge sizes.
        title: title placed above the chart.
        fig_w: figure width passed to ``plt.subplots``.
        fig_h: figure height passed to ``plt.subplots``.

    Returns:
        Whatever ``Axes.pie`` returns for the drawn chart.
    """
    _, axes = plt.subplots(figsize=(fig_w, fig_h))

    # Keys and values of a dict iterate in the same order, so the two lists
    # stay aligned label-for-size.
    wedge_labels = list(dictionary.keys())
    wedge_sizes = list(dictionary.values())

    result = axes.pie(
        wedge_sizes,
        labels=wedge_labels,
        autopct='%1.2f%%',
        shadow=True,
    )
    axes.set_title(title)
    return result

## Message Counts per User by Time Slot

In [None]:
def messages_by_timeslot():
    """Print per-user message counts for four six-hour slots of the day.

    Reads the module-level ``data`` DataFrame (``hour`` and ``name`` columns)
    and ``file_path`` (used for the chat name in each heading). For every
    slot a coloured heading is printed, followed by one line per user with
    that user's message count. Nothing is returned.
    """
    # ANSI escape codes used to colour the terminal output.
    red = "\033[1;31m"
    blue = "\033[1;34m"
    green = "\033[1;32m"
    purple = "\033[0;35m"
    reset = "\033[0m"

    chat_name = file_path.split('/', 8)[7].split('_')[0].upper()

    # (first hour inclusive, last hour exclusive, heading label, colour)
    slots = [
        (0, 6, "12AM to 6AM", purple),    # midnight to 6 AM
        (6, 12, "6AM to 12PM", blue),     # 6 AM to noon
        (12, 18, "12PM to 6PM", green),   # noon to 6 PM
        (18, 24, "6PM to 12AM", red),     # 6 PM to midnight
    ]

    for position, (start, end, label, color) in enumerate(slots):
        # Blank line between sections, but not before the first one.
        if position:
            print()

        in_slot = data.loc[(data['hour'] >= start) & (data['hour'] < end)]
        counts = in_slot['name'].value_counts().to_dict()

        print(color, "Number of Messages from", "{} -- {}".format(label, chat_name))
        print('--------------------------------------')

        for name, count in counts.items():
            print(name, color, count)

    # Switch the terminal back to its default colour so the last section's
    # colour does not leak into whatever is printed afterwards.
    print(reset, end="")


## Total Messages by User Over Time

In [None]:
def totals_over_time():
    """Plot each participant's running message total against time.

    Reads the module-level ``raw`` (its ``participants`` list), ``data``
    (``name`` and ``date_time`` columns) and ``file_path`` (for the title).
    Names are compared with spaces removed and lower-cased. Only senders
    listed in ``raw['participants']`` get a line. The chart is drawn on a
    new figure; nothing is returned.
    """
    # Normalised participant names; dict.fromkeys drops duplicates while
    # keeping the original order.
    names = [p['name'].replace(" ", "").lower() for p in raw['participants']]
    participant_keys = list(dict.fromkeys(names))

    # Sender of every message, normalised the same way as the participants.
    sender_keys = data.name.str.replace(" ", "", regex=False).str.lower()

    fig, ax = plt.subplots(figsize=(15, 10))

    list_of_colors = ['#ff0000', '#ffa500', '#ffff00', '#008000', '#0000ff', '#4b0082', '#ee82ee', '#fed8b1', '#FDB927', '#39FF14', '#87ceeb', '#000000']

    for index, key in enumerate(participant_keys):
        # Running count that includes the message sent at each timestamp, so
        # the last point equals the participant's actual total.
        running_total = (sender_keys == key).cumsum()

        # Hand out colours in order (wrapping around if there are more
        # participants than colours) so lines do not share a colour by chance
        # and the chart looks the same on every run.
        hue = list_of_colors[index % len(list_of_colors)]

        ax.plot(data.date_time, running_total, label=key, color=hue)

    ax.legend()
    ax.set_title("Total Messages by User Over Time -- {}".format(file_path.split('/', 8)[7].split('_')[0].upper()))
    ax.set_ylabel("Total Messages")
    ax.set_xlabel("Date")

In [None]:
# Draw the hourly bar chart, then save the current figure to disk.
messages_by_hour()
# savefig does not create missing folders, so make sure 'images/' exists.
os.makedirs('images', exist_ok=True)
plt.savefig('images/msg_by_hour_of_day.png')

In [None]:
# Count messages per sender and draw them as a pie chart, then save the figure.
totals = data['name'].value_counts().to_dict()
homemade_pie_with_name(totals, "Shares of Messages by User -- {}".format(file_path.split('/', 8)[7].split('_')[0].upper()));
# savefig does not create missing folders, so make sure 'images/' exists.
os.makedirs('images', exist_ok=True)
plt.savefig('images/share_of_msg_per_user.png')

In [None]:
# Print per-user message counts for each six-hour slot of the day.
messages_by_timeslot()

In [None]:
# Draw the running-total line chart, then save the current figure to disk.
totals_over_time()
# savefig does not create missing folders, so make sure 'images/' exists.
os.makedirs('images', exist_ok=True)
plt.savefig('images/msg_totals_over_time.png')