In [None]:
import json
import numpy as np
import pandas as pd

def parse_payload(uploaded):
  """Parse an uploaded conversation export into a DataFrame plus summary info.

  Args:
    uploaded: Mapping of file name -> file content, where the content is JSON
      text in any form accepted by json.loads (str or bytes). Only the first
      entry is parsed; additional entries are ignored. The JSON document must
      be an object with a "conversations" list whose records provide
      "timestamp" and "username" fields.

  Returns:
    A dict with three entries:
      "payload": DataFrame built from the "conversations" records, with the
        'timestamp' column converted by pd.to_datetime.
      "unique_users": NumPy array of the distinct usernames, sorted ascending.
      "date_range": (earliest timestamp, latest timestamp) tuple.

  Raises:
    ValueError: If `uploaded` is empty (no file was provided).
    KeyError: If the document has no "conversations" key, or the records lack
      a 'timestamp' or 'username' field.
  """
  if not uploaded:
    # Without this guard an empty upload fails with an opaque IndexError.
    raise ValueError('No file was uploaded; expected one JSON conversation file.')

  # Take the content of the first uploaded file without copying all values.
  json_string = next(iter(uploaded.values()))

  # Parse the JSON text; the message records live under "conversations".
  document = json.loads(json_string)

  # Build a pandas DataFrame with one row per conversation record.
  df = pd.DataFrame(document["conversations"])

  # Convert the 'timestamp' column to datetime values.
  df['timestamp'] = pd.to_datetime(df['timestamp'])

  # np.unique returns the distinct values already sorted, so no extra sort
  # pass is required.
  sorted_unique_usernames = np.unique(df['username'])

  # Earliest and latest message timestamps.
  min_date = df['timestamp'].min()
  max_date = df['timestamp'].max()

  # Print a short summary of what was loaded.
  print("Data Summary:")
  print('  total message: {count}'.format(count=len(df)))
  print('  unique users: {count}'.format(count=len(sorted_unique_usernames)))
  print('  time range: {min_date} ~ {max_date}'.format(min_date=min_date, max_date=max_date))

  return {
      "payload": df,
      "unique_users": sorted_unique_usernames,
      "date_range": (min_date, max_date)
  }


In [2]:
from google.colab import files

def upload_conversation_file():
  """Upload a conversation file through Colab and parse it.

  Calls google.colab's files.upload(), prints the name and content length of
  every entry in the returned mapping, then passes the whole mapping to
  parse_payload.

  Returns:
    The value returned by parse_payload for the uploaded mapping.
  """
  uploaded = files.upload()
  # Report each received file before parsing.
  for filename, content in uploaded.items():
    summary = 'User uploaded file "{name}" with length {length} bytes'.format(
      name=filename, length=len(content))
    print(summary)
  return parse_payload(uploaded)

In [2]:
from datetime import datetime, timedelta
import ipywidgets as ipyw

from bokeh.models.widgets.inputs import AutocompleteInput
from IPython.display import display, clear_output



def on_button_clicked(b):
  """Button callback: filter the parsed messages and render the report.

  Reads the current selection from the module-level widgets (userDp,
  startDate, endDate), keeps the rows of parsed_data['payload'] whose
  timestamp lies in [start date, day after end date) and, unless 'All' is
  selected, whose username matches the dropdown value. It then prints the
  number of matching rows, draws the word cloud, and re-displays the
  selection form.

  Args:
    b: Argument supplied by the button's on_click dispatch; not used.
  """
  global userDp
  global startDate
  global endDate
  global parsed_data

  clear_output(wait=True)
  try:
    # A DatePicker with no date chosen reports None, which would break the
    # timedelta arithmetic below; fall back to the bounds of the loaded data.
    data_start, data_end = parsed_data['date_range']
    first_day = startDate.value if startDate.value is not None else data_start.date()
    last_day = endDate.value if endDate.value is not None else data_end.date()

    print('Generating report for "{username}", {startDate} ~ {endDate}'.format(username=userDp.value,
                                                                               startDate=first_day,
                                                                               endDate=last_day))
    # Get the time range; the end bound is exclusive and set to the start of
    # the following day so the whole selected end date is included.
    startTS = pd.to_datetime(first_day)
    endTS = pd.to_datetime(last_day + timedelta(days=1))

    df = parsed_data['payload']
    # Build the date mask once and narrow it by user only when one is chosen.
    selected = (df['timestamp'] >= startTS) & (df['timestamp'] < endTS)
    if userDp.value != 'All':
      selected = selected & (df['username'] == userDp.value)
    filtered_data = df[selected]
    print('Filtered message: {count}'.format(count=len(filtered_data)))

    # Generate word cloud for the filtered data
    generate_word_cloud(filtered_data)

    # TODO: add more analysis functions based on the filtered data here
  finally:
    # Re-populate the selection form even if the report failed; the output
    # area was cleared above, so otherwise the form would be lost.
    generate_report()

def generate_report():
  """Create and display the report selection form.

  Builds a user dropdown ('All' plus every entry of
  parsed_data["unique_users"]), start/end date pickers initialised from
  parsed_data["date_range"], and a "Generate Report" button wired to
  on_button_clicked. The widgets are stored in the module-level globals
  userDp, startDate, endDate and reportBtn so the callback can read them.
  """
  global userDp, reportBtn, startDate, endDate
  global parsed_data

  # `datetime` is the class imported from the datetime module, so
  # datetime.date(ts) is the unbound-method spelling of ts.date().
  earliest, latest = parsed_data["date_range"][0], parsed_data["date_range"][1]
  first_day = datetime.date(earliest)
  last_day = datetime.date(latest)

  # Date pickers: the start picker is bounded below, the end picker above.
  startDate = ipyw.DatePicker(description='Start Date', value=first_day, min=first_day)
  endDate = ipyw.DatePicker(description='End Date', max=last_day, value=last_day)

  # User selector, defaulting to every user.
  user_choices = ['All', *parsed_data["unique_users"]]
  userDp = ipyw.Dropdown(options=user_choices, value='All', description="User: ")

  # Button that confirms the inputs and triggers the report.
  # button_style accepts 'success', 'info', 'warning', 'danger' or ''.
  reportBtn = ipyw.Button(
    description='Generate Report',
    disabled=False,
    button_style='success',
    tooltip='Generate Report for the select user',
    icon='check',
  )
  reportBtn.on_click(on_button_clicked)

  # Stack the controls vertically and show the form.
  form = ipyw.VBox(children=[userDp, startDate, endDate, reportBtn])
  display(form)

In [3]:
import matplotlib.pyplot as plt
from wordcloud import WordCloud

def generate_word_cloud(filtered_data):
  """Render a word cloud from the 'message' column of the given data.

  Null messages are skipped and the remaining values are converted to text
  before being joined with spaces. When there is no text to plot, a notice is
  printed and nothing is drawn.

  Args:
    filtered_data: DataFrame with a 'message' column.

  Returns:
    None. The word cloud is shown through Matplotlib.
  """
  # Drop missing values and coerce the rest to str so that joining cannot
  # fail on None/NaN or non-string entries.
  messages = filtered_data['message'].dropna().astype(str)
  concat_str = ' '.join(messages)

  # An empty selection (or only blank messages) has nothing to visualise.
  if not concat_str.strip():
    print('No message text available to build a word cloud.')
    return

  try:
    # Create a WordCloud object
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(concat_str)
  except ValueError as err:
    # WordCloud raises ValueError when the text yields no usable words.
    print('Word cloud could not be generated: {error}'.format(error=err))
    return

  # Display the word cloud using Matplotlib
  plt.figure(figsize=(10, 5))
  plt.imshow(wordcloud, interpolation='bilinear')
  plt.axis('off')  # Turn off axis labels and ticks
  plt.show()