In [None]:
import pandas as pd
import math, os, sys, glob
import numpy as np
import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings('ignore')
mpl.rcParams['figure.dpi'] = 500
pd.set_option('display.max_columns', 1000)
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Make the modules under ../src/ importable. The path is resolved against the
# current working directory, so the notebook is expected to be started from
# its own folder. The membership check keeps the cell idempotent: re-running
# it no longer appends a duplicate entry to sys.path.
src_dir = os.path.abspath('../src/')
if src_dir not in sys.path:
    sys.path.append(src_dir)

from preprocess import Preprocess
from user import User
from action_logging import Logger
from plot_user import PlotUser
from plot import Plot
from plot_progression import PlotProgression
from generate_html import HTML

In [None]:
# Shared logger handed to the preprocessing, user and HTML objects below.
# NOTE(review): presumably writes to a file named "run" under ../logs/ -
# confirm against action_logging.Logger.
logger = Logger(
    log_flag=True,
    log_file="run",
    log_path="../logs/",
)

### Load and Clean data

In [None]:
# Build the preprocessing object from the project settings file, passing in
# the shared logger.
preprocess = Preprocess(config_path='../settings.conf', logger=logger)

# Active pipeline steps. The commented-out calls are disabled steps kept for
# reference, exactly as in the original cell.
preprocess.load_data()
# preprocess.parse_data()
# preprocess.drop_message()
preprocess.prepare_df()
# preprocess.check_n_users()
# preprocess.print_sample(10)

# Preview only the first rows instead of rendering the whole frame: enough to
# sanity-check the columns while keeping the saved notebook output small.
preprocess.pd_data.head(10)

### Analysis
#### User based analysis

In [None]:
# Collect one User statistics object per chat participant, followed by a
# final aggregate entry labelled 'Overall'.
user_data_list = []
for user_idx, user in enumerate(preprocess.users + ['Overall']):
    logger.write_logger(f"Starting for User: {user}")

    # 'Overall' works on a copy of the full frame; every other name is
    # restricted to the rows whose 'User' column matches it.
    user_subset_data = (
        preprocess.pd_data.copy()
        if user == 'Overall'
        else preprocess.pd_data[preprocess.pd_data['User'] == user]
    )

    # --- Fetch user statistics ---------------------------------------------
    user_data = User(
        user_name=user,
        color_map=preprocess.color_map,
        messages=user_subset_data['Message'],
        timestamp=user_subset_data['Timestamp'],
        users=user_subset_data['User'],
        logger=logger,
    )
    # Fluent chain; the last two steps receive the complete frame rather
    # than the per-user subset.
    (
        user_data.get_clean_messages()
        .get_link_count()
        .get_media_count()
        .get_emoji_count()
        .get_total_stats()
        .get_emoji_statistics()
        .get_avg_stats()
        .get_top_stats(data=preprocess.pd_data)
        .get_response_time(data=preprocess.pd_data)
    )

    # --- Plot user statistics ----------------------------------------------
    # user_idx is passed 1-based to the plotting helper.
    plot_user_obj = PlotUser(user_object=user_data, user_idx=user_idx + 1)
    for ngram_size in (1, 2, 3):
        plot_user_obj.plot_top_k_ngrams(n_grams=ngram_size, k=10)
    # The emoji plot's return value is stored back on the user object.
    user_data.pd_emoji_rank = plot_user_obj.plot_top_k_emojis(k=5, normalize=True)
    # First word cloud uses the helper's default n_grams; then 2 and 3.
    plot_user_obj.plot_word_cloud()
    for ngram_size in (2, 3):
        plot_user_obj.plot_word_cloud(n_grams=ngram_size)

    user_data_list.append(user_data)

    logger.write_logger(f"Ending for User: {user}")

### Plots - Overall

In [None]:
# Chat-wide plots built from the full preprocessed frame.
plot_obj = Plot(
    data=preprocess.pd_data,
    color_map=preprocess.color_map,
)

plot_obj.plot_date_n_msgs()
plot_obj.plot_weekday_n_msgs()
plot_obj.plot_hour_n_msgs()

# The last entry of user_data_list is the one the user loop appends for
# 'Overall'; the two plots below take it as their user object.
overall_user = user_data_list[-1]
plot_obj.plot_domain_counts(user_object=overall_user)
plot_obj.plot_date_n_emojis(user_object=overall_user)

### Plots - Progression

In [None]:
# Month-by-month progression plots built from the full preprocessed frame.
plot_progression_obj = PlotProgression(
    data=preprocess.pd_data,
    color_map=preprocess.color_map,
)
plot_progression_obj.plot_monthly_msg_progression()

# The remaining plots all take the last entry of user_data_list (the one the
# user loop appends for 'Overall') as their user object.
overall_kwargs = {'user_object': user_data_list[-1]}
plot_progression_obj.plot_monthly_word_progression(**overall_kwargs)
plot_progression_obj.plot_monthly_emoji_progression(**overall_kwargs)
plot_progression_obj.plot_monthly_response_time_progression(**overall_kwargs)

### Generate HTML

In [None]:
# The report takes two individual users plus the aggregate entry.
# user_data_list holds one entry per participant followed by 'Overall' as the
# LAST element, so the aggregate is read with [-1] (as the plotting cells do)
# rather than a fixed index 2, which is only correct for exactly two users.
if len(user_data_list) < 3:
    raise ValueError(
        "HTML report needs two users plus the 'Overall' entry; "
        f"got {len(user_data_list)} entries in user_data_list"
    )

html_obj = HTML(
    user1=user_data_list[0],
    user2=user_data_list[1],
    overall=user_data_list[-1],
    html_path="../html_template/index.html",
    logger=logger)

In [None]:
# Rebinds html_obj to whatever populate_members() returns.
# NOTE(review): presumably the method returns the same HTML instance
# (fluent style) - confirm in generate_html.
html_obj = html_obj.populate_members()

In [None]:
# Rebinds html_obj to whatever populate_html_txt() returns.
# NOTE(review): presumably fills the text parts of the template and returns
# the same HTML instance - confirm in generate_html.
html_obj = html_obj.populate_html_txt()

In [None]:
# Rebinds html_obj to whatever populate_html_img() returns.
# NOTE(review): presumably fills the image parts of the template and returns
# the same HTML instance - confirm in generate_html.
html_obj = html_obj.populate_html_img()

In [None]:
# Final step of the report pipeline. As the cell's last expression, any
# non-None return value is shown as the cell output.
# NOTE(review): the output location is decided inside generate_html.HTML -
# confirm there.
html_obj.save_html()