In [None]:
import pandas as pd
import glob
import os
import logging

from utils.dialog_manipulation import add_typing_speed, add_sleep_bounds, add_subdialogs_stats

In [None]:
# Configuration: pandas display option and the CSV locations used below.

# max_rows=None tells pandas not to truncate rows when a frame is displayed.
pd.set_option("display.max_rows", None)
USER_DATAFRAME_PATH = "data/processed_dialog_files/user_stats.csv"
DATAFRAME_PATH = "data/processed_dialog_files/general_dialogs_sentiment.csv"

In [None]:
# Load the dialog dataframe, failing fast when the CSV is missing.

if not os.path.isfile(DATAFRAME_PATH):
    logging.error(f'No Dataframe associated with {DATAFRAME_PATH}')
    # Stop here: carrying on would leave `df` undefined (or stale from an
    # earlier kernel session), so the cells below would either fail with an
    # unrelated NameError or silently work on old data.
    raise FileNotFoundError(f'No Dataframe associated with {DATAFRAME_PATH}')

# Read the CSV and rename the 'dialog ID' column to 'dialog_id'.
df = pd.read_csv(DATAFRAME_PATH).rename(columns={'dialog ID': 'dialog_id'})

In [None]:
# Helper functions: compute derived dialog data and optionally save it as CSV

def add_sleep_data(data: pd.DataFrame, user_df_path, save=True) -> pd.DataFrame:
    """
    Build the sleep-bounds dataframe for the given dialogs.

    ``data`` is passed to ``add_sleep_bounds`` and whatever that returns is
    wrapped in a new ``pd.DataFrame``.

    Args:
        data: Dialog dataframe handed to ``add_sleep_bounds``.
        user_df_path: Destination CSV path; only used when ``save`` is true.
        save: When true, write the resulting frame to ``user_df_path``
            (without the index column).

    Returns:
        The newly built dataframe. It is returned regardless of ``save`` so
        that ``save=False`` still gives the caller the computed result.
    """
    sleep_df = pd.DataFrame(add_sleep_bounds(data))
    if save:
        sleep_df.to_csv(user_df_path, index=False)
    return sleep_df

def add_stats_data(data: pd.DataFrame, df_path, save=True) -> pd.DataFrame:
    """
    Attach the sub-dialog mean columns to ``data``.

    Calls ``add_subdialogs_stats(data)`` and assigns its ``words_num_mean``,
    ``reply_time_mean`` and ``message_number_mean`` columns onto ``data``.
    The frame passed in is modified in place.

    Args:
        data: Dialog dataframe to enrich; it gains (or has overwritten) the
            three ``*_mean`` columns.
        df_path: Destination CSV path; only used when ``save`` is true.
        save: When true, write ``data`` to ``df_path`` (without the index
            column).

    Returns:
        The same ``data`` object, now carrying the three mean columns.
    """
    stats = add_subdialogs_stats(data)
    for column in ('words_num_mean', 'reply_time_mean', 'message_number_mean'):
        data[column] = stats[column]
    if save:
        data.to_csv(df_path, index=False)
    return data

In [None]:
# Aggregating data: add the sub-dialog mean columns to `df` and save the result.
# NOTE: the output goes to DATAFRAME_PATH, i.e. it overwrites the CSV loaded above.
# The trailing ';' keeps the cell from echoing anything.
add_stats_data(data=df, df_path=DATAFRAME_PATH);

In [None]:
# User stats: build the sleep-bounds frame from `df` and save it to USER_DATAFRAME_PATH.
# The trailing ';' keeps the cell from echoing anything.
add_sleep_data(data=df, user_df_path=USER_DATAFRAME_PATH);