In [None]:
import pandas as pd
import plotly.graph_objects as go

from plotly.subplots import make_subplots
from textblob import TextBlob
from datetime import datetime
from utils.dict_manipulation import get_ua_tonality_dict_combined, get_ru_tonality_dict
from utils.date import get_day_and_hour, get_week_day_from_number

In [None]:

# Load the prepared dialog messages. Later cells read the columns
# 'preprocessed_message', 'dialog_language', 'from_id', 'dialog ID',
# 'subdialog_id' and 'date' from this frame.
data = pd.read_csv("data/new_type_dialogs_prepared/general_df4.csv")


In [None]:

# Tonality dictionaries for Ukrainian and Russian. calculate_msg_sentiment looks
# up lower-cased tokens in them and treats a missing token as 0.
tonality_dict_ua = get_ua_tonality_dict_combined()
tonality_dict_ru = get_ru_tonality_dict()


In [None]:

def calculate_msg_sentiment(msg, lang):
    """
    Calculate the dictionary-based sentiment of a single message.

    English messages are scored with TextBlob polarity, rounded to 4 digits.
    Ukrainian and Russian messages are split on whitespace, every token is
    lower-cased and looked up in the matching tonality dictionary, and the
    result is the average over the tokens that have a non-zero score.

    Args:
        msg: message text. Any non-string value (e.g. NaN from pandas)
            is treated as neutral.
        lang: language code, one of "en", "ua", "ru". Any other value
            (including NaN / None) is treated as neutral.

    Returns:
        The sentiment score of the message, or 0 when the message is not a
        string, the language is not supported, or no token has a non-zero
        dictionary score.
    """
    if not isinstance(msg, str):
        return 0

    if lang == "en":
        return round(TextBlob(msg).sentiment.polarity, 4)

    if lang == "ua":
        tonality_dict = tonality_dict_ua
    elif lang == "ru":
        tonality_dict = tonality_dict_ru
    else:
        # Unsupported or missing language. Without this branch `tonality_dict`
        # stays unbound and the lookup below raises UnboundLocalError, which
        # aborts a whole DataFrame.apply because of a single odd row.
        return 0

    overall_sentiment = 0
    words_num = 0

    for token in msg.split():
        token_sentiment = tonality_dict.get(token.lower(), 0)
        # Only tokens with a non-zero score take part in the average.
        if token_sentiment:
            overall_sentiment += token_sentiment
            words_num += 1

    if not words_num:
        return 0

    return overall_sentiment / words_num


def add_dialog_sentiment(data, save_to_file=False, 
                         save_path="data/processed_dialog_files/general_dialogs_sentiment.csv"):
    """
    Score every message of a dialog dataframe.

    For each row, calculate_msg_sentiment is called with the row's
    'preprocessed_message' and 'dialog_language' values and the result is
    stored in a 'msg_sentiment' column.

    The column is added to the passed dataframe itself (no copy is made)
    and that same dataframe is returned. When save_to_file is true the
    dataframe is also written to save_path as CSV without the index.
    """
    def _row_sentiment(row):
        # One row -> one sentiment score
        return calculate_msg_sentiment(row['preprocessed_message'],
                                       row['dialog_language'])

    data['msg_sentiment'] = data.apply(_row_sentiment, axis=1)

    if not save_to_file:
        return data

    data.to_csv(save_path, index=False)
    return data


In [None]:
def calculate_avg_subdialog_sentiment(data):
    """
    Calculate average sentiment for each subdialog in a dataframe.

    Rows are grouped by ('from_id', 'dialog ID', 'subdialog_id'); the mean of
    'msg_sentiment' within each group, rounded to 3 digits, is written to every
    row of that group in a new 'avg_subdialog_sentiment' column.

    The column is added to the passed dataframe itself (no copy is made) and
    that same dataframe is returned.
    """
    # Group the dataframe that was passed in, not a notebook-level variable,
    # so the function works on any frame and does not depend on cell order.
    grouped_data = data.groupby(['from_id', 'dialog ID', 'subdialog_id'])
    data['avg_subdialog_sentiment'] = grouped_data['msg_sentiment'].transform(
        lambda group: group.mean().round(3)
    )

    return data

# This adds the 'msg_sentiment' column to the df. The frame is modified in
# place, so `sentiment_data_for_each_msg` and `data` are the same object.
sentiment_data_for_each_msg = add_dialog_sentiment(data) 

# This adds the 'avg_subdialog_sentiment' column to the df
avg_sentiment_data = calculate_avg_subdialog_sentiment(sentiment_data_for_each_msg)


In [None]:
def add_hour_and_dayoweek(data):
    """
    Add 'hour_of_day' and 'day_of_week' columns to a dataframe.

    get_day_and_hour is called with each row's 'date' value; its first
    output is stored in 'hour_of_day' and its second in 'day_of_week'.
    The columns are added to the passed dataframe itself, which is returned.

    NOTE(review): the helper is named get_day_and_hour, yet its outputs are
    stored as (hour, day) - confirm the helper's return order.
    """
    def _split_date(row):
        return get_day_and_hour(row['date'])

    # result_type="expand" turns the helper's pair into two columns
    expanded = data.apply(_split_date, axis=1, result_type="expand")
    data[['hour_of_day', 'day_of_week']] = expanded

    return data


def analyze_sentiment_for_hour_and_dayoweek(data):
    """
    Calculate sentiment by day of the week and hour for each user.

    Rows are grouped by ('from_id', 'day_of_week', 'hour_of_day') and the
    'avg_subdialog_sentiment' values of each group are averaged and rounded
    to 3 digits.

    Returns a new dataframe with one row per group and the columns
    'from_id', 'day_of_week', 'hour_of_day', 'avg_subdialog_sentiment',
    ready for visualization. The input dataframe is not modified.
    """
    # Group the dataframe that was passed in, not a notebook-level variable,
    # so the function works on any frame and does not depend on cell order.
    grouped_by_date = data.groupby(['from_id', 'day_of_week', 'hour_of_day'])
    finalized_data = grouped_by_date['avg_subdialog_sentiment'].mean().round(3).reset_index()

    return finalized_data

# This adds the 'hour_of_day' and 'day_of_week' columns to the df
data_with_hour_and_week = add_hour_and_dayoweek(avg_sentiment_data)

# This averages the sentiment per user, day of the week and hour, ready to be visualized
final_sentiment_data = analyze_sentiment_for_hour_and_dayoweek(data_with_hour_and_week)

# Show the aggregated frame as the cell output
final_sentiment_data

In [None]:

def visualize_user_sentiment(sentiment_data, user_id):
    """
    Plot the weekly sentiment of one user.

    The rows of sentiment_data whose 'from_id' equals user_id are drawn as
    seven side-by-side line charts (titled Monday .. Sunday) that share the
    y axis. Each chart covers hours 0..23 of one 'day_of_week' value (1..7);
    hours without a row are plotted as 0. The figure is displayed with
    fig.show() and nothing is returned.
    """
    weekday_titles = ("Monday", "Tuesday", "Wednesday", "Thursday",
                      "Friday", "Saturday", "Sunday")

    belongs_to_user = sentiment_data['from_id'] == user_id
    user_rows = sentiment_data[belongs_to_user]

    fig = make_subplots(
        rows=1,
        cols=7,
        subplot_titles=weekday_titles,
        shared_yaxes=True,
        x_title="Hours",
        y_title="Sentiment",
        horizontal_spacing=0.005,
    )
    fig.update_layout(title_text=f"Weekly sentiment analysis for {user_id}", height=450)

    for day_of_week in range(1, 8):
        day_rows = user_rows.query(f'day_of_week == {day_of_week}')

        # Start from a neutral value for every hour, then overwrite the
        # hours that actually have data for this day.
        sentiment_by_hour = dict.fromkeys(range(24), 0)
        sentiment_by_hour.update(zip(day_rows.hour_of_day,
                                     day_rows.avg_subdialog_sentiment))

        hours = list(sentiment_by_hour.keys())
        sentiments = list(sentiment_by_hour.values())

        fig.add_scatter(x=hours,
                        y=sentiments,
                        name=get_week_day_from_number(day_of_week),
                        row=1, col=day_of_week)

    fig.show()
    

    
# Render the weekly sentiment chart for a single user (the 'from_id' value is hard-coded here)
visualize_user_sentiment(final_sentiment_data, 418590848)