In [None]:
import csv
import itertools
import time

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split

import helpers
import sarcastic
from engagement import engagement_preprocessing
from satisfaction import satisfaction_preprocessing

In [2]:
# Turn off pandas' chained-assignment warning for the whole notebook
pd.options.mode.chained_assignment = None

# Cap the text shown per column at 10 characters when frames are displayed
pd.set_option('display.max_colwidth', 10)

In [3]:
# Train sarcasm classification model.
# sarcastic.train() is a project-local helper whose internals are not shown here; its two
# return values are unpacked as `tokenizer` and `model` and are later handed to
# satisfaction_preprocessing inside the grid-search cell.
tokenizer, model = sarcastic.train()

In [4]:
# Load the annotated dialogues and replace the space-separated column headers with
# snake_case names, which is how the rest of the notebook refers to them.
df = pd.read_csv("data/RED/annotated/100_annotated_dialogues_2.csv").rename(
    columns={
        'conversation id': 'conversation_id',
        'post title': 'post_title',
        'dialog turn': 'dialog_turn',
        'emotion prediction': 'emotion_prediction',
    }
)

In [14]:
# Every hyperparameter is a list of candidate values; the grid search tries each
# combination of one value per list.

# Engagement: decision threshold followed by the feature weights
eng_threshold = [3]
num_turns_weight = [1]
interleaved_weight = [1]
token_length_weight = [0.05]
diff_weight = [-0.5]

# Satisfaction: decision threshold followed by the feature weights
sat_threshold = [0.5]
slope_weight = [0.5]
sentiment_change_weight = [0.5]
grateful_bonus_weight = [3]
profanity_penalty_weight = [0.5]
sarcasm_penalty_weight = [0.5]
disagreement_penalty_weight = [0.5]

# Search space keyed by hyperparameter name. Insertion order matters: the grid search
# addresses the entries by position through n_hp (engagement first, then satisfaction).
hp = dict(
    eng_threshold=eng_threshold,
    num_turns_weight=num_turns_weight,
    interleaved_weight=interleaved_weight,
    token_length_weight=token_length_weight,
    diff_weight=diff_weight,
    sat_threshold=sat_threshold,
    slope_weight=slope_weight,
    sentiment_change_weight=sentiment_change_weight,
    grateful_bonus_weight=grateful_bonus_weight,
    profanity_penalty_weight=profanity_penalty_weight,
    sarcasm_penalty_weight=sarcasm_penalty_weight,
    disagreement_penalty_weight=disagreement_penalty_weight,
)

# Hyperparameter names in search-space order
n_hp = [*hp]

In [15]:
# Map each (conversation_id, subreddit) pair to the row labels of its utterances
grouped = df.groupby(['conversation_id', 'subreddit']).groups

# Split the conversations 50/50 into a validation half (used for tuning) and a test
# half; the fixed random_state keeps the split reproducible.
s = pd.Series(grouped)
val, test = map(pd.Series.to_dict, train_test_split(s, train_size=0.5, random_state=42))

In [16]:
# HYPERPARAMETER TUNING
# Exhaustive grid search over every combination of the candidate values in `hp`.
# For each combination, every validation conversation is scored for engagement and
# satisfaction, the predictions are compared with the labels through helpers.test, and
# the combination with the highest F1 is tracked separately for each task.
#
# Outputs left in the kernel:
#   all_params_e / all_params_s   - one dict (hyperparameters + P/R/F1/accuracy) per combination
#   best_params_e / best_params_s - the dict with the highest F1 for each task
#   df_val_preds                  - per-utterance predictions of the last combination tried
start = time.time()
cols = df.columns.tolist()  # not needed below; kept in case other cells read it -- TODO confirm

# Column order of the prediction frame
pred_cols = ['conversation_id', 'subreddit', 'post_title', 'author', 'dialog_turn', 'text',
             'ground_truth_satisfaction', 'ground_truth_engagement', 'predicted_satisfaction',
             'predicted_engagement', 'compound', 'sentiment', 'emotion_prediction', 'token_length',
             'sentences', 'sentence_compounds', 'strongest_compound']

# The first n_eng_hp names in n_hp belong to engagement, the remaining ones to satisfaction
n_eng_hp = 5

all_params_e, all_params_s = [], []
best_params_e, best_params_s, curr_params_e, curr_params_s = {}, {}, {}, {}
best_e, best_s = 0, 0

# itertools.product varies the right-most hyperparameter fastest, i.e. the same order
# as twelve nested for-loops over hp[n_hp[0]] ... hp[n_hp[11]].
for combo in itertools.product(*(hp[name] for name in n_hp)):
    p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12 = combo
    print('Testing with hyperparameters: ', *combo)

    # Start from an empty collection for every combination. Accumulating predictions
    # across combinations would make groupby(...).first() below keep returning the
    # predictions of the very first combination.
    conv_preds = []
    # NOTE(review): the preprocessing calls do not receive any hyperparameter, so their
    # results look loop-invariant; caching them per conversation would speed the search
    # up, provided satisfaction_preprocessing does not read 'predicted_engagement'.
    for conv_id, subreddit in val:
        conversation, speaker, listener = helpers.extract_responses(conv_id, subreddit, df)

        # Predict engagement: weighted feature sum compared against the threshold p1
        num_turns, interleaved, token_length_score, num_turn_diff, conversation = engagement_preprocessing(speaker, listener, conversation)
        engagement_score = p2*num_turns + p3*interleaved + p4*token_length_score + p5*num_turn_diff
        engagement = 1 if engagement_score >= p1 else 0
        conversation['predicted_engagement'] = engagement

        # Predict satisfaction: weighted feature sum compared against the threshold p6
        slope, sentiment_change, grateful_bonus, profanity_penalty, sarcasm_penalty, disagreement_penalty = satisfaction_preprocessing(conversation, speaker, tokenizer, model)
        satisfaction_score = p7*slope + p8*sentiment_change + p9*grateful_bonus + p10*profanity_penalty + p11*sarcasm_penalty + p12*disagreement_penalty
        satisfaction = 1 if satisfaction_score >= p6 else 0
        conversation['predicted_satisfaction'] = satisfaction

        conv_preds.append(conversation)

    # Build the frame once per combination (DataFrame.append was removed in pandas 2.0
    # and is quadratic inside a loop), then reorder the columns.
    df_val_preds = pd.concat(conv_preds)[pred_cols]

    # Take first utterance of each conversation to get predictions and labels
    first_utters_val = df_val_preds.groupby(['conversation_id', 'subreddit']).first().reset_index()

    # Compare predictions to labels and return scores
    P_s, R_s, f1_s, acc_s, P_e, R_e, f1_e, acc_e = helpers.test(first_utters_val)

    # Fresh dicts per combination, so the entries stored in all_params_* and
    # best_params_* can never be overwritten by a later iteration.
    curr_params_e = dict(zip(n_hp[:n_eng_hp], combo[:n_eng_hp]))
    curr_params_e.update({'P_e': P_e, 'R_e': R_e, 'f1_e': f1_e, 'acc_e': acc_e})
    curr_params_s = dict(zip(n_hp[n_eng_hp:], combo[n_eng_hp:]))
    curr_params_s.update({'P_s': P_s, 'R_s': R_s, 'f1_s': f1_s, 'acc_s': acc_s})

    # Append current parameters to list of all parameters
    all_params_e.append(curr_params_e)
    all_params_s.append(curr_params_s)

    # Update best parameters (strictly greater: the earliest combination wins ties)
    if f1_e > best_e:
        best_e = f1_e
        best_params_e = dict(curr_params_e)
        print('Current best engagement hyperparameters are :', best_params_e)

    if f1_s > best_s:
        best_s = f1_s
        best_params_s = dict(curr_params_s)
        print('Current best satisfaction hyperparameters are :', best_params_s)

end = time.time()
print('Time it takes for grid search (in seconds): ', end - start)

Testing with hyperparameters:  3 1 1 0.05 -0.5 0.5 0.5 0.5 3 0.5 0.5 0.5
False negative satisfaction 398
False negative satisfaction 3183
False positive satisfaction 5024
False negative satisfaction 6836
False negative satisfaction 8314
False negative satisfaction 9854
False negative satisfaction 10633
False negative satisfaction 50030
False negative satisfaction 69061
False positive satisfaction 75539
False positive satisfaction 114807
False negative satisfaction 239904
False negative satisfaction 278531
Current best engagement hyperparameters are : {'eng_threshold': 3, 'num_turns_weight': 1, 'interleaved_weight': 1, 'token_length_weight': 0.05, 'diff_weight': -0.5, 'P_e': 0.92, 'R_e': 1.0, 'f1_e': 0.9583333333333334, 'acc_e': 0.92}
Current best satisfaction hyperparameters are : {'sat_threshold': 0.5, 'slope_weight': 0.5, 'sentiment_change_weight': 0.5, 'grateful_bonus_weight': 3, 'profanity_penalty_weight': 0.5, 'sarcasm_penalty_weight': 0.5, 'disagreement_penalty_weight': 0.5, 'P_s

In [8]:
# Persist the best engagement configuration (hyperparameters + scores) as the dict's text form
with open('hyperparameters/best_params_engagement.txt', 'w') as out_file:
    out_file.write(f"{best_params_e}")

# Same for the best satisfaction configuration
with open('hyperparameters/best_params_satisfaction.txt', 'w') as out_file:
    out_file.write(f"{best_params_s}")

In [13]:
# Persist the full grid-search log for engagement: one dict per combination tried
with open('hyperparameters/all_params_engagement.txt', 'w') as out_file:
    out_file.write(f"{all_params_e}")

# Same for the satisfaction grid-search log
with open('hyperparameters/all_params_satisfaction.txt', 'w') as out_file:
    out_file.write(f"{all_params_s}")

In [None]:
# Disabled draft of the test-set evaluation: the whole cell is a single string literal,
# so none of it executes. It mirrors the per-conversation scoring of the grid-search cell
# but is not runnable as written. Points to resolve before enabling it:
#   * the names used as weights/thresholds here (eng_threshold, num_turns_weight, ...)
#     are bound to lists of candidate values in the hyperparameter cell, not to the best
#     scalar values found during validation;
#   * extract_responses is called without the `helpers.` prefix and without the `df`
#     argument that the grid-search cell passes;
#   * df_test_preds is appended to but is not initialised in the cells visible here;
#   * helpers.test receives first_utters_test, whereas the grouped frame is stored back
#     into df_test_preds;
#   * DataFrame.append no longer exists in pandas >= 2.0.
'''
### TEST ###
# TODO: Give best hyperparameters found in validation
# TODO: Predict for the test set:
for conv_id, subreddit in test:
    conversation, speaker, listener = extract_responses(conv_id, subreddit)
    
    num_turns, interleaved, token_length_score, num_turn_diff, conversation = engagement_preprocessing(speaker, listener, conversation)
    engagement_score = num_turns_weight*num_turns + interleaved_weight*interleaved + token_length_weight*token_length_score + diff_weight*num_turn_diff
    engagement = 1 if engagement_score >= eng_threshold else 0
    conversation['predicted_engagement'] = engagement
    
    slope, sentiment_change, grateful_bonus, profanity_penalty, sarcasm_penalty, disagreement_penalty = satisfaction_preprocessing(conversation, speaker, tokenizer, model)
    satisfaction_score = slope_weight*slope + sentiment_change_weight*sentiment_change + grateful_bonus_weight*grateful_bonus + profanity_penalty_weight*profanity_penalty + sarcasm_penalty_weight*sarcasm_penalty + disagreement_penalty_weight*disagreement_penalty
    satisfaction = 1 if satisfaction_score >= sat_threshold else 0
    conversation['predicted_satisfaction'] = satisfaction
    
    df_test_preds = df_test_preds.append(conversation)

df_test_preds = df_test_preds[['conversation_id', 'subreddit', 'post_title', 'author', 'dialog_turn', 'text', 'ground_truth_satisfaction', 'ground_truth_engagement', 
                'predicted_satisfaction', 'predicted_engagement', 'compound', 'sentiment', 'emotion_prediction', 'token_length', 'sentences', 'sentence_compounds', 'strongest_compound']]
df_test_preds.to_csv("data/RED/annotated/test_predictions.csv", index=False)

df_test_preds = df_test_preds.groupby(['conversation_id', 'subreddit']).first().reset_index()
# TODO: Test performance of test set:
test_P_s, test_R_s, test_f1_s, test_acc_s, test_P_e, test_R_e, test_f1_e, test_acc_e = helpers.test(first_utters_test)
'''

NEXT STEPS: 
* Try grid search with a subset of the hyperparameters
* Apply algorithm with best values to entire dyadic dataset
* Retrain emobert on the whole dataset at the sentence level
* Write guidelines