In [9]:
import pandas as pd
import joblib
import csv
from sklearn.model_selection import train_test_split
import helpers
import sarcastic
from engagement import engagement_preprocessing
from satisfaction import satisfaction_preprocessing

In [10]:
# Silence pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)

# Cap the text shown per DataFrame cell at 10 characters when frames are displayed.
# NOTE(review): the previous comment said "Display long column text", which a width
# of 10 does not do; confirm whether a larger value (or None) was intended.
pd.set_option('display.max_colwidth', 10)

In [11]:
# Train the sarcasm classification model.
# The result of sarcastic.train() is unpacked into a (tokenizer, model) pair; both are
# passed to satisfaction_preprocessing() in the validation and test cells further down.
# NOTE(review): this presumably retrains on every Restart & Run All; if that is slow,
# consider caching the trained artifacts (joblib is already imported). Confirm the cost.
tokenizer, model = sarcastic.train()

In [12]:
# Load the annotated dialogues and normalise the space-separated column headers
# to snake_case so the rest of the notebook can refer to them consistently.
df = (
    pd.read_csv("data/RED/annotated/100_annotated_dialogues.csv")
    .rename(columns={
        'conversation id': 'conversation_id',
        'post title': 'post_title',
        'dialog turn': 'dialog_turn',
        'emotion prediction': 'emotion_prediction',
    })
)

In [13]:
# Candidate values for every tunable hyperparameter (three candidates each).

# --- Engagement: decision threshold and feature weights ---
eng_threshold = [2, 3, 4]
num_turns_weight = [0.5, 1, 1.5]
interleaved_weight = [0.5, 1, 1.5]
token_length_weight = [0.01, 0.05, 0.25]
diff_weight = [-0.1, -0.5, -1]

# --- Satisfaction: decision threshold and feature weights ---
sat_threshold = [0.5, 0.75, 1]
slope_weight = [0.5, 0.75, 1]
sentiment_change_weight = [0.5, 0.75, 1]
grateful_bonus_weight = [1.5, 2, 2.5]
profanity_penalty_weight = [0.5, 0.75, 1]
sarcasm_penalty_weight = [0.5, 0.75, 1]
disagreement_penalty_weight = [0.5, 0.75, 1]

# Name -> candidate list. Insertion order matters: the tuning cell addresses the
# hyperparameters positionally through n_hp (engagement first, then satisfaction).
hp = dict(
    eng_threshold=eng_threshold,
    num_turns_weight=num_turns_weight,
    interleaved_weight=interleaved_weight,
    token_length_weight=token_length_weight,
    diff_weight=diff_weight,
    sat_threshold=sat_threshold,
    slope_weight=slope_weight,
    sentiment_change_weight=sentiment_change_weight,
    grateful_bonus_weight=grateful_bonus_weight,
    profanity_penalty_weight=profanity_penalty_weight,
    sarcasm_penalty_weight=sarcasm_penalty_weight,
    disagreement_penalty_weight=disagreement_penalty_weight,
)

# Hyperparameter names, in the same order as the keys of hp
n_hp = [*hp]

In [14]:
# Map every (conversation_id, subreddit) pair to the row labels of its utterances.
grouped = df.groupby(['conversation_id', 'subreddit']).groups

# Split the conversations 50/50 into a validation half (used for hyperparameter
# tuning) and a held-out test half. The seed is fixed so the split is reproducible.
s = pd.Series(grouped)
val_part, test_part = train_test_split(s, train_size=0.5, random_state=42)

# Back to plain dicts keyed by (conversation_id, subreddit).
val = val_part.to_dict()
test = test_part.to_dict()

In [7]:
# Hyperparameter tuning on the validation split.
#
# For every candidate hyperparameter set, predict engagement and satisfaction for each
# validation conversation, score the predictions with helpers.test(), and remember the
# engagement / satisfaction settings that achieve the best respective F1.
#
# Results:
#   best_params     - best hyperparameters plus their precision/recall/F1/accuracy
#   df_val_preds    - utterance-level predictions of the LAST evaluated candidate set
#   first_utters_val - one row per conversation for the LAST evaluated candidate set
val_pred_columns = [
    'conversation_id', 'subreddit', 'post_title', 'author', 'dialog_turn', 'text',
    'ground_truth_satisfaction', 'ground_truth_engagement', 'predicted_satisfaction',
    'predicted_engagement', 'compound', 'sentiment', 'emotion_prediction', 'token_length',
    'sentences', 'sentence_compounds', 'strongest_compound',
]

best_params = {}
best_e = 0
best_s = 0
df_val_preds = pd.DataFrame(columns=val_pred_columns)

# NOTE(review): zip() pairs the i-th candidate of every list, so only index-aligned
# combinations are evaluated (3 here), not the full grid. If a grid search is intended,
# use itertools.product, ideally separately for the engagement and the satisfaction
# parameters, since the two scores are computed independently.
for candidate in zip(*(hp[name] for name in n_hp)):
    p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12 = candidate
    print('Testing with hyperparameters: ', *candidate)

    # Predictions are collected per candidate set. Carrying rows over from a previous
    # candidate would make groupby(...).first() below return stale predictions, so every
    # candidate would be scored exactly like the first one.
    val_conversations = []
    for conv_id, subreddit in val:
        conversation, speaker, listener = helpers.extract_responses(conv_id, subreddit, df)

        # Predict engagement: weighted feature sum compared against the threshold p1
        num_turns, interleaved, token_length_score, num_turn_diff, conversation = engagement_preprocessing(speaker, listener, conversation)
        engagement_score = p2*num_turns + p3*interleaved + p4*token_length_score + p5*num_turn_diff
        engagement = 1 if engagement_score >= p1 else 0
        conversation['predicted_engagement'] = engagement

        # Predict satisfaction: weighted feature sum compared against the threshold p6
        slope, sentiment_change, grateful_bonus, profanity_penalty, sarcasm_penalty, disagreement_penalty = satisfaction_preprocessing(conversation, speaker, tokenizer, model)
        satisfaction_score = p7*slope + p8*sentiment_change + p9*grateful_bonus + p10*profanity_penalty + p11*sarcasm_penalty + p12*disagreement_penalty
        satisfaction = 1 if satisfaction_score >= p6 else 0
        conversation['predicted_satisfaction'] = satisfaction

        val_conversations.append(conversation)

    # Concatenate once per candidate set (DataFrame.append was removed in pandas 2.0 and
    # growing a frame row-block by row-block is quadratic).
    df_val_preds = pd.concat(val_conversations)[val_pred_columns]

    # The labels are conversation-level, so one row per conversation is enough to score.
    first_utters_val = df_val_preds.groupby(['conversation_id', 'subreddit']).first().reset_index()

    P_s, R_s, f1_s, acc_s, P_e, R_e, f1_e, acc_e = helpers.test(first_utters_val)

    # Engagement hyperparameters are the first five names in n_hp
    if f1_e > best_e:
        best_e = f1_e
        best_params.update(zip(n_hp[:5], candidate[:5]))
        best_params.update(P_e=P_e, R_e=R_e, f1_e=f1_e, acc_e=acc_e)
        print('Current best hyperparameters are :', best_params)

    # Satisfaction hyperparameters are the remaining seven names in n_hp
    if f1_s > best_s:
        best_s = f1_s
        best_params.update(zip(n_hp[5:], candidate[5:]))
        best_params.update(P_s=P_s, R_s=R_s, f1_s=f1_s, acc_s=acc_s)
        print('Current best hyperparameters are :', best_params)

best_params

Testing with hyperparameters:  3 1 1 0.05 -0.5 0.5 1 1 2 1 1 1
Current best hyperparameters are : {'eng_threshold': 3, 'num_turns_weight': 1, 'interleaved_weight': 1, 'token_length_weight': 0.05, 'diff_weight': -0.5, 'P_e': 0.92, 'R_e': 1.0, 'f1_e': 0.9583333333333334, 'acc_e': 0.92}
Current best hyperparameters are : {'eng_threshold': 3, 'num_turns_weight': 1, 'interleaved_weight': 1, 'token_length_weight': 0.05, 'diff_weight': -0.5, 'P_e': 0.92, 'R_e': 1.0, 'f1_e': 0.9583333333333334, 'acc_e': 0.92, 'sat_threshold': 0.5, 'slope_weight': 1, 'sentiment_change_weight': 1, 'grateful_bonus_weight': 2, 'profanity_penalty_weight': 1, 'sarcasm_penalty_weight': 1, 'disagreement_penalty_weight': 1, 'P_s': 0.7567567567567568, 'R_s': 0.7777777777777778, 'f1_s': 0.7671232876712328, 'acc_s': 0.66}


{'eng_threshold': 3,
 'num_turns_weight': 1,
 'interleaved_weight': 1,
 'token_length_weight': 0.05,
 'diff_weight': -0.5,
 'P_e': 0.92,
 'R_e': 1.0,
 'f1_e': 0.9583333333333334,
 'acc_e': 0.92,
 'sat_threshold': 0.5,
 'slope_weight': 1,
 'sentiment_change_weight': 1,
 'grateful_bonus_weight': 2,
 'profanity_penalty_weight': 1,
 'sarcasm_penalty_weight': 1,
 'disagreement_penalty_weight': 1,
 'P_s': 0.7567567567567568,
 'R_s': 0.7777777777777778,
 'f1_s': 0.7671232876712328,
 'acc_s': 0.66}

In [None]:
### TEST ###
# Evaluate on the held-out test split using the best hyperparameters found during
# validation (best_params, filled by the tuning cell above).
#
# Results:
#   df_test_preds     - utterance-level predictions, also written to test_predictions.csv
#   first_utters_test - one row per conversation, the input to helpers.test()
#   test_P_*/R_*/f1_*/acc_* - test metrics for satisfaction (_s) and engagement (_e)

# Fail early with a clear message if tuning did not produce every hyperparameter
# (e.g. the tuning cell was not run, or no candidate ever improved on an F1 of 0).
missing_hp = [name for name in n_hp if name not in best_params]
if missing_hp:
    raise KeyError(f"best_params is missing tuned hyperparameters: {missing_hp}")

test_conversations = []
for conv_id, subreddit in test:
    conversation, speaker, listener = helpers.extract_responses(conv_id, subreddit, df)

    # Predict engagement with the tuned (scalar) weights and threshold
    num_turns, interleaved, token_length_score, num_turn_diff, conversation = engagement_preprocessing(speaker, listener, conversation)
    engagement_score = (
        best_params['num_turns_weight']*num_turns
        + best_params['interleaved_weight']*interleaved
        + best_params['token_length_weight']*token_length_score
        + best_params['diff_weight']*num_turn_diff
    )
    engagement = 1 if engagement_score >= best_params['eng_threshold'] else 0
    conversation['predicted_engagement'] = engagement

    # Predict satisfaction with the tuned (scalar) weights and threshold
    slope, sentiment_change, grateful_bonus, profanity_penalty, sarcasm_penalty, disagreement_penalty = satisfaction_preprocessing(conversation, speaker, tokenizer, model)
    satisfaction_score = (
        best_params['slope_weight']*slope
        + best_params['sentiment_change_weight']*sentiment_change
        + best_params['grateful_bonus_weight']*grateful_bonus
        + best_params['profanity_penalty_weight']*profanity_penalty
        + best_params['sarcasm_penalty_weight']*sarcasm_penalty
        + best_params['disagreement_penalty_weight']*disagreement_penalty
    )
    satisfaction = 1 if satisfaction_score >= best_params['sat_threshold'] else 0
    conversation['predicted_satisfaction'] = satisfaction

    test_conversations.append(conversation)

# Concatenate once (DataFrame.append was removed in pandas 2.0)
df_test_preds = pd.concat(test_conversations)
df_test_preds = df_test_preds[['conversation_id', 'subreddit', 'post_title', 'author', 'dialog_turn', 'text', 'ground_truth_satisfaction', 'ground_truth_engagement',
                'predicted_satisfaction', 'predicted_engagement', 'compound', 'sentiment', 'emotion_prediction', 'token_length', 'sentences', 'sentence_compounds', 'strongest_compound']]
df_test_preds.to_csv("data/RED/annotated/test_predictions.csv", index=False)

# The labels are conversation-level, so score on one row per conversation
first_utters_test = df_test_preds.groupby(['conversation_id', 'subreddit']).first().reset_index()
test_P_s, test_R_s, test_f1_s, test_acc_s, test_P_e, test_R_e, test_f1_e, test_acc_e = helpers.test(first_utters_test)

NEXT STEPS:
1. Apply the algorithm with the best hyperparameter values to the entire dyadic dataset
2. Retrain emobert on the whole dataset at the sentence level
3. Write guidelines