In [1]:
import warnings
warnings.filterwarnings("ignore")

import time
import pandas as pd
import csv
import helpers
import sarcastic
from engagement import engagement_preprocessing
from satisfaction import satisfaction_preprocessing
from helpers import round_sig

In [2]:
# Turn off pandas' chained-assignment check so no SettingWithCopy
# warning/error is raised by later column assignments.
pd.options.mode.chained_assignment = None

# Display long column text: allow up to 1000 characters per cell.
pd.set_option('display.max_colwidth', 1000)

In [3]:
# Train the sarcasm classification model once up front.
# sarcastic.train() returns a (tokenizer, model) pair; both objects are passed
# unchanged to satisfaction_preprocessing() in the prediction cell.
# NOTE(review): training cost/determinism is not visible from here -- confirm
# before relying on Restart & Run All being cheap.
tokenizer, model = sarcastic.train()

In [56]:
# Subreddit whose cleaned dump is labeled; both CSV locations derive from it.
subreddit_name = "offmychest"
in_path = f"data/RED/clean/{subreddit_name}_clean.csv"
out_path = f"data/RED/clean/labeled/{subreddit_name}_2_clean_labeled.csv"

In [40]:
# Load the cleaned conversations and rename the space-separated column
# headers to the underscore names used by the rest of the notebook.
df = pd.read_csv(in_path).rename(
    columns={
        'conversation id': 'conversation_id',
        'post title': 'post_title',
        'dialog turn': 'dialog_turn',
        'emotion prediction': 'emotion_prediction',
    }
)

In [51]:
# Row position that splits df into two halves (rounded down).
half = len(df) // 2

In [52]:
# Group each half of the rows by conversation_id. `.groups` maps every
# conversation id to the index labels of its rows within that half.
# Grouping is by conversation_id only (the subreddit is not part of the key).
# The key is passed as a scalar label rather than a one-element list so the
# dictionary keys stay plain ids; newer pandas releases key some groupby
# results by 1-tuples when given a length-1 list, which would hand a tuple to
# helpers.extract_responses() in the prediction loop.
# NOTE(review): a conversation whose rows straddle the `half` boundary appears
# in both mappings -- confirm whether that duplication is acceptable.
grouped1 = df.iloc[:half].groupby('conversation_id').groups
grouped2 = df.iloc[half:].groupby('conversation_id').groups

In [53]:
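# Debugging aid (disabled): restrict a run to a hand-picked list of conversation ids.
# NOTE: `grouped` is not defined in the cells above; substitute grouped1 or grouped2 before re-enabling.
#s = [8167]
#subset = {x: grouped[x] for x in s}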

In [54]:
# BEST HYPERPARAMETERS
#
# Each weight multiplies the like-named feature in the prediction cell; the
# weighted sum is compared (>=) against the matching threshold to produce a
# 0/1 label.

# -- Engagement score --
eng_threshold = 2.75           # engagement_score >= this -> predicted_engagement = 1
num_turns_weight = 0.75        # x num_turns
interleaved_weight = 0.75      # x interleaved
token_length_weight = 0.025    # x token_length_score
diff_weight = -0.25            # x num_turn_diff (negative weight)

# -- Satisfaction score --
sat_threshold = 0.6                  # satisfaction_score >= this -> predicted_satisfaction = 1
slope_weight = 0.5                   # x slope
sentiment_change_weight = 0.5        # x sentiment_change
grateful_bonus_weight = 3.25         # x grateful_bonus
profanity_penalty_weight = 0.5       # x profanity_penalty
sarcasm_penalty_weight = 0.5         # x sarcasm_penalty
disagreement_penalty_weight = 0.5    # x disagreement_penalty

In [57]:
# PREDICT ENGAGEMENT AND SATISFACTION USING BEST HYPERPARAMETERS ON DATASET
#
# For every conversation id in grouped2:
#   1. extract the conversation and its speaker/listener turns from df,
#   2. skip it when num_speaker_responses < 2 or num_turns < 3,
#   3. label engagement and satisfaction by thresholding a weighted sum of the
#      preprocessed features,
# then write all labeled conversations to out_path as one CSV.

start = time.time()
cols = df.columns.tolist()
# The subreddit of the first row is used for every conversation in the file.
subreddit = df.iloc[0]['subreddit']

# Columns (and their order) written to the labeled CSV.
output_columns = [
    'conversation_id', 'subreddit', 'post_title', 'author', 'dialog_turn', 'text',
    'predicted_satisfaction', 'predicted_engagement',
    'compound', 'sentiment', 'emotion_prediction', 'token_length',
    'sentences', 'sentence_compounds', 'strongest_compound',
]

# Labeled conversations are collected here and concatenated once after the
# loop. DataFrame.append (used previously) was removed in pandas 2.0 and
# re-copied the whole accumulated frame on every iteration.
labeled_conversations = []

for conv_id in grouped2:
    conversation, speaker, listener = helpers.extract_responses(conv_id, subreddit, df)

    print(conv_id)

    # Predict engagement
    (
        num_turns,
        interleaved,
        token_length_score,
        num_turn_diff,
        num_speaker_responses,
        num_listener_responses,
        conversation,
    ) = engagement_preprocessing(speaker, listener, conversation)

    # Conversations that are too short are left out of the output entirely.
    if num_speaker_responses < 2 or num_turns < 3:
        continue

    engagement_score = (
        num_turns_weight*num_turns
        + interleaved_weight*interleaved
        + token_length_weight*token_length_score
        + diff_weight*num_turn_diff
    )
    engagement = 1 if engagement_score >= eng_threshold else 0
    conversation['predicted_engagement'] = engagement

    # Predict satisfaction
    (
        slope,
        sentiment_change,
        grateful_bonus,
        profanity_penalty,
        sarcasm_penalty,
        disagreement_penalty,
    ) = satisfaction_preprocessing(conversation, speaker, tokenizer, model)
    satisfaction_score = (
        slope_weight*slope
        + sentiment_change_weight*sentiment_change
        + grateful_bonus_weight*grateful_bonus
        + profanity_penalty_weight*profanity_penalty
        + sarcasm_penalty_weight*sarcasm_penalty
        + disagreement_penalty_weight*disagreement_penalty
    )
    satisfaction = 1 if satisfaction_score >= sat_threshold else 0
    conversation['predicted_satisfaction'] = satisfaction

    labeled_conversations.append(conversation)

if labeled_conversations:
    df_preds = pd.concat(labeled_conversations)[output_columns]
else:
    # No conversation passed the length filter: write a header-only CSV
    # instead of failing on the missing prediction columns.
    df_preds = pd.DataFrame(columns=output_columns)

df_preds.to_csv(out_path, index=False)

end = time.time()
# Wall-clock duration of the whole cell, in minutes.
minutes = (end - start) / 60

3
6
34
37
41
45
48
50
69
76
79
81
86
88
99
272
292
293
303
313
315
318
321
324
328
344
351
356
375
378
391
396
405
410
411
420
425
445
456
459
463
464
481
498
504
508
513
528
532
536
547
552
556
565
576
577
584
585
588
599
600
606
609
631
632
633
636
643
652
661
662
663
669
677
679
681
685
688
698
703
704
705
724
727
730
732
736
740
752
753
760
763
770
779
780
782
785
793
795
797
808
824
825
834
841
843
857
858
860
861
863
869
873
885
886
890
892
894
895
899
908
914
916
919
924
926
928
930
936
938
944
954
961
963
969
970
972
973
974
989
992
997
998
2715
2718
2723
2729
2731
2738
2745
2754
2758
2762
2764
2769
2772
2773
2774
2786
2787
2789
2807
2822
2826
2831
2832
2839
2868
2876
2877
2881
2883
2886
2889
2893
2894
2897
2898
2900
2901
2906
2908
2914
2917
2935
2949
2976
2978
2985
2989
3001
3002
3004
3010
3011
3013
3014
3017
3028
3047
3048
3054
3055
3059
3060
3073
3076
3078
3089
3090
3119
3125
3137
3140
3141
3155
3163
3176
3182
3184
3185
3187
3188
3191
3197
3219
3228
3229
3247
3254
3255
3257




277403
277404
277409
277421
277422
277423
277441
277453
277454
277458
277462
277463
277464
277466
277467
277474
277478
277479
277480
277495
277497
277502
277511
277514
277532
277539
277547
277549
277560
277561
277562
277569
277573
277576
277578
277586
277588
277603
277604
277605
277612
277613
277614
277626
277629
277633
277635
277638
277642
277643
277651
277653
277657
277659
277663
277668
277679
277683
277693
277695
277697
277701
277710
277712
277723
277732
277735
277744
277748
277751
277770
277772
277791
277813
277816
277818
277819
277824
277840
277841
277842
277849
277851
277852
277854
277866
277867
277868
277878
277891
277893
277901
277903
277906
277907
277908
277914
277925
277926
277928
277940
277949
277950
277952
277956
277965
277982
278000
278006
278017
278023
278024
278031
278035
278036
278043
278047
278054
278060
278065
278066
278068
278073
278076
278079
278082
278083
278087
278092
278093
278102
278106
278120
278125
278142
278145
278163
278168
278171
278173
278174
278189
278190



360556
360562
360563
360567
360570
360574
360582
360589
360596
360605
360608
360619
360626
360628
360638
360640
360644
360648
360650
360661
360669
360672
360673
360674
360679
360694
360711
360713
360721
360724
360725
360727
360733
360741
360743
360748
360750
360751
360754
360759
360767
360771
360772
360778
360780
360788
360799
360808
360809
360810
360813
360816
360818
360823
360834
360843
360871
360873
360888
360896
360921
360926
360931
360937
360946
360951
360958
360966
360981
361000
361020
361024
361029
361036
361057
361063
361067
361077
361081
361087
361101
361106
361108
361113
361121
361127




361144
361153
361167
361172
361173
361174
361181
361185
361211
361220
361222
361224
361226
361231
361237
361243
361254
361256
361274
361285
361287
361298
361306
361308
361309
361310
361321
361327
361335
361340
361343
361344
361348
361352
361364
361371
361390
361405
361417
361436
361442
361445
361446
361448
361451
361456
361457
361493
361497
361499
361528
361530
361536
361553
361555
361558
361561
361564
361565
361568
361589
361591
361606
361609
361996
362103
362108
362307
362386




362559




362700
362765
362961
363003
363139
363162
363204




363430
363817
364251
364258
364291
364402
364430
364477
364533
364586
364620
364707
364884
365272
365343
365467
365536
365771
366016
366022
366190
366209
366783
367003
367069
367101
367275
367662
367750
367880
368035
368254
368454
368469
368490
368540
368657
369017
369364
369555
369693
369937
370202
370556
370939
371057
371084
371106
371136
371199
371235
371287
371355
371362
371588
371825
371910
371929
371932
372178
372482
372521
372523
372573
372661
372765
373086
373112
373166
373255
373587




373629
373637
374026
374135
374375
374476
374592
374632
375234
375484
375683
375898
375989
376595
376672
376763
376813




376834
376957
377154
377685
377768
378020
378495
378553
378619




378705
378790
378982
379121
379346
379599
379775
379840
379847
380117
380226
380278
380280
380295
380448
380572
380602
380643
380843
380866
381135
382018
382057
382122
382159
382216
382468
382525
382953
382998
383027
383288
383596
383781
383859
384091
384214
384640
384934
384980
385158
385212
385265
385357
385586
385687
385876
385899
386104
386247
386434
386541
386625
386791
387189
387393
387475
387583
387596
387970
388072
388307
388398
388777
389061
389242
389321
390287
390318
390377
390379
390455
390542
390963
391345
391505
391579
392138
392291
392382
392454
392566
392788
392851
393188
393206
393247
393278
393282
393750
393848
393964
394024
394279
394289
394450
394513
394560
394820
394996
395000
395094
395221
395533
395675
396375
396529
396682
396726
396934
396958
396988
397034
397233
397888
398157
398272
398326
398364
398418
398503
398557
398618
398659
398863
398883
398979
399042
399593
399663
399717
400230
400419
400526
400636
400645
400758
400906
401778
401883
402846
403019
403132