In [1]:
# Notebook setup: all imports in one place, then warning/display configuration.
import random
import warnings
from random import seed
from random import sample

import pandas as pd

# SettingWithCopyWarning is exposed from pandas.errors since pandas 1.5 and can
# no longer be imported from the private pandas.core.common module there.
# Try the public location first and keep the old path as a fallback so the
# notebook still starts on older installs.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        # Neither location provides the class (e.g. a release that dropped the
        # warning), so there is nothing to silence.
        SettingWithCopyWarning = None

if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Show long text cells in full instead of truncating them in rendered frames.
pd.options.display.max_colwidth = 10000

In [2]:
# Load the conversation CSV and switch the space-separated column names to snake_case.
df = pd.read_csv(
    "data/RED/depression_help_dyadic_convs_clean_emotion.csv",
    dtype={'conversation id': 'unicode'},
).rename(columns={
    'conversation id': 'conversation_id',
    'post title': 'post_title',
    'dialog turn': 'dialog_turn',
    'emotion prediction': 'emotion_prediction',
})

# Per-conversation counts; after dropping the other count columns, "dialog_turn"
# holds the number of non-null dialog_turn entries for each conversation id.
df_conv_len = (
    df.groupby("conversation_id")
    .count()
    .drop(columns=["subreddit", "post_title", "author", "text", "compound", "sentiment", "emotion_prediction"])
)
print("Number of conversations in subreddit: ", len(df_conv_len))

# Conversation ids that occur exactly once are set aside as monologues.
df_mono = df_conv_len[df_conv_len["dialog_turn"] == 1]
df_mono_ids = df_mono.reset_index()["conversation_id"]

# Conversation ids with more than 2 turns are kept as dialogues.
df_dia = df_conv_len[df_conv_len["dialog_turn"] > 2]
print("Number of conversations longer than 2 turns in subreddit: ", len(df_dia))
df_dia = df_dia.reset_index().drop(columns=['dialog_turn'])

# df_dia carries nothing but ids at this point, so the right join works as a
# filter: only rows belonging to a dialogue conversation remain in df.
df = df.join(df_dia.set_index('conversation_id'), on='conversation_id', how="right")

# Count the distinct authors of every remaining conversation and keep the ids
# that have more than one.
df_conv_authors = df.groupby("conversation_id")["author"].unique().reset_index()
df_conv_authors["author"] = df_conv_authors["author"].map(len)
df_conv_authors = df_conv_authors[df_conv_authors["author"] > 1].drop(columns=['author'])

# Same id-only right join as above, now restricting df to multi-author conversations.
df = df.join(df_conv_authors.set_index('conversation_id'), on='conversation_id', how="right")
print("Number of conversations longer than 2 turns with more than a single author in subreddit: ", len(df_conv_authors))

# TODO (from the original notes): additionally require at least 2 speaker turns.
# The earlier draft of that check was marked as needing correction and is not applied.

# Renumber rows 0..n-1 so that later cells can address them by position.
df = df.reset_index(drop=True)
print("Number of rows in df: ", len(df))

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Number of conversations in subreddit:  23680
Number of conversations longer than 2 turns in subreddit:  2600
Number of conversations longer than 2 turns with more than a single author in subreddit:  2511
Number of rows in df:  9723


In [3]:
# Draw 30 distinct row positions of df. The generator is seeded first so the
# same positions come back on every run.
seed(1)
# candidate positions: 0 .. len(df) - 1
sequence = list(range(len(df)))
# sampling without replacement
subset = sample(sequence, 30)
print(subset)

[2201, 9325, 1033, 4179, 1931, 8117, 7364, 7737, 6219, 3439, 1537, 7993, 464, 6386, 7090, 34, 7297, 4363, 3748, 9685, 1674, 5200, 501, 365, 416, 8870, 150, 6245, 3548, 6915]


In [4]:
# Display the row at position 6245 (one of the sampled positions) as a one-row frame
df.iloc[[6245]]

Unnamed: 0,conversation_id,subreddit,post_title,author,dialog_turn,text,compound,sentiment,emotion_prediction
6245,8069,depression_help,Student in dire need for motivation,Aenarion21,1.0,"Hello. Like many others that come here, I need help. Or I think I need it. Or I need someone to tell me how to help myself. &amp;#x200B; I’m a student, 26 years old, currently working on my thesis (on human reliability analysis \[HRA\]) for my mechanical engineering degree. I really like this career. I have studied it for years, and though it hasn’t been easy, and I had some courses I really disliked, I passed every one of them. There were semesters where I spent most of my weekends at the university, where I didn’t see my friends for months and I slept like 2-hours most days; but I liked it there: the pressure, the difficult tests and all (mine is one of the top engineering universities in my country). After finishing my courses, I started working on a thesis about fluid mechanics and heat transfer, heavily focused on simulation of turbulent flow. I liked it. Before going for engineering, I thought about getting into science. And this felt like science, it was a cutting-edge investigation, heavily funded. But I couldn’t finish it. Suddenly, I didn’t have the structure of periodic test and a schedule to keep at uni. I had more “free” time that I knew what to do with it. But also, I started having problems with my thesis, specifically, I didn’t have the pc necessary to do the complicated simulations I needed to get my results. My Thesis Supervisor wanted to help but couldn’t get me the pc I needed, although he tried. &amp;#x200B; During this period, depression hit me hard, and I let this go on for several months. When I finally got myself to talk and tell everything to my Thesis Supervisor, I had already lost a year of my life. He understood completely, he was very supportive and kind, but I just couldn’t work on any thing related to fluid mechanics again, which was his area. 
&amp;#x200B; After that, and not long ago, I went to another professor, with whom I could be friends with (I think), if we weren’t student and professor and didn’t have so much age difference. Anyway, he understood what I went trough and offered me another thesis, on HRA, on which I’m currently working. This was a little before the holidays, and he encouraged me to travel or something, to clear my head. But now I’m back, and I must schedule a meeting with him soon. &amp;#x200B; Beside this, I have to tell you about my personal life. After a long while single, I have a girlfriend now, we have been together for some time and it’s going great. This relationship is my peace of mind and source of happiness. But my life at home isn’t great. We are not doing well on money and my parents fight a lot. They are also tidiness maniacs, which I’ve always hated. We have talked several times, and I know I’m not young, I should be working right now, and I think many of my mood problems would go away if I was living on my own, or with friends. But to do that, I need to finish my thesis, and I can’t find the will to do that. I thought the source of my depression was the hard times at university, and that it got worse when I couldn’t finish my previous thesis; but now, after I got another investigation topic and even after some great holidays with my girlfriend, I still can’t seem to find the will to do the research, to write about what I have read. Instead, I play some stupid game on my smartphone or watch series on Netflix. I don’t even like playing games too much or staying in bed until late, but still every time I try to work, I get sleepy, I play without really enjoying the game, or lose my time on memes or other web pages I don’t really care that much about anyway. Seems like anything goes before what I really have to do, but in my mind I’m all too conscious about what I have to do, yet it’s like I’m hitting an invisible wall every time I open my thesis. 
&amp;#x200B; First, I thought I would feel better when I got past the difficult courses at uni. Then I thought I would get better after changing my thesis. I thought I would feel better having a partner. Even when things with my family are perfect, I don’t feel so good. I know this is not as bad as other people’s situation. I know I could be doing a lot worse, but I don’t need that right now. I need to find a way to motivate myself to read and to write. &amp;#x200B; I’m not sure what I came looking for here, but maybe I just needed to write this whole thing down and hope someone would understand and say something helpful. Or maybe just posting this post, will help me understand myself a little bit better.",0.998,positive,guilty


In [5]:
# Conversation ids chosen for manual annotation, listed in the order in which
# they are bound to df1 .. df13 below.
annotated_conversation_ids = [
    2686, 2408, 8035, 569, 8314, 64, 9854,
    4576, 6635, 608, 510, 11697, 8069,
]

# Each selection is an explicit copy: the frames get new columns assigned later,
# and assigning into a bare boolean-mask selection of df is what raises
# SettingWithCopyWarning. The duplicated df9 assignment of the earlier version
# is gone because every id is now listed exactly once.
(df1, df2, df3, df4, df5, df6, df7,
 df8, df9, df10, df11, df12, df13) = (
    df[df.conversation_id == conversation_id].copy()
    for conversation_id in annotated_conversation_ids
)

In [6]:
# Manual ground-truth labels. Each pair is (conversation frame, satisfaction label);
# the engagement label is 1 for every annotated conversation.
for frame, satisfaction in (
    (df1, 1),
    (df2, 1),
    (df3, 0),
    (df4, 1),
    (df5, 1),
    (df6, 0),
    (df7, 1),
    (df8, 1),
    (df9, 1),
    (df10, 0),
    (df11, 1),
    (df12, 1),
    (df13, 1),
):
    # The scalar is broadcast to every row of the conversation.
    frame["ground_truth_satisfaction"] = satisfaction
    frame["ground_truth_engagement"] = 1

In [7]:
# Display the rows selected for conversation 2686, including the two ground_truth_* columns.
df1

Unnamed: 0,conversation_id,subreddit,post_title,author,dialog_turn,text,compound,sentiment,emotion_prediction,ground_truth_satisfaction,ground_truth_engagement
2196,2686,depression_help,I am simply a mess.,holmes341,1.0,"I hate myself, first off. I do the wrong things, say the wrong things. I feel like im a huge dickhead. I think i critisize people too often. I feel so conflicted because i want to go to the people who make me feel sad and talk about my feelings but i feel like thats the wrong thing to do. Im awkward and i can barely start a conversation most of the time. Im overweight and at this point i think i might be crossing the line into obesity. I procratinate too much. I get mad over the most basic things like my mom telling me to do something. I want help from people i trust so badly but i feel like im just bugging them. I just want to change.",-0.7609,negative,joyful,1,1
2197,2686,depression_help,I am simply a mess.,midnightsummerlove,2.0,"First off - you are your own kind of beautiful. You are already on the right step - seeking help. It won't be an easy journey, but you will overcome this and you will come out stronger. I feel the same way, but there are people who can help you and that do care about you.",0.9287,positive,grateful,1,1
2198,2686,depression_help,I am simply a mess.,holmes341,3.0,"Thanks man, i really appreciate it :)",0.8464,positive,sympathizing,1,1
2199,2686,depression_help,I am simply a mess.,midnightsummerlove,4.0,How are you today?,0.0,neutral,guilty,1,1
2200,2686,depression_help,I am simply a mess.,holmes341,5.0,"Ive been pretty irritable today, but i havent really been depressed because ive been out with friends.",0.8427,positive,sentimental,1,1
2201,2686,depression_help,I am simply a mess.,midnightsummerlove,6.0,One step at a time :),0.4588,positive,excited,1,1


In [8]:
# Stack the 13 annotated conversations, in order, into one frame with a fresh
# 0..n-1 index. DataFrame.append was deprecated in pandas 1.4 and removed in
# 2.0; a single pd.concat gives the same result and avoids re-copying the
# accumulated frame at every step.
df_annotated = pd.concat(
    [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13],
    ignore_index=True,
)

In [9]:
# How many distinct conversations ended up in the annotated frame
df_annotated["conversation_id"].nunique()

13

In [10]:
# Preview the first two rows of the combined annotated frame.
df_annotated.head(2)

Unnamed: 0,conversation_id,subreddit,post_title,author,dialog_turn,text,compound,sentiment,emotion_prediction,ground_truth_satisfaction,ground_truth_engagement
0,2686,depression_help,I am simply a mess.,holmes341,1.0,"I hate myself, first off. I do the wrong things, say the wrong things. I feel like im a huge dickhead. I think i critisize people too often. I feel so conflicted because i want to go to the people who make me feel sad and talk about my feelings but i feel like thats the wrong thing to do. Im awkward and i can barely start a conversation most of the time. Im overweight and at this point i think i might be crossing the line into obesity. I procratinate too much. I get mad over the most basic things like my mom telling me to do something. I want help from people i trust so badly but i feel like im just bugging them. I just want to change.",-0.7609,negative,joyful,1,1
1,2686,depression_help,I am simply a mess.,midnightsummerlove,2.0,"First off - you are your own kind of beautiful. You are already on the right step - seeking help. It won't be an easy journey, but you will overcome this and you will come out stronger. I feel the same way, but there are people who can help you and that do care about you.",0.9287,positive,grateful,1,1


In [11]:
# Write the annotated conversations to CSV; index=False leaves the row index out of the file.
annotated_csv_path = "data/RED/annotated/depression_help_annotated.csv"
df_annotated.to_csv(annotated_csv_path, index=False)