In [15]:
!pip install convokit



In [16]:
import os
import convokit
from convokit import Corpus, PolitenessStrategies, download
import timeit
import re
from numpy import mean
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from collections import Counter
import numpy as np
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from tqdm import tqdm
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
import nltk
from transformers import BertTokenizer

In [None]:
# First-time setup: download the "winning-args-corpus" corpus through ConvoKit.
corpus = Corpus(download("winning-args-corpus"))

# Convert the corpus to DataFrames; the 'vectors' column is dropped because it
# is not used in this notebook.
# reset_index() turns the frame index (the ConvoKit ids) into a regular 'id'
# column, which the lookups further down (df['id'] == ...) rely on.
utt_df = corpus.get_utterances_dataframe().drop(columns=['vectors']).reset_index()
convo_df = corpus.get_conversations_dataframe().drop(columns=['vectors']).reset_index()
speaker_df = corpus.get_speakers_dataframe().drop(columns=['vectors'])

# Save the DataFrames as CSV files in the current working directory.
# Every file keeps its ids: utterances/conversations carry them as the 'id'
# column created above, speakers are written together with their index.
# (Previously index=False was applied before reset_index(), which silently
# dropped the utterance and speaker ids from the exported files.)
utt_df.to_csv('utterances.csv', index=False)
convo_df.to_csv('conversations.csv', index=False)
speaker_df.to_csv('speakers.csv')

In [18]:
# Instruction text describing the "Delta Classifier" task: it explains what a
# delta is, the Title/Text layout of the thread context, and the "@"-depth
# notation for comments in a branch.
# NOTE(review): this string is not referenced by any other cell visible here;
# presumably it is sent to the model ahead of the prompt from promp_build -- confirm.
# NOTE(review): the "Comment Format" section below announces separate
# "Depth Indicator:" / "Text:" fields, while promp_build emits each comment as
# a single "<@ symbols> <text>" line -- verify that the two are meant to match.
identity_p = """
Hello and welcome to your task as a Delta Classifier, where you will analyze comments within the context of Reddit's Change My View (CMV) threads to predict their likelihood of receiving a delta.
A delta in CMV symbolizes a successful change in the original poster's viewpoint, indicating a highly persuasive or enlightening response.

Task Overview:
1. Thread Context: You will be provided with the title and text of the thread to understand the initial argument or perspective.
This context is crucial for evaluating the relevance and impact of each comment.
Format:
Title: [Thread Title]
Text: [Thread Text]

2. Comment Hierarchy and Analysis: Each comment is part of a structured conversation, branching off from either the original post or subsequent comments.
The depth of a comment in this conversation tree is indicated by "@" symbols, where:
"@" denotes direct responses to the original post.
"@@" signifies responses to comments directly addressing the original post, and so forth.
For every comment under consideration, you will receive the branch text.
Your job is to analyze the content, considering its position in the discussion, the arguments presented, the evidence cited, and its persuasiveness or capacity to enlighten.

Comment Format:
Depth Indicator: [@ symbols indicating depth]
Text: [Comment Text]
"""

In [22]:
# Preview the first five utterance rows to check the column layout.
utt_df.head(n=5)

Unnamed: 0,id,timestamp,text,speaker,reply_to,conversation_id,meta.pair_ids,meta.success,meta.approved_by,meta.author_flair_css_class,...,meta.num_reports,meta.replies,meta.report_reasons,meta.saved,meta.score,meta.score_hidden,meta.subreddit,meta.subreddit_id,meta.ups,meta.user_reports
0,t3_2ro9ux,,I can't remember the topic that spurred this d...,seanyowens,,t3_2ro9ux,[],,,,...,,"[cnhplrm, cnhpp4o, cnhq330, cnhs7xb, cnhpnmr, ...",,,,,,,,
1,t1_cnhplrm,1420697092.0,"Look at the definition you provided, if we rem...",Account9726,t3_2ro9ux,t3_2ro9ux,[p_1],1.0,,points,...,,"{'kind': 'Listing', 'data': {'modhash': '', 'c...",,False,20.0,False,changemyview,t5_2w2s8,20.0,[]
2,t1_cnhrvq7,1420700886.0,∆. Yours was the first comment I read to make...,seanyowens,t1_cnhplrm,t3_2ro9ux,[p_1],1.0,,,...,,"{'kind': 'Listing', 'data': {'modhash': '', 'c...",,False,2.0,False,changemyview,t5_2w2s8,2.0,[]
3,t1_cnhz66d,1420713997.0,As delta bot noted you should edit your commen...,Nepene,t1_cnhrvq7,t3_2ro9ux,[],,,points,...,,,,False,2.0,False,changemyview,t5_2w2s8,2.0,[]
4,t1_cniauhy,1420748639.0,[deleted],[deleted],t1_cnhrvq7,t3_2ro9ux,[],,,,...,,"{'kind': 'Listing', 'data': {'modhash': '', 'c...",,False,1.0,False,changemyview,t5_2w2s8,1.0,[]


In [25]:
def get_comment_branch(df, comment_id):
    """Collect the reply chain that leads from the thread root to a comment.

    Starting at ``comment_id``, the function follows the ``reply_to`` column
    upwards, one parent at a time, and returns the visited rows ordered from
    the oldest ancestor down to the requested comment.

    Parameters
    ----------
    df : pandas.DataFrame
        Utterance table with at least the columns ``id`` and ``reply_to``.
    comment_id : str
        Value of ``id`` identifying the comment whose branch is wanted.

    Returns
    -------
    pandas.DataFrame
        The rows of the branch with a fresh 0..n-1 index. Row 0 is the topmost
        ancestor that exists in ``df`` (the original post when the whole chain
        is present); the last row is the requested comment.

    Raises
    ------
    ValueError
        If ``comment_id`` does not occur in ``df['id']``.
    """
    branch = []
    visited = set()
    current_id = comment_id

    # Walk upwards until the chain ends: either the row has no parent (the
    # original post) or the parent is not present in the table.
    while True:
        current_comment = df[df['id'] == current_id]
        if current_comment.empty:
            break
        branch.append(current_comment)
        visited.add(current_id)

        parent_id = current_comment['reply_to'].values[0]
        # A missing parent (None/NaN) marks the root; a parent that was already
        # visited would mean a cyclic reply chain, which must not loop forever.
        if pd.isna(parent_id) or parent_id in visited:
            break
        current_id = parent_id

    if not branch:
        raise ValueError(f"comment id {comment_id!r} not found in 'id' column")

    # Rows were collected child-first; reverse them so the root comes first.
    return pd.concat(branch[::-1], ignore_index=True)

In [28]:
# Extract the reply chain that ends at comment 't1_cnhz66d' as a worked example.
df = get_comment_branch(df=utt_df, comment_id='t1_cnhz66d')

In [36]:
def promp_build(comment_branch_df, conv_df):
  """Render a comment branch as the text prompt for the classifier.

  The result starts with a ``Title:`` line (looked up through ``get_title``)
  and a ``Text:`` line holding the text of the first row of the branch. Every
  following row is written on its own line, prefixed with as many "@" symbols
  as its position in the branch (1 for the second row, 2 for the third, ...).

  Parameters
  ----------
  comment_branch_df : pandas.DataFrame
      Branch rows in root-first order; needs the columns ``text`` and
      ``conversation_id``.
  conv_df : pandas.DataFrame
      Conversation table handed on to ``get_title``.

  Returns
  -------
  str
      The assembled prompt; every line ends with a newline character.
  """
  title = get_title(comment_branch_df, conv_df)
  texts = comment_branch_df['text'].values

  pieces = [f"Title: {title}\n", f"Text: {texts[0]}\n"]
  # Row 0 is the post itself, so replies start at depth 1.
  pieces.extend(
    f"{'@'*depth} {reply}\n" for depth, reply in enumerate(texts[1:], start=1)
  )
  return ''.join(pieces)

def get_title(comment_branch_df, conv_df):
  """Return the title of the thread a comment branch belongs to.

  The thread is identified by the ``conversation_id`` of the first row of
  ``comment_branch_df``; its title is read from the ``meta.op-title`` column of
  the first row in ``conv_df`` whose ``id`` equals that value.

  Raises ``IndexError`` when the branch is empty or no conversation matches.
  """
  thread_id = comment_branch_df['conversation_id'].values[0]
  is_thread = conv_df['id'] == thread_id
  return conv_df.loc[is_thread, 'meta.op-title'].values[0]


In [38]:
# Show the prompt built for the example branch; print() renders the embedded newlines.
print(promp_build(comment_branch_df=df, conv_df=convo_df))

Title: CMV: Anything that is man-made is natural.
Text: I can't remember the topic that spurred this discussion, but a friend and I were debating whether man-made things were natural. He took the position that they are unnatural. 

He cited this definition by Merriam-Webster:  existing in nature and not made or caused by people : coming from nature (http://www.merriam-webster.com/dictionary/natural) as his basis for the distinction for natural vs. unnatural.

However, I respectfully disagree with his position and furthermore that definition of natural. People arise from nature. Humankind's capacity to create, problem-solve, analyze, rationalize, and build also come from natural processes. How are the things we create unnatural? It is only through natural occurrences that we have this ability, why is it that we would give the credit of these things solely to man, as opposed to nature? We are not separate from nature, thus, how can any of our actions or creations be unnatural? If we were