In [1]:
import html
import itertools
import re
from collections import defaultdict
from datetime import datetime
from datetime import timedelta

import ipywidgets as widgets
import pandas as pd
import praw
from IPython.display import HTML

# Reddit client used by every cell below. `site_name` selects a named PRAW
# configuration site — presumably that is where the credentials live, since
# none are passed here (verify against the local PRAW config).
reddit = praw.Reddit(site_name="ssc", user_agent="reports-tool")

# Let DataFrame cells show up to 150 characters so comment bodies stay readable.
pd.set_option("display.max_colwidth", 150)

In [2]:
# Materialise the reports listing of the 'mod' subreddit into a list so later
# cells can iterate it repeatedly; limit=None asks PRAW for as many items as
# it can fetch.
queue = [item for item in reddit.subreddit('mod').mod.reports(limit=None)]

In [3]:
def get_reports(comment):
    """Tally the reports on a comment by reason.

    User reports are read as ``(reason, count, ...)`` entries and contribute
    ``count``; moderator reports are ``(reason, moderator)`` pairs and
    contribute one each.

    Returns a ``defaultdict`` mapping reason -> total, defaulting to 0 for
    reasons that were never reported.
    """
    tally = defaultdict(lambda: 0)

    # User reports already carry their own count in position 1.
    for entry in comment.user_reports:
        tally[entry[0]] += entry[1]

    # Each moderator report counts once; who filed it is irrelevant here.
    for reason, _moderator in comment.mod_reports:
        tally[reason] += 1

    return tally

# Comments that comment_to_dict failed to convert, kept for later inspection.
problematic_comments = list()

def comment_to_dict(comment):
    """Flatten one reported comment into a row for the reports table.

    The row holds the author name, score, the per-reason report tally, the
    number of reports whose reason contains "quality", the number whose
    reason does not, the comment's age, its body, the comment object itself
    and a full permalink.

    Best-effort: if anything goes wrong while building the row, the error is
    printed, the comment is appended to ``problematic_comments`` and an empty
    dict is returned instead.
    """
    try:
        reports = get_reports(comment)

        # Split the tally into "quality" reports and everything else.
        quality_reports = 0
        non_quality_reports = 0
        for reason, count in reports.items():
            if "quality" in reason:
                quality_reports += count
            else:
                non_quality_reports += count

        row = {}
        row["author"] = comment.author.name
        row["score"] = comment.score
        row["reports"] = reports
        row["quality"] = quality_reports
        row["non-quality"] = non_quality_reports
        # Both sides are naive local datetimes, so the difference is the elapsed time.
        row["age"] = datetime.now() - datetime.fromtimestamp(comment.created_utc)
        row["body"] = comment.body
        row["object"] = comment
        row["permalink"] = "https://www.reddit.com" + comment.permalink
        return row
    except Exception as e:
        print("Whoops! {0} {1}".format(comment, e))
        problematic_comments.append(comment)
        return {}

# Only comments (not submissions) whose author is still known can be turned
# into rows; this is a lazy one-shot iterator, consumed on the next line.
comments = (
    item for item in queue
    if isinstance(item, praw.models.Comment) and item.author is not None
)

# Rows that failed conversion come back as {} and turn into all-NaN rows,
# which dropna() removes.
df = pd.DataFrame.from_dict(list(map(comment_to_dict, comments))).dropna()

# NOTE(review): this keeps comments that are AT LEAST 30 days old — confirm
# that direction is intended rather than "the last 30 days".
df_filtered = df.loc[df['age'] >= timedelta(days=30)]


In [4]:
# Per-author totals over df_filtered, worst offenders first: most non-quality
# reports, ties broken by lowest score.
# The numeric columns are selected BEFORE summing: the frame also holds dicts,
# strings and comment objects, and summing those is wasted work at best and
# raises on pandas versions that no longer drop non-numeric columns silently.
(
    df_filtered
    .groupby('author')[['non-quality', 'quality', 'score']]
    .sum()
    .sort_values(['non-quality', 'score'], ascending=[False, True])
)

Unnamed: 0_level_0,non-quality,quality,score
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [None]:
# Per-author totals over all reported comments, ranked by quality reports
# (descending), ties broken by fewest non-quality reports.
# The numeric columns are selected BEFORE summing: the frame also holds dicts,
# strings and comment objects, and summing those is wasted work at best and
# raises on pandas versions that no longer drop non-numeric columns silently.
(
    df
    .groupby('author')[['quality', 'non-quality']]
    .sum()
    .sort_values(['quality', 'non-quality'], ascending=[False, True])
)

In [None]:
# Reports on comments at most three days old, most non-quality reports first,
# ties broken by lowest score.
current_reports = df[(df['age'] <= timedelta(days=3.0))].sort_values(['non-quality', 'score'], ascending=[False, True])

# DataFrame.assign treats every keyword as a column name, so no `axis`
# argument is passed (it would just add a column called "axis").
# The table is rendered with escape=False so the link markup survives, which
# means every other cell is emitted verbatim as well: the user-written comment
# body is HTML-escaped here so it shows up as text instead of being
# interpreted as markup by the notebook.
with_hyperlink = current_reports.assign(
    body=current_reports['body'].map(html.escape),
    permalink=current_reports['permalink'].map(lambda link: '<a href="{0}?context=1">link</a>'.format(link)),
)
HTML(with_hyperlink[['non-quality', 'score', 'author', 'age', 'body', 'permalink']].to_html(escape=False))

In [None]:
# Finding patterns: every comment with two or more non-quality reports,
# grouped visually by author with each author's most-reported comment first.
df.loc[df['non-quality'] >= 2.0].sort_values(
    by=['author', 'non-quality'],
    ascending=[True, False],
)

In [None]:
#df[(df['age'] >= timedelta(days=3.0)) & (df['quality'] == 0)].sort_values(['non-quality', 'age'], ascending=[False, True])
#df[(df['age'] >= timedelta(days=10.0)) & (df['quality'] == 0)].apply(lambda comment: print(comment['object'].mod.approve()), axis=1)

In [None]:
def is_user_authored_text(s):
    """Return True for a non-empty line that does not start with ">".

    Lines whose first character is ">" are treated as quoted text rather
    than the commenter's own words.
    """
    return bool(s) and not s.startswith(">")

def first_non_quote_line(s):
    """Return the first line of ``s`` that is_user_authored_text accepts.

    If no line qualifies (or anything else goes wrong while looking), a
    notice is printed and ``s`` is returned unchanged.
    """
    try:
        # filter() is lazy, so scanning stops at the first accepted line.
        return next(filter(is_user_authored_text, s.splitlines()))
    except Exception:
        print("Comment is all shit?")
        return s

def first_n_words(s, n):
    """Return the first ``n`` space-separated pieces of ``s``, re-joined with spaces.

    Splitting is on single spaces only, so consecutive spaces yield empty
    pieces that still count towards ``n``.
    """
    pieces = s.split(" ")
    leading = pieces[:n]
    return " ".join(leading)


import re
def sanitize(s):
    """Prepare comment text for use as the label of a Markdown link.

    Two rewrites are applied, in order:

    1. Inline Markdown links whose target starts with ``http`` are replaced
       by their label (``[label](http...)`` -> ``label``).
    2. Every remaining ``[`` is backslash-escaped.

    Args:
        s: the text to clean up.

    Returns:
        The cleaned-up string.
    """
    # The link target is matched with ``[^)\n]*`` instead of a greedy ``.*``:
    # the greedy form runs to the LAST ")" on the line, so with two links on
    # one line everything between them was swallowed into the first match.
    # Trade-off: a target that itself contains ")" is cut at that character.
    without_links = re.sub(r"\[([^\[]*)\]\(http[^)\n]*\)", r"\1", s)
    # Plain str.replace: no regex needed to escape a single literal character.
    return without_links.replace("[", "\\[")

def make_blurb(comment):
    """Build a one-line Markdown blurb for a row of the reports table.

    The blurb names the author and links the first 20 words of the comment's
    first non-quoted line (sanitized) to the comment itself, with context.
    ``comment`` is indexed by 'body', 'author' and 'object'.
    """
    opening_line = first_non_quote_line(comment['body'])
    body_blurb = sanitize(first_n_words(opening_line, 20))
    template = '/u/{0}: ["{1}..."](https://www.reddit.com{2}?context=3&sort=best)'
    return template.format(comment['author'], body_blurb, comment['object'].permalink)

# Candidate "pearls": rows of df_filtered with at least two quality reports.
filtered = df_filtered[df_filtered['quality'] >= 2]

# Rank by quality reports per point of score, best ratio first.
# NOTE(review): a score of 0 makes this a division by zero (pandas presumably
# yields inf, which would sort first) and a negative score gives a negative
# ratio — confirm that ordering is acceptable.
pearled = filtered.assign(pearl_ratio=filtered['quality']/filtered['score']).sort_values("pearl_ratio", ascending=False)

try:
    # Build the blurbs as a plain list, one per row. With no rows this is an
    # empty list, so an empty `pearled` is handled without raising.
    blurbed = pearled.assign(blurb=[make_blurb(row) for _, row in pearled.iterrows()])
    print('\n\n'.join(blurbed['blurb'].tolist()))
except Exception as e:
    # Still best-effort, but say what went wrong instead of hiding it: a
    # silent failure leaves `blurbed` undefined (or stale from an earlier run)
    # for the cells below.
    print("Could not build blurbs: {0!r}".format(e))

In [None]:
#blurbed.apply(lambda comment: print(comment['object'].mod.approve()), axis=1)

In [None]:
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of length ``n``.

    The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    Works on anything that supports ``len()`` and slicing.
    """
    starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in starts)
        
# One display string per blurb, ready to be posted in batches.
formatted_comments = list(map("{0}".format, blurbed['blurb'].tolist()))
#for l in chunks(formatted_comments, 8):
#    thread.reply("\n\n".join(l))