In [1]:
import praw
import pandas as pd
import matplotlib.pyplot as plt
from lxml.html import fragment_fromstring
from html import unescape
import re
import json

p = re.compile("(\S+)")

def _wordcount(s):
    return(len(p.findall(s)))

def wc(html):
    """Count the words contained in the text nodes of an HTML fragment.

    ``html`` is HTML-unescaped, parsed with ``fragment_fromstring``, and the
    word counts of every text node under the resulting element are summed.
    """
    # NOTE(review): the input is entity-unescaped before parsing, so this
    # expects escaped markup - confirm against what the API client returns.
    fragment = fragment_fromstring(unescape(html))
    return sum(_wordcount(text) for text in fragment.xpath('.//text()'))

def plot(df, line, file, **kwargs):
    """Draw ``df`` as a bar chart, save the figure to ``file`` and close it.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to draw; plotted via ``df.plot.bar``.
    line : bool
        When true, a black horizontal line is drawn at y=0 on every axes.
    file : str or path-like
        Destination passed to ``savefig``.
    **kwargs
        Forwarded unchanged to ``df.plot.bar`` (the callers in this notebook
        pass ``subplots``, ``layout``, ``figsize``, ``legend`` and ``ylim``).
    """
    # No plt.figure() up front: the plotting call supplies the figure it draws
    # on, so a figure opened here would stay empty and never be closed.
    axes = df.plot.bar(**kwargs)

    # The plotting call hands back an array of Axes when subplots are
    # requested and a single Axes otherwise; normalise to a flat sequence.
    axes_flat = axes.flatten() if hasattr(axes, "flatten") else [axes]

    for ax in axes_flat:
        if line:
            ax.axhline(0, color="k")
        if ax.get_yscale() == "log":
            # `base`/`subs` are the current keyword names; the axis-specific
            # `basey`/`subsy` spellings are no longer accepted by matplotlib.
            ax.set_yscale("log", base=10, subs=[2, 3, 4, 5, 6, 7, 8, 9])

    # Operate on the figure that actually holds the axes rather than on
    # pyplot's notion of the "current" figure.
    fig = axes_flat[0].figure
    fig.tight_layout()
    fig.savefig(file)
    plt.close(fig)

# Reddit API client used by the scraping cell below.
# NOTE(review): "scanner" is presumably a site section in praw.ini that holds
# the credentials - confirm the local configuration.
r = praw.Reddit(
    "scanner",
    user_agent="scraper/0.1",
)


# Player handles exactly as listed (with the "/u/" prefix and original casing).
raw_roster = [
    '/u/a_sneaky_meerkat', '/u/awesomewow', '/u/BillBrasky', '/u/bttfforever',
    '/u/bubbasaurus', '/u/CapZapBrannigan', '/u/CauldronThief', '/u/coyotefacts',
    '/u/darthrobyn', '/u/dawnphoenix', '/u/Diggenwalde', '/u/Discoferry',
    '/u/emmach17', '/u/emsmale', '/u/erabel', '/u/FancyZombie5',
    '/u/findthesky', '/u/Flabbergasted_rhino', '/u/funkimon', '/u/HorrorpopZombie',
    '/u/Icetoa180', '/u/imaginarystudy', '/u/jilliefish', '/u/kabubum',
    '/u/kemistreekat', '/u/kingjaime_', '/u/littlebs8', '/u/LoneWolfOfTheCalla',
    '/u/manelski4', '/u/Mrrrrh', '/u/O0mimsy0o', '/u/PenguinJassy',
    '/u/Penultima', '/u/pezes', '/u/pizzabangle', '/u/Ravenclawmuggle',
    '/u/RavenoftheSands', '/u/rightypants', '/u/RissaJo685', '/u/Rysler',
    '/u/sharon-carter', '/u/spacedoutman', '/u/tana-ryu', '/u/TeacherTish',
    '/u/tinyporcelainehorses', '/u/-MrJ-', '/u/Apex--Redditer', '/u/annul',
    '/u/Chefjones', '/u/drippingalchemy', '/u/elbowsss', '/u/HermioneReynaChase',
    '/u/hpello', '/u/infinityxero', '/u/jarris123', '/u/jfinner1',
    '/u/Larixon', '/u/Lucygirl9-17', '/u/MacabreGoblin', '/u/megabanette',
    '/u/Miicle', '/u/NiteMary', '/u/oddfictionrambles', '/u/oomps62',
    '/u/planetjune', '/u/PM_ME_YOUR_BREAKFAST', '/u/qngff', '/u/ravenclawroxy',
    '/u/RD917', '/u/Ryan814', '/u/seanmik620', '/u/spludgiexx',
    '/u/suitelifeofem', '/u/TalkNerdyToMe20', '/u/theDUQofFRAT', '/u/Throwawayjust_incase',
    '/u/UlyNeves', '/u/victorcaet', '/u/WalrusPeon', '/u/Waygookin_saram',
]
# Same handles with the three-character "/u/" prefix dropped, lower-cased.
roster = [handle[len("/u/"):].lower() for handle in raw_roster]

In [2]:
sub = r.subreddit("hogwartswerewolves")
subms = sub.submissions()

# One entry per submission; each entry is a list of
# (lower-cased author name, word count) pairs for comments by roster members.
data = []

# Take the next 13 submissions from the listing, one at a time.
for _ in range(13):
    submission = next(subms)
    # Resolve "load more comments" placeholders before flattening the tree.
    submission.comments.replace_more()
    data.append([
        (c.author.name.lower(), wc(c.body_html))
        for c in submission.comments.list()
        if c.author in roster
    ])
    print(submission.title)

# Flip the collection order in place (the printed titles run from the latest
# phase down to the prologue, so afterwards index 0 is the prologue).
data.reverse()

Game VI Phase 12: The Hunter, the Headless, and the Shell-shocked
Game VI Phase 11: Goodbye, Mothman.
Game VI Phase 10: Mob Mentality
Game VI Phase 09: Chaos!
Game VI Phase 08: Death By Chain
Game VI Phase 07: Absolute Chaos
Game VI Phase 06: Death of an Innocent
Game VI Phase 05: A Short Reign
Game VI Phase 04: A New Captain
Game VI Phase 03: The Dark Side Cometh
Game VI Phase 02: Crew Calamity
Game VI Phase 01: Death before Dawn
Game VI Phase 00 (Prologue): Preparing for Battle


In [3]:
# One row per entry of `data` and one column per roster name, all starting at 0.
cdf = pd.DataFrame(0, index=range(len(data)), columns=roster)   # comment counts
wcdf = pd.DataFrame(0, index=range(len(data)), columns=roster)  # word counts

for m, phase_comments in enumerate(data):
    for name, words in phase_comments:
        # Write through .at[row, column]. The chained form `cdf[name][m] += 1`
        # assigns into an intermediate Series, which pandas does not guarantee
        # to propagate back to the frame (and does not under copy-on-write).
        cdf.at[m, name] += 1
        wcdf.at[m, name] += words

# Row-wise summaries across all players.
tcdf = cdf.sum(axis=1)                  # total comments per row
acdf = cdf.astype(bool).sum(axis=1)    # players with a non-zero comment count
twcdf = wcdf.sum(axis=1)                # total words per row
awcdf = wcdf.astype(bool).sum(axis=1)   # players with a non-zero word count

In [4]:
# Keyword arguments shared by the plot() calls in the following cells.
bigimg = dict(
    subplots=True,
    layout=(8, 10),  # Change this depending on game
    figsize=(30, 20),
    legend=False,
)

In [5]:
# Absolute comment counts per player, saved as output1.png.
plot(
    cdf,
    False,
    "output1.png",
    **bigimg,
    ylim=(0, 150),
)

In [6]:
# Absolute word counts per player, saved as output3.png.
plot(
    wcdf,
    False,
    "output3.png",
    **bigimg,
    ylim=(0, 5000),
)

In [7]:
# Each player's comment count divided by the row total, saved as output2.png.
plot(
    cdf.div(tcdf, axis=0),
    False,
    "output2.png",
    **bigimg,
    ylim=(0, 0.25),
)

In [8]:
# Each player's word count divided by the row total, saved as output4.png.
plot(
    wcdf.div(twcdf, axis=0),
    False,
    "output4.png",
    **bigimg,
    ylim=(0, 0.5),
)

In [9]:
# Close every figure pyplot still tracks, then display the remaining figure
# numbers as the cell output.
plt.close('all')
plt.get_fignums()

[]

In [10]:
# This part is for the vega-lite graph: one flat record per (player, phase)
# pair goes to vega.json, and the collected per-phase data goes to data.json.

cdf_vega = [
    {"comments": int(cdf.at[phase, player]), "phase": phase, "player": player}
    for player in roster
    for phase in range(len(data))
]
with open("vega.json", "w") as f:
    json.dump(cdf_vega, f)

with open("data.json", "w") as f:
    json.dump(data, f)