In [97]:
import pandas
import numpy as np
from bokeh.charts import Area, output_notebook, show
from bokeh.palettes import Inferno11
from bokeh.models import Range1d, HoverTool
from bokeh.models.sources import ColumnDataSource

In [27]:
# Load the per-sentence treatment detections into `df`.
# Only the five listed columns are read; `timestamp` is parsed to datetimes.
# The file is opened in a `with` block so the handle is closed as soon as
# pandas has consumed it (previously the handle from `open()` was never closed).
with open('../data/treatment_detected_linewise.csv', 'r') as csv_file:
    df = pandas.read_csv(
        csv_file,
        usecols=['subforum', 'post_id', 'timestamp', 'sentence', 'treatments'],
        index_col=None,
        parse_dates=['timestamp'],
        # NOTE(review): deprecated (and a no-op) in pandas >= 2.0; kept so the
        # cell behaves the same on the older pandas this notebook targets.
        infer_datetime_format=True,
    )

In [28]:
# Truncate each timestamp to month resolution (numpy datetime64[M]) and keep
# the result as a `month` column, used later to bucket rows by calendar month.
month_start = df['timestamp'].values.astype('<M8[M]')
df['month'] = month_start

In [3]:
# Sanity check: number of rows in `df`.
len(df)

79231

In [9]:
# Configure Bokeh so that `show()` renders its output inline in this notebook.
output_notebook()

In [4]:
# Preview the first rows of `df` to eyeball the loaded columns.
df.head()

Unnamed: 0,subforum,post_id,timestamp,sentence,treatments
0,collaboration-space.109,post-185994,2016-06-03 10:59:00,Like info on masking and help with anxiety and...,masking
1,collaboration-space.109,post-185994,2016-06-03 10:59:00,People should probably quickly go on corticost...,steroids
2,introduce-yourself.11,post-249993,2017-05-18 22:38:00,"Accept it and use your enviroment for you, ,th...",masking
3,support.2,post-55450,2014-07-11 21:58:00,Gave me Steroids and antivert.,steroids
4,support.2,post-55538,2014-07-12 14:36:00,This time I am using 'distraction' sounds (I p...,masking


In [5]:
# Number of rows in `df` for each distinct `subforum` value.
df.groupby("subforum").size()

subforum
alternative-treatments-and-research.27     9157
collaboration-space.109                     101
introduce-yourself.11                     11827
research-news.4                            7087
success-stories.47                         2263
support.2                                 29609
support.52                                  489
support.55                                 2345
treatments.13                             16353
dtype: int64

In [99]:
# Build the month x treatment count table that feeds the stacked area chart:
# one column for each of the TOP_N most frequent treatments plus a "Rest"
# column that aggregates every remaining treatment.
TOP_N = 10

# Rows: treatments, columns: months, values: number of rows in `df`.
tr_mon = pandas.crosstab(df["treatments"], df["month"])

# Rank treatments by their total count over all months, most frequent first.
# Reordering by the ranked index avoids adding and then dropping a temporary
# "sum" column and avoids in-place mutation.
totals = tr_mon.sum(axis=1).sort_values(ascending=False)
tr_mon = tr_mon.loc[totals.index]

# Positional slicing stays correct when there are fewer than TOP_N treatments:
# the previous `tail(len(index) - 10)` received a negative argument in that
# case and summed rows that were already part of `head` into "Rest".
head = tr_mon.iloc[:TOP_N]
tail = tr_mon.iloc[TOP_N:].sum(axis=0)
tail.name = "Rest"

# `DataFrame.append` no longer exists in pandas 2.x; turn the "Rest" series
# into a one-row frame and concatenate it instead. The index name is copied so
# the combined index keeps the same name as before.
rest_row = tail.to_frame().T
rest_row.index.name = head.index.name

# Transpose so that months become the index and each treatment is a column.
tr_mon = pandas.concat([head, rest_row]).T

In [109]:
# Initial x-axis window for the chart: from the start of 2011 to the start of
# 2018, with Bokeh's automatic bounds.
time_window = Range1d(
    np.datetime64('2011', 'Y'),
    np.datetime64('2018', 'Y'),
    bounds='auto',
)

# Chart configuration: only x-direction zoom/pan tools (plus reset and save),
# with wheel zoom and pan active by default, and the series stacked.
chart_options = dict(
    x_range=time_window,
    tools='xwheel_zoom,xpan,reset,save',
    active_scroll='xwheel_zoom',
    active_drag='xpan',
    palette=Inferno11,
    stack=True,
)

# `tr_mon` is handed over as-is: no explicit x/y selection and no fixed y-range.
area = Area(tr_mon, **chart_options)

In [110]:
# Render the `area` chart.
show(area)