In [6]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import glob
import yaml

In [10]:
# Load the analytics export and turn it into a table indexed by integer page id.
# Built as one non-mutating chain so re-running the cell always starts from the CSV.
data = (
    pd.read_csv("./pages-hits.csv", index_col="Page")
    # The "Page Value" column is discarded.
    .drop(columns=["Page Value"])
    # Drop the "/", "/tag/" and NaN index labels. `np.nan` is used instead of
    # the `np.NaN` alias, which was removed in NumPy 2.0.
    .drop(index=["/", np.nan, "/tag/"])
    .reset_index()
    # Strip the first and last character of each remaining path and parse
    # what is left as an int.
    .assign(Page=lambda frame: frame["Page"].map(lambda path: int(path[1:-1])))
    .set_index("Page")
)


def get_tags(filename):
    """
    Return the "tags" entry from the YAML front matter of a post file.

    The first line of the file is skipped; every following line up to (not
    including) the first line equal to "---" found at index 2 or later is
    joined and parsed as YAML.

    Arguments:
        filename: Path of the post file to read.

    Returns:
        The value stored under the "tags" key of the parsed front matter.

    Raises:
        ValueError: If no closing "---" line is found.
        KeyError: If the parsed front matter has no "tags" key.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(filename, "r") as post_file:
        lines = post_file.readlines()
    front_matter = "".join(lines[1:lines.index("---\n", 2)])
    # safe_load instead of yaml.load: calling yaml.load without a Loader is
    # rejected by PyYAML 6 and can construct arbitrary objects on older versions.
    info = yaml.safe_load(front_matter)
    return info['tags']

# Map each post id to that post's tags. The id is the part of the file path
# after its last "-", cut at the first "." that follows.
tag_lookup = dict(
    (path.rsplit("-", 1)[-1].partition(".")[0], get_tags(path))
    for path in glob.glob("../_posts/*.md")
)

# Two tallies keyed by tag name, filled in a single pass over the posts:
#   tags     -- starts at 0 for every tag (view totals are added to it later)
#   tag_uses -- how many times each tag appears across all posts
tags = {}
tag_uses = {}
for post_tags in tag_lookup.values():
    for tag in post_tags:
        tags.setdefault(tag, 0)
        tag_uses[tag] = tag_uses.get(tag, 0) + 1

In [11]:
# Show the cleaned page-hit table, indexed by integer page id.
data

Unnamed: 0_level_0,Pageviews,Unique Pageviews,Avg. Time on Page,Entrances,Bounce Rate,% Exit
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,35,22,00:01:36,21,61.90%,45.71%
19,12,12,00:00:00,12,100.00%,100.00%
23,11,10,00:00:13,9,66.67%,63.64%
30,10,8,00:03:42,7,57.14%,70.00%
9,10,10,00:00:42,7,71.43%,70.00%
31,7,4,00:09:36,3,66.67%,57.14%
11,5,5,00:00:24,4,50.00%,40.00%
8,5,5,00:01:37,4,75.00%,60.00%
10,4,4,00:00:10,3,66.67%,75.00%
12,4,4,00:00:00,3,100.00%,100.00%


In [12]:
# Add each post's pageviews to every tag that post carries.
# The totals are zeroed first (in place) so that re-running this cell gives
# the same result instead of counting every post's views a second time.
tags.update(dict.fromkeys(tags, 0))

for page, ts in tag_lookup.items():
    page = int(page)  # lookup keys are strings; the table index holds ints
    # Posts without a row in the page-hit table contribute nothing.
    if page in data.index:
        views = data.loc[page, "Pageviews"]
        for t in ts:
            tags[t] += views

In [17]:
# Parallel lists with one entry per tag, all in the iteration order of `tags`.
tag_names = list(tags)
tag_views = [tags[t] for t in tag_names]
tag_occurs = [tag_uses[t] for t in tag_names]

import plotly
from plotly.graph_objs import Scatter, Layout, Bar

plotly.offline.init_notebook_mode(connected=True)

# Bound to `tag_scatter` rather than `data`: `data` is the page-hit DataFrame,
# and overwriting it here would break the earlier cells when they are re-run.
tag_scatter = Scatter(
    x=tag_occurs,
    y=tag_views,
    text=tag_names,
    mode='markers',
)
fig = plotly.graph_objs.Figure(data=[tag_scatter], layout=Layout(
    title='Tag Popularity and Usage',
    hovermode='closest',
    xaxis=dict(
        title='Tag Use (Number of Posts)',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(
        # The y values are sums of the "Pageviews" column, not of
        # "Unique Pageviews", so the label says pageviews.
        title='Tag Views (Total Pageviews)',
        ticklen=5,
        gridwidth=2,
    ),
    showlegend=False
))
plotly.offline.iplot(fig)

A tag with small x (used in few posts) and large y (many views) is disproportionately popular; a tag with large x and small y is a topic I like but no one else does.

In [26]:
# (tag, value) pairs taken from `tags`, largest value first.
sorted_tags = sorted(tags.items(), key=lambda pair: pair[1], reverse=True)

# NOTE(review): the bar heights are the values stored in `tags`, while the
# title and y-axis label speak of use/frequency -- confirm which was intended.
plotly.offline.iplot(
    plotly.graph_objs.Figure(
        data=[
            Bar(
                x=[name for name, _ in sorted_tags],
                y=[value for _, value in sorted_tags],
            ),
        ],
        layout=Layout(
            title='Tag Use',
            hovermode='closest',
            # No x-axis title is set; dtick=1 asks for a tick at every step.
            xaxis={
                'ticklen': 5,
                'zeroline': False,
                'gridwidth': 2,
                'dtick': 1,
            },
            yaxis={
                'title': 'Tag Frequency',
                'ticklen': 5,
                'gridwidth': 2,
            },
            showlegend=False,
        ),
    )
)

In [28]:
# (tag, total views) pairs, most-viewed tag first; this fixes the bar order.
sorted_tags = sorted(tags.items(), key=lambda x: x[1], reverse=True)

plotly.offline.iplot(plotly.graph_objs.Figure(
    data=[
        Bar(
            x=[name for name, _ in sorted_tags],
            # Each ratio is computed from the same (tag, views) pair that
            # labels the bar. Taking the values from the unsorted
            # tag_views/tag_occurs lists would pair heights with the wrong tags.
            y=[views / tag_uses[name] for name, views in sorted_tags],
        )
    ], layout=Layout(
    title= 'Tag Popularity',
    hovermode= 'closest',
    xaxis= dict(
        # No x-axis title is set.
        ticklen= 5,
        zeroline= False,
        gridwidth= 2,
        dtick=1
    ),
    yaxis=dict(
        # The plotted quantity is views divided by number of uses.
        title= 'Views / Use',
        ticklen= 5,
        gridwidth= 2,
    ),
    showlegend= False
)))

In [19]:
# Sorted list of every tag name.
sorted(tags)

['3d',
 'BCI',
 'EEG',
 'GCaMP6',
 'MRI',
 'PET',
 'alzheimers',
 'audio',
 'automata',
 'autonomy',
 'banjo',
 'biomimicry',
 'birds',
 'brainwaves',
 'cognition',
 'common-cold',
 'computation',
 'computer-vision',
 'connectome',
 'connectomics',
 'cv',
 'deep-learning',
 'deep-sea',
 'development',
 'distributed',
 'drum',
 'electron-microscopy',
 'electrophysiology',
 'em',
 'epilepsy',
 'evolution',
 'extrastriate-cortex',
 'fMRI',
 'finance',
 'fish',
 'game-theory',
 'gan',
 'geometry',
 'graph-theory',
 'graphs',
 'hardware',
 'hft',
 'image-segmentation',
 'immunology',
 'inflammation',
 'infrastructure',
 'integrator',
 'internet',
 'kinematics',
 'machine-learning',
 'markets',
 'math',
 'microglia',
 'motion-coordination',
 'multiple-sclerosis',
 'music',
 'networks',
 'neural-net',
 'neural-nets',
 'neurology',
 'neuroscience',
 'nlp',
 'oceans',
 'octopus',
 'oculomotion',
 'optogenetics',
 'oscillator',
 'paleontology',
 'password',
 'physics',
 'privacy',
 'quick-read',