In [34]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import glob
import yaml

In [35]:
# Load the page-hit export and reduce it to one row per post, indexed by an
# integer page number. Written as a single chain so that re-running the cell
# always starts again from the CSV instead of mutating a previous result.
data = (
    pd.read_csv("./pages-hits.csv", index_col="Page")
    # "Page Value" is not used anywhere below.
    .drop(columns="Page Value")
    # Discard the site-root row and the row whose page label is missing.
    # `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
    .drop(index=["/", np.nan])
    .reset_index()
    # Strip the first and last character of each label and parse the rest as
    # an int (labels are presumably of the form "/13/" -- confirm against the CSV).
    .assign(Page=lambda df: df["Page"].apply(lambda path: int(path[1:-1])))
    .set_index("Page")
)


def get_tags(filename):
    """Return the ``tags`` entry from the YAML front matter of a post file.

    The first line of the file is skipped (it is assumed to be the opening
    ``---`` delimiter); every line from the second one up to the next
    ``---`` line is parsed as YAML.

    Parameters
    ----------
    filename : str
        Path of the post file to read.

    Returns
    -------
    list
        Whatever is stored under the ``tags`` key (normally a list of tag
        names), or an empty list when the key is absent or empty.

    Raises
    ------
    ValueError
        If no closing ``---`` line is found at index 2 or later.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(filename, "r") as post:
        lines = post.readlines()

    # The search for the closing delimiter starts at index 2, so at least one
    # front-matter line is expected between the two delimiters.
    front_matter = "".join(lines[1:lines.index("---\n", 2)])

    # safe_load instead of load: calling yaml.load() without an explicit
    # Loader is deprecated and rejected outright by PyYAML >= 6, and plain
    # front matter does not need arbitrary object construction.
    info = yaml.safe_load(front_matter) or {}

    # Posts without tags yield an empty list so callers can always iterate.
    return info.get("tags") or []

# Map each post's key -- the text after the last "-" in its path, cut at the
# first "." -- to the tags returned by get_tags for that file.
tag_lookup = dict(
    (path.rsplit("-", 1)[-1].partition(".")[0], get_tags(path))
    for path in glob.glob("../_posts/*.md")
)

# One zero-initialised entry per distinct tag found in any post; the view
# totals are added to these entries in a later cell.
tags = {tag: 0 for post_tags in tag_lookup.values() for tag in post_tags}

# Number of times each tag occurs across all posts.
tag_uses = {}
for post_tags in tag_lookup.values():
    for name in post_tags:
        tag_uses[name] = tag_uses.get(name, 0) + 1

In [36]:
# Show the cleaned hit table, which is indexed by integer page number.
data

Unnamed: 0_level_0,Pageviews,Unique Pageviews,Avg. Time on Page,Entrances,Bounce Rate,% Exit
Page,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,32,20,00:01:47,19,68.42%,46.88%
3,15,9,00:00:37,7,57.14%,33.33%
2,9,4,00:00:20,4,50.00%,44.44%
9,8,8,00:00:53,6,66.67%,75.00%
11,5,5,00:00:24,4,50.00%,40.00%
8,5,5,00:01:37,4,75.00%,60.00%
10,4,4,00:00:10,3,66.67%,75.00%
7,4,3,00:08:35,2,0.00%,25.00%
1,3,3,00:01:36,3,33.33%,33.33%
12,3,3,00:00:00,2,100.00%,100.00%


In [37]:
# Reset every tag's total to zero first, so that re-running this cell
# recomputes the totals instead of adding the pageviews a second time.
tags = dict.fromkeys(tags, 0)

# Credit each post's pageviews to every tag attached to that post.
for page, ts in tag_lookup.items():
    page = int(page)
    # Posts with no row in the hit table contribute nothing.
    if page in data.index:
        views = data.loc[page, "Pageviews"]
        for t in ts:
            tags[t] += views

In [38]:
# Parallel lists, one entry per tag: name, accumulated pageviews, use count.
tag_names = list(tags)
tag_views = [tags[t] for t in tag_names]
tag_occurs = [tag_uses[t] for t in tag_names]

import plotly
from plotly.graph_objs import Scatter, Layout, Bar

plotly.offline.init_notebook_mode(connected=True)

# Bound to `tag_scatter` rather than `data`: reusing `data` here would
# overwrite the page-hit DataFrame of that name and break any earlier cell
# that is re-run afterwards.
tag_scatter = Scatter(
    y=tag_views, x=tag_occurs,
    text=tag_names,
    mode= 'markers',
)
fig = plotly.graph_objs.Figure(data=[tag_scatter], layout=Layout(
    title= 'Tag Use',
    hovermode= 'closest',
    xaxis= dict(
        title= 'Tag Use',
        ticklen= 5,
        zeroline= False,
        gridwidth= 2,
    ),
    yaxis=dict(
        title= 'Tag Views',
        ticklen= 5,
        gridwidth= 2,
    ),
    showlegend= False
))
plotly.offline.iplot(fig)

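Here x is the number of times I used a tag and y is the total pageviews of the posts carrying it. A small x with a large y means a tag is disproportionately popular; a large x with a small y means I like a topic but no one else does.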

In [39]:
# (tag, total pageviews) pairs ordered from most to fewest views.
sorted_tags = sorted(tags.items(), key=lambda x: x[1], reverse=True)

plotly.offline.iplot(plotly.graph_objs.Figure(
    data=[
        Bar(
            x=[i[0] for i in sorted_tags],
            y=[i[1] for i in sorted_tags],
        )
    ], layout=Layout(
    title= 'Tag Use',
    hovermode= 'closest',
    xaxis= dict(
        title= 'Tag',
        ticklen= 5,
        zeroline= False,
        gridwidth= 2,
    ),
    yaxis=dict(
        # The bar heights are the pageview totals held in `tags`, not how
        # often a tag was used, so the axis carries the same 'Tag Views'
        # label that the scatter plot uses for this quantity.
        title= 'Tag Views',
        ticklen= 5,
        gridwidth= 2,
    ),
    showlegend= False
)))

In [40]:
# Every tag name in sorted order (iterating a dict yields its keys).
sorted(tags)

['3d',
 'MRI',
 'audio',
 'autonomy',
 'biomimicry',
 'computation',
 'computer-vision',
 'connectome',
 'connectomics',
 'cv',
 'deep-learning',
 'distributed',
 'electron-microscopy',
 'em',
 'evolution',
 'fMRI',
 'gan',
 'graphs',
 'hardware',
 'image-segmentation',
 'integrator',
 'internet',
 'kinematics',
 'machine-learning',
 'motion-coordination',
 'multiple-sclerosis',
 'networks',
 'neural-net',
 'neural-nets',
 'neurology',
 'neuroscience',
 'octopus',
 'oculomotion',
 'optogenetics',
 'paleontology',
 'quick-read',
 'retina',
 'robot',
 'robotics',
 'simulation',
 'statistics',
 'stats',
 'synapses',
 'vision',
 'visual-system',
 'whale',
 'zebrafish']