In [None]:
from collections import Counter
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime
from pprint import pprint as pp
import re

import feedparser
import plotly
import plotly.graph_objs as go

# RSS feed that lists the blog's posts; named so it is easy to repoint.
FEED_URL = 'http://projects.bobbelderbos.com/pcc/dates/all.rss.xml'

blog_feed = feedparser.parse(FEED_URL)
entries = blog_feed['entries']

# feedparser reports download/parse problems on the result object instead of
# raising, so without this check an unreachable feed would only surface as a
# bare IndexError on entries[0] below.
if not entries:
    raise RuntimeError(
        f'no entries parsed from {FEED_URL}: {blog_feed.get("bozo_exception")!r}'
    )

# Show one raw entry to see which fields the later cells can rely on.
pp(entries[0])

In [None]:
def get_year_month(date_str):
    """Return the 'year-month' bucket of an RSS publication timestamp.

    Args:
        date_str: RFC 822 style date string such as
            'Sun, 07 Jan 2018 12:00:00 +0100'. The zone suffix may be a
            positive or negative numeric offset, a zone name like 'GMT',
            or absent.

    Returns:
        str: '<year>-<month>' with the month NOT zero-padded (e.g. '2018-1'),
        taken from the date as written in the string (no conversion to UTC).

    Raises:
        ValueError: if date_str cannot be parsed as a date.
    """
    # The stdlib RFC 822 parser copes with every zone form; splitting on '+'
    # would only strip positive offsets and choke on '-0500' or 'GMT'.
    try:
        dt = parsedate_to_datetime(date_str)
    except (TypeError, ValueError) as exc:
        # Older Python versions signal an unparseable date with TypeError;
        # normalise so callers only ever see ValueError.
        raise ValueError(f'unparseable publication date: {date_str!r}') from exc
    return f'{dt.year}-{dt.month}'

# Try the helper on a sample feed timestamp; the bare name on the last line
# makes the notebook display the returned string.
yymm = get_year_month('Sun, 07 Jan 2018 12:00:00 +0100')
yymm

In [None]:
def get_category(link):
    """Classify a post by the slug prefix found in its URL.

    The prefix is the run of lowercase letters that directly follows the
    last '.es/' in the link (presumably the blog's domain suffix; confirm
    against the feed). Known prefixes map to a category label; anything
    else, including links without '.es/', is an 'article'.

    Args:
        link: URL string of the post.

    Returns:
        str: one of 'challenge', 'news', 'special', 'guest' or 'article'.
    """
    known = {
        'codechallenge': 'challenge',
        'twitter': 'news',
        'special': 'special',
        'guest': 'guest',
    }
    default = 'article'

    # The leading greedy '.*' anchors on the LAST '.es/' in the link.
    match = re.match(r'.*\.es/([a-z]+)', link)
    if match is None:
        return default
    return known.get(match.group(1), default)



In [None]:
# Convert every entry's published timestamp to its year-month key, then count
# how many posts fall under each key. The bare name displays the Counter.
pub_dates = [get_year_month(entry.published) for entry in entries]
posts_by_month = Counter(pub_dates)
posts_by_month

In [None]:
# Count posts per URL-derived category, then rank the categories from most
# to least frequent as (category, count) pairs.
cnt = Counter(get_category(entry.link) for entry in entries)
categories = cnt.most_common()
categories

In [None]:
# Collect every tag term, lower-cased so 'Python' and 'python' count as one.
# Entries without a 'tags' field and tags without a term are skipped rather
# than aborting the cell with an AttributeError.
tags = [
    tag.term.lower()
    for entry in entries
    for tag in entry.get('tags', [])
    if tag.get('term')
]
cnt = Counter(tags)

# Keep the 20 most frequent tags for plotting; display only the first five.
top_tags = cnt.most_common(20)
top_tags[:5]

In [None]:
def transpose_list_of_tuples(data):
    """Swap rows and columns of an iterable of tuples.

    A dict (which includes a Counter) is first expanded into its
    (key, value) pairs, so the result is [keys, values].

    Returns:
        list: one tuple per column; an empty input yields []. As with zip,
        columns stop at the length of the shortest row.
    """
    rows = data.items() if isinstance(data, dict) else data
    return [*zip(*rows)]

# Preview the helper on the Counter: displays [all month keys, all counts].
transpose_list_of_tuples(posts_by_month)

In [None]:
# Bar chart of posts per month: x receives the year-month keys, y the counts.
x, y = transpose_list_of_tuples(posts_by_month)
data = [go.Bar(x=x, y=y)]
plotly.offline.iplot(data, filename='post-frequency')

In [None]:
# Pie chart of the ranked (category, count) pairs: categories become the
# slice labels and their counts the slice sizes.
labels, values = transpose_list_of_tuples(categories)
pie = go.Pie(labels=labels, values=values)
plotly.offline.iplot([pie], filename="categories")

In [None]:
# Pie chart of the most frequent tags. The trace gets its own name so the
# `tags` list of tag strings is not overwritten by a plot object, which
# would leave a stale, differently-typed `tags` if cells are re-run.
labels, values = transpose_list_of_tuples(top_tags)
tags_pie = go.Pie(labels=labels, values=values)
plotly.offline.iplot([tags_pie], filename='tags')