In [1]:
import re
import json
from pathlib import Path
from urllib.parse import urlparse
from datetime import date

import jinja2
import pandas as pd

import utils



### Setup

In [2]:
# Output directory: every generated HTML page (and index.html) is written under it.
BASE_BUILD_PATH = Path('docs/')

In [3]:
# Location of the source CSV.
# NOTE(review): this is a path inside one user's home directory; consider making
# it configurable so the notebook runs on other machines.
HN_CSV_PATH = '~/code/python/hn-download-data/hn_data/hn.csv'

# Read the CSV, parsing 'Created At' as datetimes and storing 'Post Type'
# as a categorical column.
main_df = pd.read_csv(
    HN_CSV_PATH,
    dtype={'Post Type': 'category'},
    parse_dates=['Created At'],
)

In [4]:
# Matches a single leading "www." label. The pattern is anchored so that a
# "www." occurring later in the host (e.g. "awww.example.com") is left intact,
# and written as a raw string so "\." is not an invalid string escape.
PATTERN = re.compile(r'^www\.')


def extract_domain(url):
    """Return the network location of ``url`` without a leading ``www.``.

    Parameters
    ----------
    url : str or missing value
        The URL to inspect.

    Returns
    -------
    str or None
        ``urlparse(url).netloc`` with one leading ``www.`` removed, or ``None``
        when ``url`` is null or does not start with ``'http'``.
    """
    if pd.isnull(url) or not url.startswith('http'):
        return None
    return PATTERN.sub('', urlparse(url).netloc)

# Derive the 'URL Domain' column by running extract_domain over every 'URL' value.
main_df['URL Domain'] = main_df['URL'].apply(extract_domain)

# The helper and its regex are not needed past this point; remove both names
# from the notebook namespace.
del extract_domain, PATTERN

In [5]:
# Strip the leading "Ask HN: " marker from the titles of ask_hn posts.
# - The row mask is computed once and reused for both the read and the write.
# - The pattern is anchored so only a prefix is removed, never the same text
#   appearing later in a title.
# - regex=True is passed explicitly: pandas >= 2.0 defaults to regex=False,
#   which would treat the pattern literally (and rejects compiled patterns).
ask_hn_mask = main_df['Post Type'] == 'ask_hn'
main_df.loc[ask_hn_mask, 'Title'] = (
    main_df.loc[ask_hn_mask, 'Title']
    .str.replace(r'^Ask HN: ', '', regex=True)
)

In [6]:
# Strip the leading "Show HN: " marker from the titles of show_hn posts.
# - The row mask is computed once and reused for both the read and the write.
# - The pattern is anchored so only a prefix is removed, never the same text
#   appearing later in a title.
# - regex=True is passed explicitly: pandas >= 2.0 defaults to regex=False,
#   which would treat the pattern literally (and rejects compiled patterns).
show_hn_mask = main_df['Post Type'] == 'show_hn'
main_df.loc[show_hn_mask, 'Title'] = (
    main_df.loc[show_hn_mask, 'Title']
    .str.replace(r'^Show HN: ', '', regex=True)
)

#### Domain Groups (for custom top charts)

In [7]:
# Load the domain-group definitions that are later handed to the page generator.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default.
with open('domain_groups.json', encoding='utf-8') as fp:
    DOMAIN_GROUPS = json.load(fp)

##### Jinja Configuration

In [8]:
# File name of the Jinja template used to render every page.
TEMPLATE_NAME = "template.html"

In [9]:
# Templates are looked up relative to the current working directory.
templateLoader = jinja2.FileSystemLoader(searchpath="./")
# NOTE(review): no autoescape setting is passed, so Jinja's default applies;
# confirm the template escapes any externally sourced text it renders.
templateEnv = jinja2.Environment(loader=templateLoader)
# The single template object that is rendered for every generated page.
TEMPLATE = templateEnv.get_template(TEMPLATE_NAME)

---

### Calculate pages to build

In [11]:
# Distinct calendar years present in 'Created At', in ascending order.
# - dropna(): an unparseable date yields NaN here and int(NaN) would raise.
# - sorted(): unique() returns values in order of appearance, but the cell that
#   picks the page for index.html takes the last available week, so the year
#   order must be chronological.
ALL_YEARS = sorted(
    int(y) for y in main_df['Created At'].dt.year.dropna().unique()
)

In [12]:
def _build_year_entry(year):
    """Return the page descriptor for one year.

    The descriptor holds the year, its output file name and the values
    returned by ``utils.get_date_range_for_year`` under 'start' and 'end'.
    """
    entry = {"year": year, "url": f'{year}.html'}
    # Pair the returned range values, in order, with the 'start' / 'end' keys.
    entry.update(zip(['start', 'end'], utils.get_date_range_for_year(year)))
    return entry


# One descriptor per year found in the data.
year_data = list(map(_build_year_entry, ALL_YEARS))

In [13]:
def _build_month_data(years):
    """Return one page descriptor per (year, month) for the given years.

    Each descriptor holds the year, the month number, the abbreviated month
    name, the output file name, the date range under 'start' / 'end', and an
    'is_available' flag that is true when the range's end lies before "now".
    """
    # Take a single timestamp so every entry is compared against the same instant.
    now = pd.Timestamp.now()
    entries = []
    for year in years:
        for month in range(1, 13):
            # Compute the range once and reuse it for the flag and the bounds.
            date_range = utils.get_date_range_for_month(year, month)
            entries.append({
                "year": year,
                "month": month,
                "name": date(year, month, 1).strftime('%b'),
                "url": f"{year}-{month}.html",
                "is_available": date_range[1] < now,
                **dict(zip(['start', 'end'], date_range)),
            })
    return entries


month_data = _build_month_data(ALL_YEARS)

In [14]:
def _build_week_data(years):
    """Return one page descriptor per (year, week) for the given years.

    Weeks run from 1 to ``utils._get_weeks_in_a_year(year)`` inclusive. Each
    descriptor holds the year, the week number, a "W<n>" label, the output file
    name, the date range under 'start' / 'end', and an 'is_available' flag that
    is true when the range's end lies before "now".
    """
    # Take a single timestamp so every entry is compared against the same instant.
    now = pd.Timestamp.now()
    entries = []
    for year in years:
        for week in range(1, utils._get_weeks_in_a_year(year) + 1):
            # Compute the range once and reuse it for the flag and the bounds.
            date_range = utils.get_date_range_for_week(year, week)
            entries.append({
                "year": year,
                "week": week,
                "name": f"W{week}",
                "url": f"{year}-W{week}.html",
                "is_available": date_range[1] < now,
                **dict(zip(['start', 'end'], date_range)),
            })
    return entries


week_data = _build_week_data(ALL_YEARS)

---


## Static Generator

In [15]:
def generate_page_context(df, domain_groups, fname, year, start, end, week=None, month=None):
    """Assemble the template context for a single page.

    Parameters
    ----------
    df, domain_groups, start, end
        Forwarded unchanged to ``utils.generate_top_charts``.
    fname
        Accepted for the caller's convenience; not used inside this function.
    year
        Year of the page; also selects which month / week descriptors are
        included in the context.
    week, month
        Optional selection for the page. At most one of them may be truthy.

    Returns
    -------
    dict
        Keys 'start', 'end', 'top_charts', 'selected_year', 'ALL_YEARS',
        'ALL_MONTHS', 'ALL_WEEKS', plus 'selected_month' / 'selected_week'
        when the corresponding argument is truthy.
    """
    # A page is for a week or for a month, never both.
    assert not (week and month)

    context = dict(
        start=start,
        end=end,
        top_charts=utils.generate_top_charts(df, domain_groups, start, end),
        selected_year=year,
        ALL_YEARS=ALL_YEARS,
        # Only the descriptors belonging to this page's year.
        ALL_MONTHS=[entry for entry in month_data if entry['year'] == year],
        ALL_WEEKS=[entry for entry in week_data if entry['year'] == year],
    )

    for key, selection in (('selected_month', month), ('selected_week', week)):
        if selection:
            context[key] = selection
    return context

#### Generate Years

In [None]:
# Render and write one page per year.
for year_page in year_data:
    context = generate_page_context(
        main_df, DOMAIN_GROUPS,
        year_page['url'], year_page['year'], year_page['start'], year_page['end'])

    # Render before touching the output file so a template error cannot leave
    # a truncated, empty page behind.
    html = TEMPLATE.render(**context)

    path = BASE_BUILD_PATH / year_page['url']
    # Pin the encoding so the output does not depend on the platform locale.
    path.write_text(html, encoding='utf-8')

#### Generate Months

In [None]:
# Render and write one page per month whose 'is_available' flag is set.
for month_page in month_data:
    if not month_page['is_available']:
        continue
    context = generate_page_context(
        main_df, DOMAIN_GROUPS,
        month_page['url'], month_page['year'], month_page['start'], month_page['end'],
        month=month_page['month'])

    # Render before touching the output file so a template error cannot leave
    # a truncated, empty page behind.
    html = TEMPLATE.render(**context)

    path = BASE_BUILD_PATH / month_page['url']
    # Pin the encoding so the output does not depend on the platform locale.
    path.write_text(html, encoding='utf-8')

#### Generate Weeks

In [None]:
# Render and write one page per week whose 'is_available' flag is set.
for week_page in week_data:
    if not week_page['is_available']:
        continue
    context = generate_page_context(
        main_df, DOMAIN_GROUPS,
        week_page['url'], week_page['year'], week_page['start'], week_page['end'],
        week=week_page['week'])

    # Render before touching the output file so a template error cannot leave
    # a truncated, empty page behind.
    html = TEMPLATE.render(**context)

    path = BASE_BUILD_PATH / week_page['url']
    # Pin the encoding so the output does not depend on the platform locale.
    path.write_text(html, encoding='utf-8')

### Generate index.html

In [None]:
# Pick the most recent available week to serve as the landing page.
# Selecting by the latest 'end' value, rather than by list position, keeps the
# choice correct even if week_data is not in chronological order.
available_weeks = [w for w in week_data if w['is_available']]
if not available_weeks:
    raise RuntimeError("No available week found in week_data; cannot build index.html")
week = max(available_weeks, key=lambda w: w['end'])

In [None]:
# Location of the selected week's generated page; it must already exist on
# disk because its contents are copied into index.html next.
week_path = BASE_BUILD_PATH.joinpath(week['url'])
assert week_path.exists()

In [None]:
# Publish the selected week's page as index.html.
# A byte-for-byte copy avoids decoding and re-encoding the HTML in the
# platform's locale encoding, and read_bytes() closes the source file itself
# (the previous bare open().read() never closed its handle).
(BASE_BUILD_PATH / 'index.html').write_bytes(week_path.read_bytes())