In [1]:
import re
import json
from pathlib import Path
from urllib.parse import urlparse

import jinja2
import pandas as pd

import utils



In [2]:
# Fail fast, pointing at the README, when the dataset has not been downloaded yet.
assert Path('hn.csv').exists(), "You're missing the source file, check the readme"

# 'Created At' is parsed into datetimes; 'Post Type' is stored as a categorical column.
main_df = pd.read_csv('hn.csv', parse_dates=['Created At'], dtype={'Post Type': 'category'})

In [3]:
# Matches a single leading "www." label only. The pattern is anchored so that
# hosts merely containing "www." (e.g. "awww.example.com") are left intact,
# and written as a raw string so "\." is a valid regex escape.
PATTERN = re.compile(r'^www\.')
def extract_domain(url):
    """Return the host part of an http(s) URL without a leading "www.".

    Args:
        url: The value from the URL column; may be a string, None or NaN.

    Returns:
        The URL's netloc with a leading "www." removed, or None when ``url``
        is not a string or does not start with "http".
    """
    # Non-string values (None, NaN, numbers) have no domain to extract.
    if not isinstance(url, str) or not url.startswith('http'):
        return None
    return PATTERN.sub('', urlparse(url).netloc)

# Add a 'URL Domain' column computed from each row's 'URL' value.
main_df['URL Domain'] = main_df['URL'].apply(extract_domain)

# The helper and its regex are only needed for the line above; remove them
# from the notebook namespace.
del extract_domain, PATTERN

##### Strip the leading "Ask HN: " and "Show HN: " prefixes from titles for better readability

In [4]:
# Remove the "Ask HN: " prefix from the titles of ask_hn posts.
# The pattern is anchored with ^ so only a leading prefix is removed, and
# regex=True is passed explicitly because newer pandas versions default to
# literal matching in Series.str.replace.
ask_hn_mask = main_df['Post Type'] == 'ask_hn'
main_df.loc[ask_hn_mask, 'Title'] = main_df.loc[ask_hn_mask, 'Title'].str.replace(r'^Ask HN: ', '', regex=True)

In [5]:
# Remove the "Show HN: " prefix from the titles of show_hn posts.
# The pattern is anchored with ^ so only a leading prefix is removed, and
# regex=True is passed explicitly because newer pandas versions default to
# literal matching in Series.str.replace.
show_hn_mask = main_df['Post Type'] == 'show_hn'
main_df.loc[show_hn_mask, 'Title'] = main_df.loc[show_hn_mask, 'Title'].str.replace(r'^Show HN: ', '', regex=True)

#### Domain Groups (for custom top charts)

In [6]:
# Read the domain-group definitions; they are handed to every context
# generator by generate_page.
with open('domain_groups.json') as fp:
    DOMAIN_GROUPS = json.loads(fp.read())

### Jinja Conf

In [7]:
# File name of the Jinja template used to render every generated page.
TEMPLATE_NAME = "template.html"

In [45]:
# Templates are looked up relative to the current working directory ("./").
templateLoader = jinja2.FileSystemLoader(searchpath="./")
# NOTE(review): no `autoescape` argument is passed to the Environment; if the
# rendered context contains untrusted text (e.g. post titles), confirm that
# the template escapes it.
templateEnv = jinja2.Environment(loader=templateLoader)
# Template object that generate_page renders for each output file.
TEMPLATE = templateEnv.get_template(TEMPLATE_NAME)

### Static generation

In [32]:
def generate_page(context_generator, output, *params, force=False):
    """Render TEMPLATE with a generated context and write the result to ``output``.

    Args:
        context_generator: Callable invoked as
            ``context_generator(main_df, DOMAIN_GROUPS, *params)``; it must
            return a mapping that is passed to the template as keyword
            arguments.
        output: Destination file path (str or Path).
        *params: Extra positional arguments forwarded to ``context_generator``.
        force: When True, overwrite an existing ``output`` without asking.

    Returns:
        None. When ``output`` already exists, ``force`` is False and the user
        does not answer "y", nothing is generated or written.
    """
    path = Path(output)
    # Ask before doing any work, so a declined overwrite does not pay for
    # building the context.
    if path.exists() and not force:
        resp = input(f"> Warning, {output} already exists. Overwrite? [y/N]")
        if resp.strip().lower() != "y":
            print("Aborting...")
            return
    context = context_generator(main_df, DOMAIN_GROUPS, *params)
    # Render before opening the file: opening with "w" truncates it, so a
    # rendering error must not be able to wipe an existing page.
    html = TEMPLATE.render(**context)
    # Explicit encoding so the output does not depend on the platform locale.
    with path.open("w", encoding="utf-8") as fp:
        fp.write(html)

### Main generation

In [None]:
# Deliberate tripwire: this assertion always fails, so running the notebook
# top to bottom stops here instead of continuing into the cells below, which
# regenerate pages with force=True (i.e. overwrite without asking).
assert False, "WARNING! You're about to overrite all the site"

In [46]:
# One (year, output filename) pair per entry of utils.ALL_YEARS.
years_to_build = [(yr, f"{yr}.html") for yr in utils.ALL_YEARS]

In [67]:
# The 'numeric' field of every entry in utils.ALL_MONTHS.
months = [entry['numeric'] for entry in utils.ALL_MONTHS]

In [68]:
# One (year, month as int, output filename) triple per year/month combination.
# The filename interpolates the raw `months` entry, while the tuple carries
# its int() form.
months_to_build = [
    (yr, int(mm), f"{yr}-{mm}.html")
    for yr in utils.ALL_YEARS
    for mm in months
]

In [49]:
# One (year, week number, output filename) triple per week, with week numbers
# running from 1 up to and including utils._get_weeks_in_a_year(year).
weeks_to_build = [
    (yr, wk, f"{yr}-W{wk}.html")
    for yr in utils.ALL_YEARS
    for wk in range(1, utils._get_weeks_in_a_year(yr) + 1)
]

In [50]:
# Directory that receives every generated HTML page.
BASE_BUILD_PATH = Path('docs/')

In [51]:
# Create the output directory if needed. exist_ok=True makes this safe to
# re-run and avoids the check-then-create race; it still raises if the path
# exists but is not a directory, rather than failing later on the first write.
BASE_BUILD_PATH.mkdir(parents=True, exist_ok=True)

In [52]:
# Render one page per year, overwriting existing files without prompting.
for year, fname in years_to_build:
    path = BASE_BUILD_PATH.joinpath(fname)
    generate_page(
        utils.generate_page_context_year,
        path,
        year,
        force=True,
    )

In [64]:
# Peek at the first five entries to sanity-check the tuples.
# NOTE(review): the output recorded below shows 4-tuples that include a
# zero-padded month string, whereas months_to_build as currently defined
# yields 3-tuples; the output appears to come from an earlier version of
# that cell. Re-run to refresh it.
months_to_build[:5]

[(2006, '01', 1, '2006-1.html'),
 (2006, '02', 2, '2006-2.html'),
 (2006, '03', 3, '2006-3.html'),
 (2006, '04', 4, '2006-4.html'),
 (2006, '05', 5, '2006-5.html')]

In [69]:
# Render one page per (year, month), overwriting existing files without prompting.
for year, month, fname in months_to_build:
    path = BASE_BUILD_PATH.joinpath(fname)
    generate_page(
        utils.generate_page_context_month,
        path,
        year,
        month,
        force=True,
    )

In [54]:
# Render one page per (year, week), overwriting existing files without prompting.
for year, week, fname in weeks_to_build:
    path = BASE_BUILD_PATH.joinpath(fname)
    generate_page(
        utils.generate_page_context_week,
        path,
        year,
        week,
        force=True,
    )

### Special case: `index.html`

In [55]:
# Take the year and week number of the last entry in weeks_to_build; its
# filename (third element) is discarded.
year, week, _ = weeks_to_build[-1]

In [56]:
# Fetch the template from the environment again and rebind TEMPLATE.
# NOTE(review): this is the same call that first assigned TEMPLATE;
# presumably re-run to pick up template edits — confirm whether it is still needed.
TEMPLATE = templateEnv.get_template(TEMPLATE_NAME)

In [57]:
# Write index.html using the weekly context generator for the current
# `year` / `week` values, overwriting any existing file without prompting.
generate_page(utils.generate_page_context_week, BASE_BUILD_PATH / "index.html", year, week, force=True)