In [1]:
from pathlib import Path
import json
from functools import reduce
import math
import datetime as dt
import pytz 
from itertools import product
from collections import OrderedDict
import time
import re
import sys

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely.ops as so

import helpers as hp

%load_ext autoreload
%autoreload 2



In [None]:
# def car_value(purchase_price, num_years):
#     """
#     Depreciation at 40% for first year and 20%/year thereafter.
#     Taken from the Dog and Lemon guide at https://dogandlemon.com/articles/depreciation-new-zealand.
#     """
#     x = purchase_price
#     n = num_years
#     k = math.floor(n)
#     kk = n - k
#     print(k, kk)
#     if n <= 0:
#         v = x
#     else:        
#         v = 0.80**(max(0, n-1))*0.6*x
#     return v

# car_value(73000, 14.2)

# Group regional rents by rental area and number of bedrooms and write to JSON for web

In [3]:
def build_rents_json(region, date):
    """
    Build the rents lookup for one region.

    Read the rents for the given region (string), aggregate them by
    rental area and number of bedrooms over the period starting at the
    given date (YYYY-MM-DD string), and return a nested dictionary of the form
    rental area -> #bedrooms -> rent geometric mean,
    with numeric values rounded to the nearest whole number.
    The '5+' bedrooms category is excluded.
    Missing mean rents are represented as ``None`` so that the
    result can be serialized to JSON.
    """
    aggregated = hp.aggregate_rents(hp.get_data(region, 'rents_csv'), date)

    # Exclude the '5+' bedrooms category, then round the numeric columns
    is_kept = aggregated['#bedrooms'] != '5+'
    rounded = aggregated[is_kept].copy().round()

    # JSON has no NaN, so represent missing values as None
    cleaned = hp.nan_to_none(rounded)

    # Collect rental area -> {#bedrooms: rent geo mean}
    rent_by_bedrooms_by_area = {}
    for area, group in cleaned.groupby('rental_area'):
        pairs = group[['#bedrooms', 'rent_geo_mean']].values
        rent_by_bedrooms_by_area[area] = dict(pairs)

    return rent_by_bedrooms_by_area


# Start date of the slice of rents to aggregate.
# NOTE(review): presumably the first of the two most recent quarters -- confirm
# against hp.get_latest_quarters.
date = hp.get_latest_quarters(2)[0]
print('Slicing from date', date)

# Write one rents JSON file per region
for region in hp.REGIONS:
    d = build_rents_json(region, date)
    path = hp.get_path(region, 'rents_json')
    with path.open('w') as tgt:
        tgt.write(json.dumps(d))

    

Slicing from date 2017-03-01


# Compile roundtrip commute costs and durations and save to JSON for web

In [2]:
def _roundtrip_pair(out_leg, back_leg):
    """
    Given the (cost, duration) pairs of the two legs of a roundtrip,
    return the pair (total cost, total duration), each rounded to two
    decimal places.
    Return ``(None, None)`` if any of the four values is missing
    (``None`` or NaN), so that a partially known roundtrip is never reported.
    """
    values = tuple(out_leg) + tuple(back_leg)
    if any(pd.isnull(v) for v in values):
        return (None, None)

    # Cast to plain floats so the result is always JSON-serializable
    cost = round(float(out_leg[0]) + float(back_leg[0]), 2)
    duration = round(float(out_leg[1]) + float(back_leg[1]), 2)
    return (cost, duration)


def build_commute_costs_json(region):
    """
    Consolidate the data in the commute CSV files for this region into
    one JSON-compatible dictionary of roundtrip commute costs and durations.
    More specifically, return a dictionary of the form
    ``{'index_by_name': index_by_name, 'matrix': M}``, where
    ``index_by_name`` is a dictionary of the form
    rental area name -> row/column index in the lower-triangular half-matrix
    ``M``, where ``M`` is encoded by a dictionary of the form
    mode -> list of lists of cost-duration pairs
    such that ``M[mode][i][j]`` equals the cost in dollars
    and the duration in hours that it takes to travel roundtrip by the
    given mode from the rental area point with index ``i >= 0``
    to the rental area point with index ``j <= i``.

    Entries for which either leg of the roundtrip has a missing cost or
    duration are ``(None, None)``.
    Commute rows whose origin or destination is not one of the rental
    areas found in the rents data are skipped, because they have no
    index in the matrix.
    The frames returned by ``hp.get_data`` are not modified.
    """
    # Get rental area names
    rents = hp.get_data(region, 'rents_csv')
    names = sorted(rents['rental_area'].unique())
    index_by_name = {name: i for (i, name) in enumerate(names)}
    n = len(names)

    # Add cost info to commutes data
    columns = ['mode', 'orig_index', 'dest_index', 'cost', 'duration']
    frames = []
    for mode in hp.MODES:
        # Work on a copy so the helper's frame is never mutated
        f = hp.get_data(region, 'commutes_' + mode).copy()
        f['mode'] = mode
        f['orig_index'] = f['orig_name'].map(index_by_name)
        f['dest_index'] = f['dest_name'].map(index_by_name)

        # Unknown area names map to NaN, which would turn the index columns
        # into floats and break the list indexing below; skip those rows
        known = f['orig_index'].notnull() & f['dest_index'].notnull()
        f = f[known].copy()
        f['orig_index'] = f['orig_index'].astype(int)
        f['dest_index'] = f['dest_index'].astype(int)

        # Convert from meters to kilometers and seconds to hours
        f['distance'] = f['distance'] / 1000
        f['duration'] = f['duration'] / 3600

        # Compute costs
        if mode == 'transit':
            # Use separate cost table
            costs = hp.get_data(region, 'transit_costs').rename(
                columns={'card_fare': 'cost'})
            f = f.merge(costs)
            # Nullify costs with missing distances
            f.loc[f['distance'].isnull(), 'cost'] = np.nan
        else:
            # Multiply distance by cost per distance
            f['cost'] = hp.COST_BY_MODE[mode]*f['distance']

        # Insert zero costs for area-to-self commutes.
        # These rows come last, so they override any area-to-self rows
        # already present in the data when the matrix is filled below.
        own = f['orig_index'].unique()
        g = pd.DataFrame({
            'mode': mode,
            'orig_index': own,
            'dest_index': own,
            'cost': 0,
            'duration': 0,
        })
        frames.append(pd.concat([f[columns], g[columns]]))

    f = pd.concat(frames)

    # Make a dictionary M of the form mode -> [[(cost, duration)]],
    # where the double list is indexed by rental areas
    M = {mode: [[(None, None) for j in range(n)] for i in range(n)]
        for mode in hp.MODES}
    f = hp.nan_to_none(f)
    for mode, oi, di, cost, duration in f.itertuples(index=False):
        M[mode][int(oi)][int(di)] = (cost, duration)

    # Make a dictionary MM of the form mode -> [[(roundtrip cost, roundtrip duration)]]
    # where the double list is indexed by rental areas i, j with j <= i
    # (lower-triangular, diagonal included)
    MM = {
        mode: [
            [_roundtrip_pair(M[mode][i][j], M[mode][j][i]) for j in range(i + 1)]
            for i in range(n)
        ]
        for mode in hp.MODES
    }

    data = {'index_by_name': index_by_name, 'matrix': MM}
    return data

# Write one commute-costs JSON file per region
for region in hp.REGIONS:
    d = build_commute_costs_json(region)
    path = hp.get_path(region, 'commute_costs')
    with path.open('w') as tgt:
        tgt.write(json.dumps(d))
