# Task

1. Grab the JSON file `citylots.json` from here: https://github.com/zemirco/sf-city-lots-json
2. Provide the westernmost and easternmost coordinate of ALEMANY based on this JSON file.

In [1]:
#import sys

import numpy as np
#np.set_printoptions(threshold=sys.maxsize)

import json

import time

Read the JSON file first. Unfortunately this is a bit slow.

In [2]:
print('READING JSON FILE')
start = time.time()

with open('citylots.json', 'r') as j:
    json_file = json.load(j)

end = time.time()
total = end - start
print(f'JSON FILE read in {total:.3f} seconds\n')

READING JSON FILE
JSON FILE read in 5.676 seconds



# Implementation

Define functions that do the job

In [3]:
def first_code(verbose=True):
    """This is the first quick and dirty implementation"""
    east = 0
    west = 0

    east_coord = None
    west_coord = None

    for f in json_file['features']:
        street = f['properties']['STREET']
        try:
            coords = np.array(f['geometry']['coordinates'])[0][:,:-1]
            lon = coords[:, 0]
            if street == 'ALEMANY':
                e = np.max(coords[:, 0])
                w = np.min(coords[:, 0])

                eastmost = coords[coords[:, 0] == e][0]
                westmost = coords[coords[:, 0] == w][0]

                if east == 0:
                    east = e
                    east_coord = eastmost
                if west == 0:
                    west = w
                    west_coord = westmost

                if east != 0 and e > east:
                    east = e
                    east_coord = eastmost
                if west != 0 and w < west:
                    west = w
                    west_coord = westmost
        except:
            pass

    if verbose:
        print('                           Latitude / Longitude')
        print('eastmost coordinate: ', east_coord[::-1])
        print('westmost coordinate: ', west_coord[::-1])

In [4]:
def second_code(verbose=True):
    """Not much has changed here to be honest"""
    east = 0
    west = 0

    east_coord = None
    west_coord = None

    for f in json_file['features']:
        street = f['properties']['STREET']
        try:
            coords = np.array(f['geometry']['coordinates'])[0][:,:-1]
            if street == 'ALEMANY':            
                e = np.max(coords[:, 0])
                w = np.min(coords[:, 0])

                eastmost = coords[coords[:, 0] == e][0]
                westmost = coords[coords[:, 0] == w][0]

                if east == 0:
                    east = e
                elif east != 0 and e > east:
                    east = e
                    east_coord = eastmost

                if west == 0:
                    west = w
                elif west != 0 and w < west:
                    west = w
                    west_coord = westmost
        except:
            pass

    if verbose:
        print('                           Latitude / Longitude')
        print('eastmost coordinate: ', east_coord[::-1])
        print('westmost coordinate: ', west_coord[::-1])

This third variation looks different than the previous two but still relies on inefficient for loops

In [5]:
def third_code(verbose=True):
    """This one has a 'lighter' loop"""
    all_coords = []

    for f in json_file['features']:
        street = f['properties']['STREET']
        try:
            coords = np.array(f['geometry']['coordinates'])[0][:,:-1]
            if street == 'ALEMANY':
                for c in coords:
                    crds = [c[0], c[1]]
                    all_coords.append(crds)

        except:
            pass

    east = 0
    west = 0

    east_coord = None
    west_coord = None

    for crd in all_coords:
        if east == 0:
            east = crd[0]
        elif east != 0 and crd[0] > east:
            east = crd[0]
            east_coord = crd

        if west == 0:
            west = crd[0]
        elif west != 0 and crd[0] < west:
            west = crd[0]
            west_coord = crd

    if verbose:
        print('                           Latitude / Longitude')
        print('eastmost coordinate: ', np.array(east_coord)[::-1])
        print('westmost coordinate: ', np.array(west_coord)[::-1])

Let's try it again but optimize it. Less looping, more operations in parallel

In [6]:
def fourth_code(verbose=True):
    """The third revision gave me the idea to remove the big loop alltogether"""
    def get_alemany_coords(item):
        try:
            if item['properties']['STREET'] == 'ALEMANY':
                c = np.array(item['geometry']['coordinates'])[0][:,:-1]
                return c
        except:
            pass
    
    # Using map is nice
    alemany = map(get_alemany_coords, json_file['features'])
    alemany_array = list(alemany)
    
    # ... But you still find two loops in form of list comprehension here
    remove_none = [x for x in alemany_array if x is not None]
    b = [x for y in remove_none for x in y]
    b = np.array(b)#.ravel()

    e = np.max(b[:,0])
    w = np.min(b[:,0])

    east_coord = b[b[:, 0] == e][0]
    west_coord = b[b[:, 0] == w][0]

    if verbose:
        print('                           Latitude / Longitude')
        print('eastmost coordinate: ', east_coord[::-1])
        print('westmost coordinate: ', west_coord[::-1])

# Task solution

Now let's run the functions and check their output

In [7]:
for func in [first_code, second_code, third_code, fourth_code]:
    func(verbose=True)

                           Latitude / Longitude
eastmost coordinate:  [  37.73558079 -122.40976688]
westmost coordinate:  [  37.70988577 -122.47027026]
                           Latitude / Longitude
eastmost coordinate:  [  37.73558079 -122.40976688]
westmost coordinate:  [  37.70988577 -122.47027026]
                           Latitude / Longitude
eastmost coordinate:  [  37.73558079 -122.40976688]
westmost coordinate:  [  37.70988577 -122.47027026]
                           Latitude / Longitude
eastmost coordinate:  [  37.73558079 -122.40976688]
westmost coordinate:  [  37.70988577 -122.47027026]


# Benchmarking

Let's define a function to benchmark above implementations

In [8]:
def speedtest(cnt, f, verbose=False):
    print(f'Testing {f.__name__}')
    start = time.time()
    
    for i in range(cnt):
        f(verbose)
        
    end = time.time()
    total = end - start
    print(f'{cnt} iterations completed in {total:.3f} seconds\nIteration average: {(total/cnt):.3f} seconds')

Benchmark all functions by executing each function 100 times while recording the total execution time and taking the average execution time.

In [9]:
for func in [first_code, second_code, third_code, fourth_code]:
    speedtest(100, func, False)

Testing first_code
100 iterations completed in 126.444 seconds
Iteration average: 1.264 seconds
Testing second_code
100 iterations completed in 122.790 seconds
Iteration average: 1.228 seconds
Testing third_code
100 iterations completed in 131.128 seconds
Iteration average: 1.311 seconds
Testing fourth_code
100 iterations completed in 9.856 seconds
Iteration average: 0.099 seconds


# Benchmarking result

Apparently the fourth implementation is the quickest with a total time of under 10 seconds and an average execution time of around 0.1 seconds.
It's around **12x faster** than implememtation one to three.