In [40]:
import re

from itertools import product
from collections import defaultdict
from tqdm import tqdm

from litecoder.db import City

In [36]:
def make_key(text, lower=True):
    """Normalize text string -> index key.

    Periods are dropped, commas are treated as separators, and every run of
    whitespace (spaces, tabs, newlines) is collapsed to a single space, so
    'Madison, WI, USA' and 'madison wi usa' map to the same key.

    Args:
        text (str): Raw place string.
        lower (bool): Lowercase the result when true (the default).

    Returns:
        str: The normalized key, without leading or trailing whitespace.
    """
    # Remove periods
    text = text.replace('.', '')

    # Comma -> space
    text = text.replace(',', ' ')

    # Collapse whitespace runs, then trim. Trimming happens *after* the comma
    # substitution so a leading/trailing comma cannot leave a stray space.
    text = re.sub(r'\s+', ' ', text).strip()

    if lower:
        text = text.lower()

    return text

In [37]:
# Sanity check: commas become separators and the result is lowercased.
make_key('Madison, WI, USA')

'madison wi usa'

In [52]:
# Spellings of the country name that may follow "<city> <state>" in a query.
USA = ('United States', 'USA', 'US', 'United States of America', 'America')

def keys_iter(row, min_population=500000):
    """Generate the raw (un-normalized) index keys for one city row.

    Keys are yielded in this order:
      1. '<name> <state>' for each available state form (full name, then
         abbreviation);
      2. '<name> <state> <country>' for each state form combined with each
         entry of `USA`;
      3. the bare '<name>', only when the row's population is strictly
         greater than `min_population`.

    Args:
        row: Object exposing `name`, `state_name`, `state_abbr` and
            `population` attributes.
        min_population (int): Population a city must exceed to also be
            indexed under its bare name. Defaults to 500000.

    Yields:
        str: One candidate key per iteration.
    """
    # Drop missing state forms; otherwise '%s' would format None into the
    # key as the literal text "None".
    states = [state for state in (row.state_name, row.state_abbr) if state]

    for state in states:
        yield '%s %s' % (row.name, state)

    for state, usa in product(states, USA):
        yield '%s %s %s' % (row.name, state, usa)

    # A missing (None) or zero population never qualifies for a bare-name key.
    if row.population and row.population > min_population:
        yield row.name

In [53]:
class Litecoder:
    """In-memory index from normalized place strings to lists of `wof_id`s."""

    @classmethod
    def from_db(cls):
        """Build an index from every `City` row returned by the database.

        Returns:
            Litecoder: A populated instance.
        """
        lc = cls()
        for row in tqdm(City.query.all()):
            # Normalize first and de-duplicate, so a row whose raw keys
            # collapse to the same normalized key is indexed only once.
            for key in {make_key(k) for k in keys_iter(row)}:
                # Write to the underlying dict directly: __getitem__ is
                # read-only and never inserts.
                lc._idx[key].append(row.wof_id)

        return lc

    def __init__(self):
        # normalized key -> list of ids indexed under that key
        self._idx = defaultdict(list)

    def __getitem__(self, k):
        """Return the ids indexed under `k` (normalized via `make_key`).

        Unknown keys give an empty list. The lookup uses `.get()` rather than
        subscripting the defaultdict, so a miss does not insert an empty entry
        into the index, and a copy is returned so callers cannot mutate the
        index by accident.
        """
        return list(self._idx.get(make_key(k), ()))

In [54]:
# Build the index from all City rows (tqdm shows progress while loading).
lc = Litecoder.from_db()

100%|██████████| 54285/54285 [00:04<00:00, 11066.23it/s]


In [67]:
# The query is normalized by make_key(), so the lookup is case-insensitive.
lc['fremont CA']

[85921899]