In [43]:
import pandas as pd

from tqdm import tqdm
from cached_property import cached_property
from sqlalchemy.inspection import inspect

from litecoder.usa import USCityIndex, USStateIndex
from litecoder.models import Locality

In [2]:
# Instantiate the US city index and populate it; the log output below reports
# the two indexing passes (name -> populations, then US cities).
city_idx = USCityIndex()
city_idx.build()

2018-07-23 09:33:46,234 | INFO : Indexing name -> populations.


344249it [00:13, 25478.56it/s]

2018-07-23 09:34:08,880 | INFO : Indexing US cities.



54727it [00:07, 7047.46it/s]


In [3]:
# Instantiate the US state index and populate it (see the "Indexing US states"
# log line below).
state_idx = USStateIndex()
state_idx.build()

2018-07-23 09:34:21,562 | INFO : Indexing US states.


51it [00:00, 5768.55it/s]


In [4]:
city_idx

USCityIndex<637590 keys, 54727 entities>

In [5]:
state_idx

USStateIndex<561 keys, 51 entities>

In [6]:
# Time a single city-index lookup keyed by a "City, ST" string; the result
# shown below is a list of matching record dicts.
%time city_idx['Baraboo, WI']

CPU times: user 29 µs, sys: 1e+03 ns, total: 30 µs
Wall time: 34.1 µs


[{'area_m2': 19335596.260778,
  'country_iso': 'US',
  'dbpedia_id': None,
  'elevation': 271,
  'factual_id': '08bf0c70-8f76-11e1-848f-cfd5bf3ef515',
  'fips_code': 5504625,
  'freebase_id': None,
  'geonames_id': 5244638,
  'geoplanet_id': 2359079,
  'latitude': 43.469626,
  'library_of_congress_id': None,
  'longitude': -89.737824,
  'name': 'Baraboo',
  'name_a0': 'United States',
  'name_a1': 'Wisconsin',
  'new_york_times_id': None,
  'population': 12048,
  'quattroshapes_id': 813534,
  'wikidata_id': 'Q1023544',
  'wikipedia_page': 'Baraboo, Wisconsin',
  'wikipedia_wordcount': 2028,
  'wof_id': 101733429,
  'wof_region_id': 85688517}]

In [7]:
# Same timing for the state index; the key used here is a lower-cased state
# name, and the result is again a list of record dicts.
%time state_idx['north dakota']

CPU times: user 18 µs, sys: 1e+03 ns, total: 19 µs
Wall time: 21 µs


[{'area_m2': 182609378555.8498,
  'country_iso': 'US',
  'fips_code': 'US38',
  'geonames_id': 5690763,
  'geoplanet_id': 2347593,
  'iso_id': 'US-ND',
  'latitude': 47.446315,
  'longitude': -100.469334,
  'name': 'North Dakota',
  'name_a0': 'United States',
  'name_abbr': 'ND',
  'population': 672591,
  'wikidata_id': 'Q1207',
  'wof_country_id': 85633793,
  'wof_id': 85688525}]

In [35]:
class IndexedCity:
    """Lightweight snapshot of a city row's column values.

    The row is flattened into a plain ``metadata`` dict whose keys are also
    readable as attributes (``obj.name``, ``obj.population``, ...). The
    originating database row can be re-fetched lazily through ``db_row``.
    """

    def __init__(self, row):
        """Capture the row's primary key and column values.

        Args:
            row: Object exposing ``wof_id`` and convertible via ``dict(row)``
                (in this notebook, a ``Locality`` instance).
        """
        self.pk = row.wof_id
        self.metadata = dict(row)

    def __getattr__(self, key):
        """Resolve attributes that normal lookup missed against ``metadata``.

        Args:
            key: Attribute name being looked up.

        Returns:
            The value stored under ``key`` in ``metadata``.

        Raises:
            AttributeError: If ``key`` is not a metadata key, or if
                ``metadata`` itself has not been set on the instance yet.
        """
        # Read ``metadata`` through ``__dict__`` rather than ``self.metadata``:
        # on an instance created without ``__init__`` (copy / unpickle) the
        # latter would re-enter ``__getattr__`` and recurse without bound.
        # A miss must surface as AttributeError (not KeyError) so that
        # ``hasattr`` and ``getattr(obj, name, default)`` behave normally.
        try:
            return self.__dict__['metadata'][key]
        except KeyError:
            raise AttributeError(
                '%r object has no attribute %r'
                % (self.__class__.__name__, key)
            ) from None

    def as_dict(self):
        """Return the metadata dict (the live object, not a copy)."""
        return self.metadata

    def __repr__(self):
        """Format as ``ClassName<wof_id, name, name_a1, name_a0>``."""
        return '%s<%d, %s, %s, %s>' % (
            self.__class__.__name__,
            self.wof_id,
            self.name,
            self.name_a1,
            self.name_a0,
        )

    @cached_property
    def db_row(self):
        """Fetch the ``Locality`` row for ``pk``; evaluated once, then cached."""
        return Locality.query.get(self.pk)

In [36]:
# Take the first Locality row the query returns as a sample for the wrapper
# classes exercised in the following cells.
loc = Locality.query.first()

In [37]:
# Wrap the sample row: stores its wof_id as pk and dict(row) as metadata.
ic = IndexedCity(loc)

In [38]:
ic

IndexedCity<85897411, Valley Head, Alabama, United States>

In [40]:
# db_row re-fetches the Locality by primary key; `.region` is then read from
# that ORM row (the metadata dict has no `region` key).
ic.db_row.region

Region<85688675, Alabama, United States>

In [41]:
ic.as_dict()

{'area_m2': 8995650.887739,
 'country_iso': 'US',
 'dbpedia_id': None,
 'elevation': 309,
 'factual_id': '08cd9042-8f76-11e1-848f-cfd5bf3ef515',
 'fips_code': 178240,
 'freebase_id': None,
 'geonames_id': 4095233,
 'geoplanet_id': 2510853,
 'latitude': 34.557517,
 'library_of_congress_id': None,
 'longitude': -85.621354,
 'name': 'Valley Head',
 'name_a0': 'United States',
 'name_a1': 'Alabama',
 'new_york_times_id': None,
 'population': 558,
 'quattroshapes_id': 936530,
 'wikidata_id': 'Q66599',
 'wikipedia_page': 'Valley Head, Alabama',
 'wikipedia_wordcount': 615,
 'wof_id': 85897411,
 'wof_region_id': 85688675}

In [55]:
class IndexedRow:
    """Model-agnostic snapshot of an ORM row's column values.

    The mapped class and primary-key identity are taken from SQLAlchemy's
    ``inspect(row)`` state. Column values are flattened into ``metadata`` and
    are readable as attributes. ``db_row`` re-fetches the ORM row lazily.
    """

    # Label fields shown in the repr, in display order. Fields absent from a
    # given row's metadata are skipped.
    _REPR_KEYS = ('wof_id', 'name', 'name_a1', 'name_a0')

    def __init__(self, row):
        """Capture identity, mapped class and column values of ``row``.

        Args:
            row: A mapped ORM instance that SQLAlchemy ``inspect`` accepts
                and that is convertible via ``dict(row)``.
        """
        state = inspect(row)

        # ``identity`` is the primary-key tuple; ``class_`` the mapped class.
        self.pk = state.identity
        self.model_cls = state.class_

        self.metadata = dict(row)

    def __getattr__(self, key):
        """Resolve attributes that normal lookup missed against ``metadata``.

        Args:
            key: Attribute name being looked up.

        Returns:
            The value stored under ``key`` in ``metadata``.

        Raises:
            AttributeError: If ``key`` is not a metadata key, or if
                ``metadata`` itself has not been set on the instance yet.
        """
        # Read ``metadata`` through ``__dict__`` rather than ``self.metadata``:
        # on an instance created without ``__init__`` (copy / unpickle) the
        # latter would re-enter ``__getattr__`` and recurse without bound.
        # A miss must surface as AttributeError (not KeyError) so that
        # ``hasattr`` and ``getattr(obj, name, default)`` behave normally.
        try:
            return self.__dict__['metadata'][key]
        except KeyError:
            raise AttributeError(
                '%r object has no attribute %r'
                % (self.__class__.__name__, key)
            ) from None

    def as_dict(self):
        """Return the metadata dict (the live object, not a copy)."""
        return self.metadata

    def __repr__(self):
        """Format as ``Indexed<Model><wof_id, name, name_a1, name_a0>``.

        Only the label fields actually present in ``metadata`` are included,
        so rows without e.g. ``name_a1`` still get a usable repr instead of
        raising.
        """
        fields = self.__dict__.get('metadata', {})
        shown = [str(fields[k]) for k in self._REPR_KEYS if k in fields]
        return 'Indexed%s<%s>' % (self.model_cls.__name__, ', '.join(shown))

    @cached_property
    def db_row(self):
        """Fetch the ORM row for ``pk`` via ``model_cls``; cached after first use."""
        return self.model_cls.query.get(self.pk)

In [46]:
# SQLAlchemy runtime inspection of the sample row; the repr shown a few cells
# down identifies the result as an InstanceState.
s = inspect(loc)

In [50]:
# Primary-key identity of the row; displayed below as a 1-tuple.
s.identity

(85897411,)

In [54]:
s.class_.__name__

'Locality'

In [52]:
s

<sqlalchemy.orm.state.InstanceState at 0x12e2ba470>

In [53]:
# Check that Query.get accepts the identity tuple as-is (same value as
# s.identity above) and returns the original row.
Locality.query.get((85897411,))

Locality<85897411, Valley Head, Alabama, United States>

In [56]:
# Wrap the same sample row with the model-agnostic IndexedRow.
# Note: this rebinds `ic`, which previously held an IndexedCity.
ic = IndexedRow(loc)

In [57]:
ic

IndexedLocality<85897411, Valley Head, Alabama, United States>

In [58]:
ic.population

558

In [59]:
ic.db_row

Locality<85897411, Valley Head, Alabama, United States>