In [59]:
import os
import geopandas as gpd
from geopandas import GeoDataFrame
import numpy as np
import pandas as pd

In [60]:
schools = "schools.shp"     # point geometry
highway = "highway.shp"     # line geometry
acs2016 = "acs2016.shp"     # polygon geometry
habitat = "habitat.tif"     # raster data (tiff)
dataset_path = os.path.join(os.path.dirname(os.getcwd()), "datasets")

schools_shp = os.path.join(dataset_path, "schools", schools)
highway_shp = os.path.join(dataset_path, "highway", highway)
acs2016_shp = os.path.join(dataset_path, "acs2016", acs2016)
habitat_tif = os.path.join(dataset_path, "habitat", habitat)

In [61]:
schools_gdf = gpd.read_file(schools_shp)
acs2016_gdf = gpd.read_file(acs2016_shp)

In [12]:
def spatial_join(target_gdf, join_gdf, op="intersects", cols_agg=None,
                 join_type="one to one", keep_all=True):
    """
    Spatial join two GeoDataFrames.

    Parameters
    ----------
    target_gdf, join_gdf : GeoDataFrames
        The GeoDataFrame to join to the target GeoDataFrame.
    op : string, default 'intersects'
        Binary predicate, one of {'intersects', 'contains', 'within'}. See
        http://shapely.readthedocs.io/en/latest/manual.html#binary-predicates.
    cols_agg : dict, default None
        Dict of ``{column_name: list of statistics}``, where the list of
        statistics is a list of strings containing the names of desired
        statistics for each column. Names of the statistics include:
        {'first', 'last', 'sum', 'mean', 'median', 'max', 'min',
        'std', 'var', 'count', 'size'}.
    join_type : string, default 'one to one'
        Binary predicate, one of {'one to one', 'one to many'}. The option
        'one to one' only returns one row for each target feature, whereas
        option 'one to many' return multiple rows for each match between
        target feature and join feature.
    keep_all : bool, default True
        Whether to keep all features from the target GeoDataFrame.
    Returns
    -------
    GeoDataFrame
        A GeoDataFrame contains all columns in the target GeoDataFrame and the
        specified columns from the join GeoDataFrame.
    """
    how = 'left' if keep_all else 'inner'
    gpd_sjoin = gpd.sjoin(target_gdf, join_gdf, how=how, op=op)

    if join_type.lower() == "one to one":
        sjoin_by_index = gpd_sjoin.groupby(gpd_sjoin.index)
        
        if cols_agg is None:
            cols_agg = {col: "first" for col in join_gdf.columns
                        if col != join_gdf.geometry.name}
            join_df = sjoin_by_index.agg(cols_agg)
        else:
            join_df = sjoin_by_index.agg(cols_agg)
            join_df.columns = ['_'.join(col).strip()
                               for col in join_df.columns]
        
        # remove duplicated rows generated by geopandas spatial join
        target_df = gpd_sjoin[target_gdf.columns].drop_duplicates()
        return pd.concat([target_df, join_df], axis=1)
    elif join_type.lower() == "one to many":
        return gpd_sjoin
    else:
        raise ValueError("join_type must be either 'one to one' or "
                         "'one to many'")

In [21]:
schools_gdf.head()

Unnamed: 0,NAME,ADDRESS,CITY,ZIPCODE,OP_CLASS,YR_BUILT,GRADES,ENROLLMENT,TEACHERS,GCID,geometry
0,COUNTRYSIDE CHRISTIAN SCHOOL,10926 NW 39TH AVENUE,GAINESVILLE,32606,PRIVATE,1977,PK-12,106.0,6.5,906,POINT (548925.702 632376.471)
1,TRILOGY SCHOOL OF LEARNING ALTERNATIVE,8700 NW 23RD AVENUE,GAINESVILLE,32606,PRIVATE,1900,01-12,84.0,11.5,907,POINT (551225.228 630789.839)
2,MILLHOPPER MONTESSORI SCHOOL,8505 NW 39TH AVENUE,GAINESVILLE,32606,PRIVATE,1989,PK-08,216.0,22.9,908,POINT (551333.268 632240.464)
3,ST MICHAEL'S EPISCOPAL SCHOOL,4315 NW 23RD AVENUE,GAINESVILLE,32606,PRIVATE,1988,KG-12,0.0,0.0,909,POINT (555621.230 630599.522)
4,BNAI ISRAEL DAY SCHOOL,3830 NW 16TH BLVD,GAINESVILLE,32605,PRIVATE,2004,PK-05,22.0,0.0,910,POINT (556459.830 630478.880)


In [20]:
test_join = spatial_join(acs2016_gdf, schools_gdf, cols_agg={'TEACHERS': 'mean', 'ENROLLMENT': 'sum'}, keep_all=False)
test_join

Unnamed: 0,GEOID10,ACRES,TOTALPOP,HOUSEHOLDS,MALE,FEMALE,AVE_HH_SZ,MED_AGE,HSE_UNITS,VACANT,...,MEDHHINC,MEDOOHVAL,H_SF,H_MF,PCT_POV,MINORITY,DATAYEAR,geometry,T_E_A_C_H_E_R_S,E_N_R_O_L_L_M_E_N_T
0,120010006001,267.844400,1371,412,529,842,3.31,34,482,70,...,38824,90300,482,0,21.517141,1301,ACS 2012-2016,"POLYGON ((564121.721 629847.127, 564127.038 62...",0.000000,0.0
2,120010006003,1493.089927,2291,776,979,1312,2.94,26,915,139,...,24167,79800,455,460,41.057822,2108,ACS 2012-2016,"POLYGON ((565758.254 629636.152, 565695.479 62...",8.000000,345.0
3,120010007001,128.644913,828,228,374,454,3.63,24,228,0,...,26630,93700,205,23,16.425121,828,ACS 2012-2016,"POLYGON ((563316.398 627676.644, 563228.536 62...",0.000000,37.0
4,120010007002,1026.059934,1465,440,659,806,3.33,42,545,105,...,32105,103100,526,19,29.829352,1259,ACS 2012-2016,"POLYGON ((563768.801 627680.962, 563770.869 62...",22.000000,1420.0
5,120010007003,1398.479384,2268,918,1068,1200,2.45,49,1035,117,...,26912,103200,755,230,39.991182,1755,ACS 2012-2016,"POLYGON ((565229.514 624327.588, 564862.624 62...",0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,120010022191,968.612412,2419,986,1148,1271,2.45,30,1142,156,...,33734,63600,172,661,19.057548,1288,ACS 2012-2016,"POLYGON ((555653.778 625028.872, 555996.235 62...",13.000000,113.0
151,120010022192,768.106505,3237,1164,1277,1960,2.68,29,1787,623,...,38750,80300,635,847,29.910141,1934,ACS 2012-2016,"POLYGON ((552717.844 622360.014, 552611.991 62...",28.000000,1889.0
152,120010022193,661.281797,487,106,210,277,4.59,32,106,0,...,107115,0,62,44,0.000000,308,ACS 2012-2016,"POLYGON ((554450.498 626905.499, 554535.079 62...",18.000000,118.0
153,120010022201,5504.472690,2030,892,984,1046,2.27,45,1036,144,...,46429,202400,616,69,21.984048,842,ACS 2012-2016,"POLYGON ((558838.384 620506.997, 559014.388 62...",0.000000,0.0


In [23]:
gpd.sjoin(acs2016_gdf, schools_gdf, how="left")[acs2016_gdf.columns].drop_duplicates()

Unnamed: 0,GEOID10,ACRES,TOTALPOP,HOUSEHOLDS,MALE,FEMALE,AVE_HH_SZ,MED_AGE,HSE_UNITS,VACANT,...,TRAN_OTHER,TRAN_HOME,MEDHHINC,MEDOOHVAL,H_SF,H_MF,PCT_POV,MINORITY,DATAYEAR,geometry
0,120010006001,267.844400,1371,412,529,842,3.31,34,482,70,...,0,28,38824,90300,482,0,21.517141,1301,ACS 2012-2016,"POLYGON ((564121.721 629847.127, 564127.038 62..."
1,120010006002,143.120683,710,284,293,417,2.50,27,439,155,...,0,19,25800,66900,314,125,45.352113,701,ACS 2012-2016,"POLYGON ((563749.518 629175.685, 563774.386 62..."
2,120010006003,1493.089927,2291,776,979,1312,2.94,26,915,139,...,0,73,24167,79800,455,460,41.057822,2108,ACS 2012-2016,"POLYGON ((565758.254 629636.152, 565695.479 62..."
3,120010007001,128.644913,828,228,374,454,3.63,24,228,0,...,0,0,26630,93700,205,23,16.425121,828,ACS 2012-2016,"POLYGON ((563316.398 627676.644, 563228.536 62..."
4,120010007002,1026.059934,1465,440,659,806,3.33,42,545,105,...,0,53,32105,103100,526,19,29.829352,1259,ACS 2012-2016,"POLYGON ((563768.801 627680.962, 563770.869 62..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,120010022191,968.612412,2419,986,1148,1271,2.45,30,1142,156,...,14,16,33734,63600,172,661,19.057548,1288,ACS 2012-2016,"POLYGON ((555653.778 625028.872, 555996.235 62..."
151,120010022192,768.106505,3237,1164,1277,1960,2.68,29,1787,623,...,0,11,38750,80300,635,847,29.910141,1934,ACS 2012-2016,"POLYGON ((552717.844 622360.014, 552611.991 62..."
152,120010022193,661.281797,487,106,210,277,4.59,32,106,0,...,0,0,107115,0,62,44,0.000000,308,ACS 2012-2016,"POLYGON ((554450.498 626905.499, 554535.079 62..."
153,120010022201,5504.472690,2030,892,984,1046,2.27,45,1036,144,...,0,29,46429,202400,616,69,21.984048,842,ACS 2012-2016,"POLYGON ((558838.384 620506.997, 559014.388 62..."


In [21]:
test_sjoin[acs2016_gdf.columns].drop_duplicates()

Unnamed: 0,GEOID10,ACRES,TOTALPOP,HOUSEHOLDS,MALE,FEMALE,AVE_HH_SZ,MED_AGE,HSE_UNITS,VACANT,...,TRAN_OTHER,TRAN_HOME,MEDHHINC,MEDOOHVAL,H_SF,H_MF,PCT_POV,MINORITY,DATAYEAR,geometry
0,120010006001,267.844400,1371,412,529,842,3.31,34,482,70,...,0,28,38824,90300,482,0,21.517141,1301,ACS 2012-2016,"POLYGON ((564121.721 629847.127, 564127.038 62..."
2,120010006003,1493.089927,2291,776,979,1312,2.94,26,915,139,...,0,73,24167,79800,455,460,41.057822,2108,ACS 2012-2016,"POLYGON ((565758.254 629636.152, 565695.479 62..."
3,120010007001,128.644913,828,228,374,454,3.63,24,228,0,...,0,0,26630,93700,205,23,16.425121,828,ACS 2012-2016,"POLYGON ((563316.398 627676.644, 563228.536 62..."
4,120010007002,1026.059934,1465,440,659,806,3.33,42,545,105,...,0,53,32105,103100,526,19,29.829352,1259,ACS 2012-2016,"POLYGON ((563768.801 627680.962, 563770.869 62..."
5,120010007003,1398.479384,2268,918,1068,1200,2.45,49,1035,117,...,0,73,26912,103200,755,230,39.991182,1755,ACS 2012-2016,"POLYGON ((565229.514 624327.588, 564862.624 62..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,120010022191,968.612412,2419,986,1148,1271,2.45,30,1142,156,...,14,16,33734,63600,172,661,19.057548,1288,ACS 2012-2016,"POLYGON ((555653.778 625028.872, 555996.235 62..."
151,120010022192,768.106505,3237,1164,1277,1960,2.68,29,1787,623,...,0,11,38750,80300,635,847,29.910141,1934,ACS 2012-2016,"POLYGON ((552717.844 622360.014, 552611.991 62..."
152,120010022193,661.281797,487,106,210,277,4.59,32,106,0,...,0,0,107115,0,62,44,0.000000,308,ACS 2012-2016,"POLYGON ((554450.498 626905.499, 554535.079 62..."
153,120010022201,5504.472690,2030,892,984,1046,2.27,45,1036,144,...,0,29,46429,202400,616,69,21.984048,842,ACS 2012-2016,"POLYGON ((558838.384 620506.997, 559014.388 62..."


In [None]:
by_index = test_join.groupby(test_join.index)

In [None]:
by_index.agg({"TEACHERS": [np.mean, np.min, np.count]})

In [30]:
test_str = "TEACHERS mean; TEACHERS max; TEACHERS min"

In [45]:
test_list = [tuple(item.strip().split()) for item in test_str.split(";")]

In [48]:
[test_df_dict[k].add(v) for k, v in test_list]

[None, None, None]

In [50]:
dict(test_df_dict)

{'TEACHERS': {'max', 'mean', 'min'}}

In [1]:
from collections import defaultdict

In [None]:
std_dict = dict(numbers=[1, 2, 3], letters=['a', 'b', 'c'])

In [2]:
class _AggStringToDict:
    def __init__(self):
        self.data = defaultdict(set)

    def add(self, column, statistic):
        self.data[column].add(statistic)

In [3]:
test = _AggStringToDict()

In [4]:
test.add("TEACHERS", 'mean')

In [7]:
[test.add(a,b ) for a, b in [('LAT', 'mean'), ('LON', 'sum'), ('LAT', 'max'), ('LON', 'mean')]]

[None, None, None, None]

In [6]:
dict(test.data)

{'TEACHERS': {'mean'}}

In [10]:
dict(test.data)

{'TEACHERS': {'mean'}, 'LAT': {'max', 'mean'}, 'LON': {'mean', 'sum'}}

In [56]:
test.__class__.__name__

'_AggStringToDict'

In [47]:
test_df_dict = defaultdict(set)

In [None]:
join_df = test_join.loc[:,schools_cols]

In [None]:
by_index = join_df.groupby(test_join.index)

In [None]:
join_df_agg = by_index.agg([np.mean, np.median, np.sum, np.std])

In [None]:
result = pd.concat([acs2016_gdf, join_df_agg], axis=1)

In [None]:
result.crs = acs2016_gdf.crs

In [57]:
import fiona

In [58]:
fiona.supported_drivers

{'AeronavFAA': 'r',
 'ARCGEN': 'r',
 'BNA': 'raw',
 'DXF': 'raw',
 'CSV': 'raw',
 'OpenFileGDB': 'r',
 'ESRIJSON': 'r',
 'ESRI Shapefile': 'raw',
 'GeoJSON': 'rw',
 'GeoJSONSeq': 'rw',
 'GPKG': 'rw',
 'GML': 'raw',
 'GPX': 'raw',
 'GPSTrackMaker': 'raw',
 'Idrisi': 'r',
 'MapInfo File': 'raw',
 'DGN': 'raw',
 'PCIDSK': 'r',
 'S57': 'r',
 'SEGY': 'r',
 'SUA': 'r',
 'TopoJSON': 'r'}

In [33]:
cols_agg = {"TEACHERS": ["mean", "sum", "count"], "ENROLLMENT": ("first", "min"), "YR_BLT": {"min"}}

In [34]:
[f"{key}_{v}" for key, value in cols_agg.items() for v in value]

['TEACHERS_mean',
 'TEACHERS_sum',
 'TEACHERS_count',
 'ENROLLMENT_first',
 'ENROLLMENT_min',
 'YR_BLT_min']

In [28]:
for v in ("min"):
    print(v)

m
i
n


In [None]:
{for v in cols_agg.values}

In [None]:
by_index.agg({"TEACHERS": ["skew", "last", "mean", "max"], "ENROLLMENT": [np.sum, np.min]}).tail(20)

In [None]:
[type(x) == tuple for x in result.columns.values]

In [None]:
cols

In [None]:
cols = ['_'.join(col).strip() if type(col)==tuple else col for col in result.columns]

In [None]:
result.columns

In [None]:
result.head()

In [None]:
result.loc[:, ['g e o m e t r y']]

In [None]:
result.to_file(r"C:\Users\chjch\Desktop\test_result.shp")

In [None]:
acs2016_gdf.crs

In [None]:
schools_gdf.columns.values

In [2]:
dim = 2
to_unit = 'acrees'

In [8]:
def _pluralize(name):
    # convert unit name to its corresponding plural form
    if name.endswith("Inch"):
        return name.replace("Inch", "Inches")
    elif name.endswith("Foot"):
        return name.replace("Foot", "Feet")
    else:
        return name + "s"

In [13]:
class UnitHandler:

    UNIT_MAP = {'km': ('Kilometer', 1000.0),
                'm': ('Meter', 1.0),
                'dm': ('Decimeter', 0.1),
                'cm': ('Centimeter', 0.01),
                'mm': ('Millimeter', 0.001),
                'kmi': ('International Nautical Mile', 1852.0),
                'in': ('International Inch', 0.0254),
                'ft': ('International Foot', 0.3048),
                'yd': ('International Yard', 0.9144),
                'mi': ('International Statute Mile', 1609.344),
                'fath': ('International Fathom', 1.8288),
                'ch': ('International Chain', 20.1168),
                'link': ('International Link', 0.201168),
                'us-in': ("U.S. Surveyor's Inch", 0.0254000508001016),
                'us-ft': ("U.S. Surveyor's Foot", 0.3048006096012192),
                'us-yd': ("U.S. Surveyor's Yard", 0.9144018288036576),
                'us-ch': ("U.S. Surveyor's Chain", 20.116840233680467),
                'us-mi': ("U.S. Surveyor's Statute Mile", 1609.3472186944373),
                'ind-yd': ('Indian Yard', 0.91439523),
                'ind-ft': ('Indian Foot', 0.30479841),
                'ind-ch': ('Indian Chain', 20.11669506)}

    # alternative unit names
    OTHER_NAMES = {"Metre": "m",
                   "Mile": "us-mi",
                   "Foot": "us-ft",
                   "Yard": "us-yd",
                   "Inch": "us-in"}

    # normal unit names
    VALID_NAMES = {v[0]: k for k, v in UNIT_MAP.items()}

    def __init__(self, unit):
        unit_validation = UnitHandler.validate(unit)
        if unit_validation is None:
            raise ValueError("The unit provided is invalid or not supported.")
        else:
            self.unit_id = unit_validation

    @property
    def fullname(self):
        return self.UNIT_MAP[self.unit_id][0]

    @property
    def base_factor(self):
        """conversion factor compare to meter."""
        return self.UNIT_MAP[self.unit_id][1]

    @property
    def plural(self):
        return _pluralize(self.fullname)

    @classmethod
    def _unit_name_map(cls):
        return {**cls.VALID_NAMES, **cls.OTHER_NAMES,
                **{_pluralize(k): v for k, v in cls.VALID_NAMES.items()},
                **{_pluralize(k): v for k, v in cls.OTHER_NAMES.items()}}

    @classmethod
    def validate(cls, unit):
        try:
            unit_lower = unit.lower()
            unit_title = unit.title()
            unit_name_map = cls._unit_name_map()
            if unit_lower in cls.UNIT_MAP.keys():
                return unit_lower
            elif unit_title in unit_name_map.keys():
                return unit_name_map[unit_title]
            else:
                return None
        except Exception as err:
            print(err)

    def convert(self, to_unit, dimension=1):
        try:
            to_unit_handler = UnitHandler(to_unit)
            factor_1d = self.base_factor / to_unit_handler.base_factor
            if dimension == 1:
                return factor_1d
            elif dimension == 2:
                return factor_1d**2
        except ValueError:
            acre_alias = ['acre', 'acres']
            hectare_alias = ['hectare', 'hectares']
            if dimension == 2:
                if to_unit.lower() in acre_alias:
                    areal_base = 'us-ft'
                    areal_factor = 43560
                elif to_unit.lower() in hectare_alias:
                    areal_base = 'm'
                    areal_factor = 10000
                else:
                    raise ValueError(f'{to_unit} is not a valid area unit.')
                areal_unit_handler = UnitHandler(areal_base)
                factor_1d = self.base_factor / areal_unit_handler.base_factor
                return factor_1d**2 / areal_factor
            else:
                raise ValueError(f'{to_unit} is not a valid length unit.')

In [None]:
def convert(self, other_unit):
    try:
        other_unit_handler = UnitHandler(other_unit)
        factor_1d = self.base_factor / other_unit_handler.base_factor
        if self.dimension == 1:
            return factor_1d
        elif self.dimension == 2:
            return factor_1d**2
    except ValueError:
        acre_alias = ['acre', 'acres']
        hectare_alias = ['hectare', 'hectares']
        if self.dimension == 2:
            if other_unit.lower() in acre_alias:
                areal_base = 'us-ft'
                areal_factor = 43560
            elif other_unit.lower() in hectare_alias:
                areal_base = 'm'
                areal_factor = 10000
            else:
                raise ValueError(f'{other_unit} is not a valid area unit.')
            areal_unit_handler = UnitHandler(areal_base)
            factor_1d = self.base_factor / areal_unit_handler.base_factor
            return factor_1d**2 / areal_factor
        else:
            raise ValueError(f'{other_unit} is not a valid length unit.')

In [14]:
my_unit = UnitHandler('mile')

In [23]:
my_unit.convert('m', dimension=2)

2589998.470319521

In [18]:
if not None:
    print("okay")

okay


In [43]:
if 'm':
    print("ok")
else:
    print('not okay')

ok


In [31]:
AREA_NAMES = {'Acre': 'us-ft', 'Hectare': 'm'}

In [34]:
list(AREA_NAMES.keys()) + [_pluralize(k) for k in AREA_NAMES.keys()]

['Acre', 'Hectare', 'Acres', 'Hectares']

In [57]:
class UnitHandler:

    UNIT_MAP = {'km': ('Kilometer', 1000.0),
                'm': ('Meter', 1.0),
                'dm': ('Decimeter', 0.1),
                'cm': ('Centimeter', 0.01),
                'mm': ('Millimeter', 0.001),
                'kmi': ('International Nautical Mile', 1852.0),
                'in': ('International Inch', 0.0254),
                'ft': ('International Foot', 0.3048),
                'yd': ('International Yard', 0.9144),
                'mi': ('International Statute Mile', 1609.344),
                'fath': ('International Fathom', 1.8288),
                'ch': ('International Chain', 20.1168),
                'link': ('International Link', 0.201168),
                'us-in': ("U.S. Surveyor's Inch", 0.0254000508001016),
                'us-ft': ("U.S. Surveyor's Foot", 0.3048006096012192),
                'us-yd': ("U.S. Surveyor's Yard", 0.9144018288036576),
                'us-ch': ("U.S. Surveyor's Chain", 20.116840233680467),
                'us-mi': ("U.S. Surveyor's Statute Mile", 1609.3472186944373),
                'ind-yd': ('Indian Yard', 0.91439523),
                'ind-ft': ('Indian Foot', 0.30479841),
                'ind-ch': ('Indian Chain', 20.11669506)}

    # alternative unit names
    OTHER_NAMES = {"Metre": "m",
                   "Mile": "us-mi",
                   "Foot": "us-ft",
                   "Yard": "us-yd",
                   "Inch": "us-in"}

    # normal unit names
    VALID_NAMES = {v[0]: k for k, v in UNIT_MAP.items()}

    # additional area names supported
    AREA_NAMES = {'Acre': 'ac',
                  'Hectare': 'ha'}

    def __init__(self, unit):
        validate_1d = self._validate_1d(unit)
        if validate_1d:
            self.unit_id = validate_1d
            self.dimension = 1
            return
        validate_2d = self._validate_2d(unit)
        if validate_2d:
            self.unit_id = validate_2d
            self.dimension = 2
            return
        else:
            raise ValueError(f'{unit} is not a valid unit.')

    @property
    def fullname(self):
        if self.dimension == 1:
            return self.UNIT_MAP[self.unit_id][0]
        else:  # dimension=2
            if self.unit_id == 'ac':
                return 'Acre'
            elif self.unit_id == 'ha':
                return 'Hectare'
            else:
                return f'Square {self.UNIT_MAP[self.unit_id][0]}'

    @staticmethod
    def _pluralize(name):
        # convert unit name to its corresponding plural form
        if name.endswith("Inch"):
            return name.replace("Inch", "Inches")
        elif name.endswith("Foot"):
            return name.replace("Foot", "Feet")
        else:
            return name + "s"

    @property
    def plural(self):
        return self._pluralize(self.fullname)

    @property
    def base_factor(self):
        """conversion factor compare to meter or square meter."""
        if self.dimension == 1:
            return self.UNIT_MAP[self.unit_id][1]
        else:  # dimension=2
            if self.unit_id == 'ac':
                return self.UNIT_MAP['us-ft'][1]**2 * 43560
            elif self.unit_id == 'ha':
                return 10000
            else:
                return self.UNIT_MAP[self.unit_id][1]**2

    def _unit_name_map(self):
        return {**self.VALID_NAMES, **self.OTHER_NAMES,
                **{self._pluralize(k): v for k, v in self.VALID_NAMES.items()},
                **{self._pluralize(k): v for k, v in self.OTHER_NAMES.items()}}

    def _validate_1d(self, unit):
        try:
            unit_lower = unit.lower()
            unit_title = unit.title()
            unit_name_map = self._unit_name_map()
            if unit_lower in self.UNIT_MAP.keys():
                return unit_lower
            elif unit_title in unit_name_map.keys():
                return unit_name_map[unit_title]
            else:
                return None
        except Exception as err:
            print(err)

    def _validate_2d(self, unit):
        try:
            sq, unit_name = unit.split()
            if sq in ['square', 'sq'] and self._validate_1d(unit_name):
                return self._validate_1d(unit_name)
            else:
                raise ValueError('Not a valid areal unit.')
        except ValueError:
            if unit.title() in self.AREA_NAMES.keys():
                return self.AREA_NAMES[unit.title()]
            elif unit[:-1].title() in self.AREA_NAMES.keys():
                return self.AREA_NAMES[unit[:-1].title()]
            else:
                raise ValueError('Not a valid areal unit.')

    def convert(self, other_unit):
        other_unit_handler = UnitHandler(other_unit)
        if self.dimension != other_unit_handler.dimension:
            raise TypeError(f'Incompatible units, from {self.fullname} to '
                            f'{other_unit_handler.fullname}.')
        else:
            return self.base_factor / other_unit_handler.base_factor

In [89]:
my_unit = UnitHandler('sq mile')

In [91]:
x = 2
x *= 2

In [93]:
search_area = 3.14

In [94]:
search_area *= my_unit.convert('acre')
search_area

2009.6000000000004

In [6]:
if not 0:
    print('ok')

ok


In [8]:
a = 3

In [10]:
b = (f'{a} '
     f'test')

In [17]:
acs2016_gdf.head()

Unnamed: 0,GEOID10,ACRES,TOTALPOP,HOUSEHOLDS,MALE,FEMALE,AVE_HH_SZ,MED_AGE,HSE_UNITS,VACANT,...,TRAN_OTHER,TRAN_HOME,MEDHHINC,MEDOOHVAL,H_SF,H_MF,PCT_POV,MINORITY,DATAYEAR,geometry
0,120010006001,267.8444,1371,412,529,842,3.31,34,482,70,...,0,28,38824,90300,482,0,21.517141,1301,ACS 2012-2016,POINT (564653.623 629502.953)
1,120010006002,143.120683,710,284,293,417,2.5,27,439,155,...,0,19,25800,66900,314,125,45.352113,701,ACS 2012-2016,POINT (563788.273 628725.213)
2,120010006003,1493.089927,2291,776,979,1312,2.94,26,915,139,...,0,73,24167,79800,455,460,41.057822,2108,ACS 2012-2016,POINT (565789.586 628317.670)
3,120010007001,128.644913,828,228,374,454,3.63,24,228,0,...,0,0,26630,93700,205,23,16.425121,828,ACS 2012-2016,POINT (563364.370 627976.814)
4,120010007002,1026.059934,1465,440,659,806,3.33,42,545,105,...,0,53,32105,103100,526,19,29.829352,1259,ACS 2012-2016,POINT (562936.959 626506.905)


In [16]:
acs2016_gdf.geometry = acs2016_gdf.centroid

In [18]:
zstats = [{'count': 75,
  'max': 22.273418426513672,
  'mean': 14.660084635416666,
  'min': 6.575114727020264},
 {'count': 50,
  'max': 82.69043731689453,
  'mean': 56.60576171875,
  'min': 16.940950393676758}]

In [44]:
zstats = [{'sum': 115.0}, {'sum': None}, {'sum': 74.0}, {'sum': 204.0}, {'sum': None}]

In [31]:
np.array([v for d in zstats for v in d.values()])

array([115., 191.,  74., 204.,  97.])

In [23]:
%timeit [v for d in zstats for k, v in d.items()]

784 ns ± 12.3 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [29]:
%timeit np.array([v for d in zstats for v in d.values()])

1.5 µs ± 68.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [45]:
%timeit np.array([v if v is not None else np.nan for d in zstats for v in d.values()])

1.63 µs ± 84.4 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [47]:
np.array([v if v is not None else np.nan for d in zstats for v in d.values()])

array([115.,  nan,  74., 204.,  nan])

In [48]:
np.array([v for d in zstats for v in d.values()], dtype=np.float)

array([115.,  nan,  74., 204.,  nan])

In [42]:
a = 0
if a is not None:
    print('ok')

ok


In [46]:
%timeit np.array([v for d in zstats for v in d.values()], dtype=np.float)

1.82 µs ± 67.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [24]:
%timeit pd.DataFrame(zstats)

254 µs ± 4.47 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [40]:
%timeit (np.array([1, 2, 3, np.nan]) / 3)

1.92 µs ± 115 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [41]:
%timeit (np.array([1, 2, 3, None], dtype=np.float) / 3)

2.05 µs ± 26.9 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [52]:
def foo(num1, num2):
    def _foo():
        return num1 * num2
    return _foo


In [53]:
foo(1,2)()

2

In [56]:
import timeit

In [54]:
def f1():
    for n in range(100):
        pass
    


In [57]:
print(timeit.timeit(f1, number=1000))

0.0017397999999957392


In [65]:
type(acs2016_gdf.copy())

geopandas.geodataframe.GeoDataFrame

In [62]:
acs2016_gdf.area

0      1.083932e+06
1      5.791912e+05
2      6.042345e+06
3      5.206096e+05
4      4.152334e+06
           ...     
150    3.919851e+06
151    3.108429e+06
152    2.676123e+06
153    2.227590e+07
154    6.812558e+06
Length: 155, dtype: float64

In [None]:
def foo(num1, num2):
    def _foo():
        num1 * num2
        pass
    return _foo

A = 1
B = 2

import timeit
t = timeit.Timer(foo(A,B))  
print t.timeit(5)

In [66]:
k = list('abc')
v = [1, 2]

In [70]:
my_dict = {'NAME': 'first', 'ADDRESS': 'first', 'CITY': 'first', 'ZIPCODE': 'first', 
           'OP_CLASS': 'first', 'YR_BUILT': 'first', 'GRADES': 'first', 
           'ENROLLMENT': 'first', 'TEACHERS': 'first', 'GCID': 'first'}


In [72]:
[f"{key}_{v}"
 for key, value in my_dict.items()
 for v in [value]]

['NAME_first',
 'ADDRESS_first',
 'CITY_first',
 'ZIPCODE_first',
 'OP_CLASS_first',
 'YR_BUILT_first',
 'GRADES_first',
 'ENROLLMENT_first',
 'TEACHERS_first',
 'GCID_first']

In [None]:
 for key, value in my_dict.items()：
    if len(value) == 1:
        