In [None]:
import numpy as np
import pandas as pd
import fiona
from shapely.geometry import shape, mapping
from GISio import shp2df, df2shp
import shapefile as sf

### read in a shapefile using `fiona`
more info here: https://github.com/Toblerity/Fiona  
`fiona` manual: http://toblerity.org/fiona/manual.html  
`shapely` manual: http://toblerity.org/shapely/manual.html

In [None]:
# Read every feature into a flat dict: its attribute fields plus a 'geometry' entry.
with fiona.open('data/YaharaLakes.shp') as src:
    meta = src.meta  # saved so the metadata is still available after the file is closed
    records = []
    for feature in src:
        # Copy the attributes instead of adding a key to the mapping that fiona
        # yielded; newer fiona versions deprecate mutating feature objects.
        props = dict(feature['properties'])
        props['geometry'] = feature.get('geometry', None)
        records.append(props)

In [None]:
# `src` is already closed here (its `with` block has ended) and the name is
# rebound further down; display the metadata that was saved in `meta` instead.
meta

#### Each record is a dictionary similar to the GeoJSON format

In [None]:
# display the first record built by the read loop
records[0]

### Make a `DataFrame` of the records
* information on the geometric features is stored in a `'geometry'` column but is still in GeoJSON-style format
* convert geometries to `shapely` features so we can do stuff with them

In [None]:
# One row per record; the 'geometry' column initially holds GeoJSON-style
# mappings, which are replaced here by the equivalent shapely objects.
df = pd.DataFrame(records)
df['geometry'] = [shape(geojson) for geojson in df['geometry']]
df.head()

### inspect an individual feature

In [None]:
# pull a single geometry out of the 'geometry' column (index label 3) and display it
lake_mendota = df.geometry[3]
lake_mendota

In [None]:
# bounding extent of the selected geometry (shapely `bounds` attribute)
lake_mendota.bounds

### since the geometries are `shapely` objects, there are many things we can do with them

In [None]:
from shapely.affinity import rotate
# returns a rotated copy of the geometry; the angle passed is 60.
rotate(lake_mendota, 60.)

### Read in the same shapefile with a filter
* this can speed up reading from large datasets (such as NHDPlus) if only a subset of the features are needed

In [None]:
# Same read as before, but only iterate over the features returned by
# `src.filter` for the given bounding box.
with fiona.open('data/YaharaLakes.shp') as src:
    meta = src.meta  # saved so schema/crs can be reused after the file is closed
    records = []
    for feature in src.filter(bbox=(-89.48, 43.1, -89.37, 43.15)):
        # Copy the attributes instead of adding a key to the mapping that fiona
        # yielded; newer fiona versions deprecate mutating feature objects.
        props = dict(feature['properties'])
        props['geometry'] = feature.get('geometry', None)
        records.append(props)
    df2 = pd.DataFrame(records)
df2.head()

### Use `GIS_utils`
* `shp2df` is a macro around `fiona` and `shapely` that also handles null geometries, dbf files, boolean values, etc.

In [None]:
# read the same shapefile with a single call; note that this rebinds `df`
df = shp2df('data/YaharaLakes.shp')
df

### Writing a `DataFrame` to a shapefile

#### make a schema
* since what we are writing out has the same structure as what we read in, we can recycle the schema. Otherwise, we'd have to make one.

In [None]:
# reuse the schema captured in `meta` while the source file was open
# (`src` itself is closed by now and is rebound to another object further down)
meta['schema']

#### define the coordinate system (if you want a .prj file)
* we can also recycle this
* but this can also be easily made if the [**epsg code**](http://www.epsg.org/) is known

In [None]:
# reuse the coordinate system captured in `meta` while the source file was open
# (`src` itself is closed by now and is rebound to another object further down)
meta['crs']

In [None]:
from fiona.crs import from_epsg
# build a coordinate-system definition directly from an EPSG code
from_epsg(4269)

#### convert the `DataFrame` back to a dictionary

In [None]:
# Attribute columns only (no geometry), one dict per row.
# The cast to object happens before the conversion to dicts -- presumably so
# the values are written as generic Python objects; confirm if dtypes matter.
props = (
    df.drop('geometry', axis=1)
      .astype(object)
      .to_dict(orient='records')
)
props[0]

#### `shapely.geometry.mapping` converts the `shapely` objects back to GeoJSON

In [None]:
# one GeoJSON-style mapping per shapely geometry, in row order
mapped = [mapping(geom) for geom in df['geometry']]
mapped[0]

In [None]:
# Write one feature per row, recycling the schema and coordinate system that
# were saved in `meta` when the source shapefile was read. Using `meta` rather
# than `src.meta` keeps this cell working after `src` is closed or rebound.
with fiona.open('data/YaharaLakes2.shp', "w", driver="ESRI Shapefile",
                crs=meta['crs'], schema=meta['schema']) as output:
    # `props` and `mapped` are parallel lists built from the same DataFrame rows
    for properties, geometry in zip(props, mapped):
        output.write({'properties': properties,
                      'geometry': geometry})

### Using `GIS_utils`
* the `df2shp` macro uses `fiona` to write a `DataFrame` to a shapefile, taking care of the schema, coordinate system, and formatting

In [None]:
# write `df` to the same output path as the manual fiona example, passing the EPSG code
df2shp(df, 'data/YaharaLakes2.shp', epsg=4269)

### `Pyshp` is an alternative to `fiona` that is written in pure python (`fiona` uses the OGR library)
* does not handle coordinate system definitions (no prj files)  
* also does not read dbf files for some reason

In [None]:
# NOTE: this rebinds `src`, which the earlier cells used for the fiona collection;
# from here on `src` is a pyshp Reader.
src = sf.Reader('data/YaharaLakes.shp')
src.fields

In [None]:
# shapely geometry for every shape in the file
geoms = [shape(shp) for shp in src.iterShapes()]
# each attribute record becomes a tuple with its geometry appended as the last item
records = [tuple(rec) + (geoms[idx],)
           for idx, rec in enumerate(src.iterRecords())]
records[0]

In [None]:
# field definitions again, for reference when building the column names below
src.fields

In [None]:
# Column names come from the field definitions; the first entry (the
# DeletionFlag field) is skipped because it has no counterpart in the records.
columns = [field[0] for field in src.fields[1:]]
columns.append('geometry')
df = pd.DataFrame(records, columns=columns)
df.head()

### Writing with `pyshp`

#### specify the shape type
see https://en.wikipedia.org/wiki/Shapefile (scroll down to Shape Types table)
* get the shape type from a feature in the `geometry` column (we're assuming that they are all the same type, as required by the shapefile format)

In [None]:
# Numeric shapefile shape-type codes for the shapely geometry types handled here.
types = {'Polygon': 5,
         'LineString': 3,
         'Point': 1}
# Look up the code from the first feature (all features are assumed to share
# one type). `geom_type` is used because the `.type` alias is deprecated in
# Shapely 2.0.
geomtype = types[df.geometry[0].geom_type]
geomtype

#### instantiate the shapefile writer

In [None]:
# NOTE(review): passing the shape type as the first argument looks like the
# pyshp 1.x Writer API -- confirm against the installed pyshp version.
w = sf.Writer(geomtype)
w.autoBalance = 1 # prevents mismatch between number of features and records

#### rename the fields if necessary to enforce the 10-character limit

In [None]:
def enforce_10ch_limit(names):
    """Enforce 10 character limit for fieldnames.
    Add suffix for duplicate names starting at 0.

    Names longer than 10 characters are cut to their first 9 characters plus
    a trailing '1'. Any name that then occurs more than once gets a running
    integer suffix (0, 1, 2, ...), with the stem shortened as needed so the
    result never exceeds 10 characters.

    Parameters
    ----------
    names : iterable of strings

    Returns
    -------
    names : list of strings of len <= 10, with duplicates disambiguated.
    """
    names = [n[:9] + '1' if len(n) > 10 else n
             for n in names]

    # number of occurrences of each (possibly truncated) name
    counts = {}
    for n in names:
        counts[n] = counts.get(n, 0) + 1

    # next suffix to hand out, tracked only for names that occur more than once
    next_suffix = {n: 0 for n, cnt in counts.items() if cnt > 1}

    for i, n in enumerate(names):
        if n in next_suffix:
            sfx = str(next_suffix[n])
            next_suffix[n] += 1
            # leave room for the whole suffix so multi-digit suffixes still fit
            names[i] = n[:10 - len(sfx)] + sfx
    return names

# version of the dataframe without the geometry column
dfr = df.drop('geometry', axis=1)

# shapefile-safe field names derived from the attribute column names
names = enforce_10ch_limit(dfr.columns)
names

#### Specify dtypes and write the fields

In [None]:
def get_pyshp_field_info(dtypename):
    """Get pyshp dtype information for a given numpy dtype.

    Parameters
    ----------
    dtypename : str
        Name of a numpy dtype (e.g. ``'int64'``, ``'float64'``, ``'object'``).
        Matching is case-insensitive and by substring.

    Returns
    -------
    tuple
        Positional arguments that follow the field name in the ``w.field``
        call: a type code and size, plus a decimal count for numeric types.
        The string definition ``('C', 50)`` is returned when the dtype name
        matches none of the known keys or more than one of them.
    """
    fields = {'int': ('N', 20, 0),
              '<i': ('N', 20, 0),
              'float': ('F', 20, 12),
              # same precision as 'float'; zero decimals would drop the fraction
              '<f': ('F', 20, 12),
              'bool': ('L', 1),
              'b1': ('L', 1),
              'str': ('C', 50),
              'object': ('C', 50)}
    lowered = dtypename.lower()
    matches = [key for key in fields if key in lowered]
    if len(matches) == 1:
        return fields[matches[0]]
    # no match or an ambiguous match: fall back to a string field
    return fields['str']

In [None]:
# declare one shapefile field per attribute column, pairing each cleaned
# field name with the dtype of the corresponding column
for fieldname, npdtype in zip(names, dfr.dtypes):
    field_args = get_pyshp_field_info(npdtype.name)
    w.field(fieldname, *field_args)

#### write out the features and records, handling the different shapetypes

In [None]:
# inspect the coordinates of the first geometry in its GeoJSON-style form
g = df.geometry[0]
mapping(g)['coordinates']

In [None]:
# geometries as a plain list, and the attribute rows (geometry column excluded)
# cast to object; the two are indexed in parallel when writing
geoms = df.geometry.tolist()
props = dfr.astype(object).values

In [None]:
# write the geometry and attributes for each record
# `geoms` and `props` are parallel: one geometry and one attribute row per feature.
for geom, rec in zip(geoms, props):
    coords = mapping(geom)['coordinates']
    if geomtype == 5:
        # Polygon coordinates are already a sequence of rings (parts)
        w.poly(coords)
    elif geomtype == 3:
        # a LineString is a single flat list of points; pyshp expects a list of parts
        w.line([coords])
    elif geomtype == 1:
        # pyshp takes the point coordinates as separate arguments, not one tuple
        w.point(*coords)
    else:
        # unsupported shape type: write nothing for this feature
        continue
    w.record(*rec)
w.save('data/YaharaLakes2.shp')