# Catch Basin Data Exploration

#### GDAL Docs
[Geometry](https://gdal.org/java/org/gdal/ogr/Geometry.html)

[Spatial Reference](https://gdal.org/java/org/gdal/osr/SpatialReference.html)

[Field](https://gdal.org/java/org/gdal/ogr/FieldDefn.html)

In [22]:
from osgeo import gdal, gdal_array, osr, ogr
import numpy as np
import rasterio
#from affine import Affine
import pyproj
import pandas as pd
from shapely.geometry import Point
from geopandas import GeoDataFrame
import geopandas as gpd

In [2]:
dataset = ogr.Open('../../../catch_basins/DEPCatchbasins/DEPCATCHBASINS.dbf')
dataset

<osgeo.ogr.DataSource; proxy of <Swig Object of type 'OGRDataSourceShadow *' at 0x7f1e34586c00> >

In [3]:
dataset.GetLayerCount()

1

In [4]:
layer = dataset.GetLayer()
layer

<osgeo.ogr.Layer; proxy of <Swig Object of type 'OGRLayerShadow *' at 0x7f1dd45462a0> >

In [5]:
layer.GetExtent()

(913346.4203000069, 1067265.7970000058, 120928.15340000391, 271705.03180000186)

In [6]:
layer.GetSpatialRef().ExportToPrettyWkt()

'PROJCS["NAD83 / New York Long Island (ftUS)",\n    GEOGCS["NAD83",\n        DATUM["North_American_Datum_1983",\n            SPHEROID["GRS 1980",6378137,298.257222101,\n                AUTHORITY["EPSG","7019"]],\n            AUTHORITY["EPSG","6269"]],\n        PRIMEM["Greenwich",0,\n            AUTHORITY["EPSG","8901"]],\n        UNIT["degree",0.0174532925199433,\n            AUTHORITY["EPSG","9122"]],\n        AUTHORITY["EPSG","4269"]],\n    PROJECTION["Lambert_Conformal_Conic_2SP"],\n    PARAMETER["latitude_of_origin",40.1666666666667],\n    PARAMETER["central_meridian",-74],\n    PARAMETER["standard_parallel_1",41.0333333333333],\n    PARAMETER["standard_parallel_2",40.6666666666667],\n    PARAMETER["false_easting",984250],\n    PARAMETER["false_northing",0],\n    UNIT["US survey foot",0.304800609601219,\n        AUTHORITY["EPSG","9003"]],\n    AXIS["Easting",EAST],\n    AXIS["Northing",NORTH],\n    AUTHORITY["EPSG","2263"]]'

In [7]:
#num obs
layer.GetFeatureCount()

153372

In [8]:
layer.GetFIDColumn()

''

In [9]:
print(layer.GetSpatialFilter())

None


In [10]:
layer.GetNextFeature().GetDefnRef().GetGeomFieldIndex

<bound method FeatureDefn.GetGeomFieldIndex of <osgeo.ogr.FeatureDefn; proxy of <Swig Object of type 'OGRFeatureDefnShadow *' at 0x7f1e34574ab0> >>

In [11]:
layer.GetGeomType()

1

In [12]:
layer.GetMetadata_Dict()

{'DBF_DATE_LAST_UPDATE': '2020-11-30'}

In [13]:
for i in range(layer.GetLayerDefn().GetFieldCount()):
    print(layer.GetLayerDefn().GetFieldDefn(i).GetName())

UNITID
LATITUDE
LONGITUDE
POINT_X
POINT_Y


In [14]:
## fields in layer
layerDefinition = layer.GetLayerDefn()
print("Name  -  Type  Width  Precision")
for i in range(layerDefinition.GetFieldCount()):
    fieldName =  layerDefinition.GetFieldDefn(i).GetName()
    fieldTypeCode = layerDefinition.GetFieldDefn(i).GetType()
    fieldType = layerDefinition.GetFieldDefn(i).GetFieldTypeName(fieldTypeCode)
    fieldWidth = layerDefinition.GetFieldDefn(i).GetWidth()
    GetPrecision = layerDefinition.GetFieldDefn(i).GetPrecision()

    print(fieldName + " - " + fieldType+ " " + str(fieldWidth) + " " + str(GetPrecision))

Name  -  Type  Width  Precision
UNITID - String 254 0
LATITUDE - Real 19 8
LONGITUDE - Real 19 8
POINT_X - Real 19 8
POINT_Y - Real 19 8


In [15]:
## spatial data
sr = layer.GetSpatialRef()
print('Name:', sr.GetName())
print('Spatial ref:',sr.ExportToPrettyWkt())
print('Is projected:',sr.IsProjected())
print('Is geographic:', sr.IsGeographic())
print('UTM Zone:', sr.GetUTMZone())
print('TOWGS84 Params:', sr.GetTOWGS84())
print('Semi Minor:',sr.GetSemiMinor())
print('Semi Major:', sr.GetSemiMajor())
print('Linear Units Name:', sr.GetLinearUnitsName())
print('Inv Flattening:', sr.GetInvFlattening())
print('Angular Units Name:', sr.GetAngularUnitsName())
print('Mapinfo Style CoordSys Format:', sr.ExportToMICoordSys())
print('West lon degree:', sr.GetAreaOfUse().west_lon_degree)
print('East lon degree:', sr.GetAreaOfUse().east_lon_degree)
print('North lat degree:', sr.GetAreaOfUse().north_lat_degree)
print('South lat degree:', sr.GetAreaOfUse().south_lat_degree)

Name: NAD83 / New York Long Island (ftUS)
Spatial ref: PROJCS["NAD83 / New York Long Island (ftUS)",
    GEOGCS["NAD83",
        DATUM["North_American_Datum_1983",
            SPHEROID["GRS 1980",6378137,298.257222101,
                AUTHORITY["EPSG","7019"]],
            AUTHORITY["EPSG","6269"]],
        PRIMEM["Greenwich",0,
            AUTHORITY["EPSG","8901"]],
        UNIT["degree",0.0174532925199433,
            AUTHORITY["EPSG","9122"]],
        AUTHORITY["EPSG","4269"]],
    PROJECTION["Lambert_Conformal_Conic_2SP"],
    PARAMETER["latitude_of_origin",40.1666666666667],
    PARAMETER["central_meridian",-74],
    PARAMETER["standard_parallel_1",41.0333333333333],
    PARAMETER["standard_parallel_2",40.6666666666667],
    PARAMETER["false_easting",984250],
    PARAMETER["false_northing",0],
    UNIT["US survey foot",0.304800609601219,
        AUTHORITY["EPSG","9003"]],
    AXIS["Easting",EAST],
    AXIS["Northing",NORTH],
    AUTHORITY["EPSG","2263"]]
Is projected: 1
Is geograp

In [16]:
## feature (observation) data
count = 0 
for feature in layer:
    #print(feature.GetField("STATE_NAME"))
    geom = feature.GetGeometryRef()
    print('Centroid:',geom.Centroid())
    print('Boundary:',geom.Boundary())
    # print('Buffer:', geom.Buffer())
    # print('Coord Dim:', geom.CoordinateDimension())
    # print('Coord Dim2:', geom.GetCoordinateDimension())
    # print('Area:',geom.Area())
    # print('Dimension:', geom.GetDimension())
    # print('Bounding Envelope:', geom.GetEnvelope())
    # print('Element count:', geom.GetGeometryCount())
    print('Name:', geom.GetGeometryName())
    print('Linear Geometry:', geom.GetLinearGeometry())
    # print('M:',geom.GetM())
    # print('Point:',geom.GetPoint_2D())
    # print('Point Count:',geom.GetPointCount())
    # print('Points:', geom.GetPoints())
    # print('X,Y,Z:', geom.GetX(), geom.GetY(), geom.GetZ())
    # print('Has curve geometry:', geom.HasCurveGeometry())
    # print('Is measured:', geom.IsMeasured())
    print('Is ring:', geom.IsRing())
    print('Is simple:', geom.IsSimple())
    print('Is valid:', geom.IsValid())
    # print('Length:', geom.Length())
    # print('Point on surface:', geom.PointOnSurface())
    # print('Polygonize:', geom.Polygonize())
    # print('Geom field count:', feature.GetGeomFieldCount())
    print('Feature ID:',feature.GetFID())
    print('Unit ID:', feature.UNITID)
    print('Latitude:', feature.LATITUDE)
    print('Longitude:', feature.LONGITUDE)
    print('Point X:', feature.POINT_X)
    print('Point Y:', feature.POINT_Y)
    print('-'*30)
    count +=1
    if count == 3:
        break
layer.ResetReading()

Centroid: POINT (930230.623099998 128315.947500005)
Boundary: GEOMETRYCOLLECTION EMPTY
Name: POINT
Linear Geometry: POINT (930230.623099998 128315.947500005)
Is ring: False
Is simple: True
Is valid: True
Feature ID: 0
Unit ID: CB550115
Latitude: 40.5187
Longitude: -74.1942
Point X: 930230.6231
Point Y: 128315.94750001
------------------------------
Centroid: POINT (930232.445299998 128271.111900002)
Boundary: GEOMETRYCOLLECTION EMPTY
Name: POINT
Linear Geometry: POINT (930232.445299998 128271.111900002)
Is ring: False
Is simple: True
Is valid: True
Feature ID: 1
Unit ID: CB550005
Latitude: 40.51858
Longitude: -74.1942
Point X: 930232.4453
Point Y: 128271.1119
------------------------------
Centroid: POINT (929765.086099997 128081.621099994)
Boundary: GEOMETRYCOLLECTION EMPTY
Name: POINT
Linear Geometry: POINT (929765.086099997 128081.621099994)
Is ring: False
Is simple: True
Is valid: True
Feature ID: 2
Unit ID: CB550061
Latitude: 40.51806227
Longitude: -74.19596261
Point X: 929765.086

In [20]:
x = [40.51806227]
y = [-74.19596261]
[xy for xy in zip(x,y)]

[(40.51806227, -74.19596261)]

In [27]:
layer = dataset.GetLayer()
cb_ref = layer.GetExtent()
## df is simplified view of data 
unit_id = []
geometry_ll = []
geometry = []
for feature in layer:
    unit_id.append(feature.UNITID)
    geometry_ll.append(Point(feature.LATITUDE,feature.LONGITUDE))
    geometry.append(Point(feature.POINT_X, feature.POINT_Y))

In [28]:
df = pd.DataFrame(zip(unit_id),columns = ['unit_id'])
df.head()

Unnamed: 0,unit_id
0,CB550115
1,CB550005
2,CB550061
3,CB550062
4,CB550064


In [29]:
gdf_ll = GeoDataFrame(df, crs = 'EPSG:4326', geometry = geometry_ll)

In [30]:
gdf_gr = GeoDataFrame(df, crs = 'EPSG:2263', geometry = geometry)

In [31]:
gdf_ll.head()

Unnamed: 0,unit_id,geometry
0,CB550115,POINT (930230.623 128315.948)
1,CB550005,POINT (930232.445 128271.112)
2,CB550061,POINT (929765.086 128081.621)
3,CB550062,POINT (929784.007 128055.367)
4,CB550064,POINT (929707.413 128268.676)


In [32]:
gdf_gr.head()

Unnamed: 0,unit_id,geometry
0,CB550115,POINT (930230.623 128315.948)
1,CB550005,POINT (930232.445 128271.112)
2,CB550061,POINT (929765.086 128081.621)
3,CB550062,POINT (929784.007 128055.367)
4,CB550064,POINT (929707.413 128268.676)


In [41]:
gdf_gr[:50].explore('unit_id', legend = False)