#### Geopandas example 
Uses the geopandas library to perform a spatial join between gridded climate records and US county polygons, assigning county FIPS codes to each 1/8th-degree record.

Based on the example at https://gis.stackexchange.com/questions/175228/geopandas-spatial-join-extremely-slow

In [1]:
import sys, os
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame, read_file
from geopandas.tools import sjoin
from shapely.geometry import Point, mapping,shape

In [2]:
#Input files
#Paths are assembled with os.path.join so the notebook is not tied to Windows separators
dataDir = os.path.join(os.pardir, os.pardir, "Data")
countyFN = os.path.join(dataDir, "cb_2016_us_county_5m.shp")  #county polygons (shapefile)
hydroFN = os.path.join(dataDir, "HydroData.csv")              #hydro records with LONGITUDE/LATITUDE columns
#Output file (the hydro table itself is loaded in the next cell, so it is not read here)
outFC = os.path.join(dataDir, "HydroFIPS.csv")

In [3]:
#Load the hydro data CSV into a pandas dataframe
#(add nrows=... to the call to read only a sample while testing)
df = pd.read_csv(hydroFN, index_col=None)

In [4]:
#Add geometry to the hydro data based on its Lat/Long values
##Build one shapely point object per record by pairing the long/lat columns directly;
##this avoids the per-record row Series that a row-wise apply would construct
df['geometry'] = [Point(lon, lat) for lon, lat in zip(df.LONGITUDE, df.LATITUDE)]
df.head()

Unnamed: 0,YEAR,LONGITUDE,LATITUDE,COFIPS,STFIPS,RUNOFF,PRECIP,ET,SME,geometry
0,2000,-124.6875,47.9375,-1,-1,2216.291016,2860.095459,741.627014,13090.798828,POINT (-124.6875 47.9375)
1,2000,-124.6875,48.0625,53009,53,2181.373291,2823.416016,738.717041,13044.400391,POINT (-124.6875 48.0625)
2,2000,-124.6875,48.1875,53009,53,2167.416016,2814.455078,745.596985,12944.5,POINT (-124.6875 48.1875)
3,2000,-124.6875,48.3125,-1,-1,2310.494873,2894.345947,674.234985,12231.600586,POINT (-124.6875 48.3125)
4,2000,-124.6875,48.4375,-1,-1,2341.796143,2951.890137,694.606018,12462.999023,POINT (-124.6875 48.4375)


In [5]:
#Promote the pandas dataframe (which now carries a geometry column) to a geopandas dataframe
PointsGeodataframe = GeoDataFrame(df)
#Preview the first rows
PointsGeodataframe.head()

Unnamed: 0,YEAR,LONGITUDE,LATITUDE,COFIPS,STFIPS,RUNOFF,PRECIP,ET,SME,geometry
0,2000,-124.6875,47.9375,-1,-1,2216.291016,2860.095459,741.627014,13090.798828,POINT (-124.6875 47.9375)
1,2000,-124.6875,48.0625,53009,53,2181.373291,2823.416016,738.717041,13044.400391,POINT (-124.6875 48.0625)
2,2000,-124.6875,48.1875,53009,53,2167.416016,2814.455078,745.596985,12944.5,POINT (-124.6875 48.1875)
3,2000,-124.6875,48.3125,-1,-1,2310.494873,2894.345947,674.234985,12231.600586,POINT (-124.6875 48.3125)
4,2000,-124.6875,48.4375,-1,-1,2341.796143,2951.890137,694.606018,12462.999023,POINT (-124.6875 48.4375)


In [6]:
#Load the counties file into a geopandas dataframe
PolygonsGeodataframe = read_file(countyFN)
#Preview the first rows
PolygonsGeodataframe.head()

Unnamed: 0,AFFGEOID,ALAND,AWATER,COUNTYFP,COUNTYNS,GEOID,LSAD,NAME,STATEFP,geometry
0,0500000US04015,34475567011,387344307,15,25445,4015,6,Mohave,4,"POLYGON ((-114.755618 36.087166, -114.753638 3..."
1,0500000US12035,1257365642,221047161,35,308547,12035,6,Flagler,12,"POLYGON ((-81.52365999999999 29.622432, -81.32..."
2,0500000US20129,1889993251,507796,129,485135,20129,6,Morton,20,"POLYGON ((-102.041952 37.024742, -102.04195 37..."
3,0500000US28093,1828989833,9195190,93,695770,28093,6,Marshall,28,"POLYGON ((-89.7243244282036 34.9952117286505, ..."
4,0500000US29510,160458044,10670040,510,767557,29510,25,St. Louis,29,"POLYGON ((-90.318212 38.600017, -90.301828 38...."


In [7]:
#Remove the county attributes that are not used in the rest of the notebook
unneededColumns = ['AFFGEOID', 'ALAND', 'AWATER', 'COUNTYFP', 'COUNTYNS', 'LSAD']
PolygonsGeodataframe.drop(unneededColumns, axis=1, inplace=True)
#Show the columns that remain
PolygonsGeodataframe.columns

Index([u'GEOID', u'NAME', u'STATEFP', u'geometry'], dtype='object')

In [8]:
#Give the points the same coordinate system as the county polygons
#(this only assigns the crs attribute; no coordinates are reprojected)
countyCRS = PolygonsGeodataframe.crs
PointsGeodataframe.crs = countyCRS
#Display the shared coordinate system
countyCRS

{'init': u'epsg:4269'}

In [9]:
#Spatially join each point to the county polygon it falls within;
#how='left' keeps every point, so points matching no county get empty county fields
merged = sjoin(
    PointsGeodataframe,
    PolygonsGeodataframe,
    how='left',
    op='within'
)
merged.head()

Unnamed: 0,YEAR,LONGITUDE,LATITUDE,COFIPS,STFIPS,RUNOFF,PRECIP,ET,SME,geometry,index_right,GEOID,NAME,STATEFP
0,2000,-124.6875,47.9375,-1,-1,2216.291016,2860.095459,741.627014,13090.798828,POINT (-124.6875 47.9375),,,,
1,2000,-124.6875,48.0625,53009,53,2181.373291,2823.416016,738.717041,13044.400391,POINT (-124.6875 48.0625),2637.0,53009.0,Clallam,53.0
2,2000,-124.6875,48.1875,53009,53,2167.416016,2814.455078,745.596985,12944.5,POINT (-124.6875 48.1875),2637.0,53009.0,Clallam,53.0
3,2000,-124.6875,48.3125,-1,-1,2310.494873,2894.345947,674.234985,12231.600586,POINT (-124.6875 48.3125),,,,
4,2000,-124.6875,48.4375,-1,-1,2341.796143,2951.890137,694.606018,12462.999023,POINT (-124.6875 48.4375),,,,


In [10]:
#Tidy up columns: remove the point geometry and the joined polygon index
merged.drop(labels=['geometry', 'index_right'], axis=1, inplace=True)

In [11]:
#Discard every record that has a missing value in any column
#(this includes points that matched no county in the join)
merged.dropna(axis=0, how='any', inplace=True)

In [12]:
#Save the joined table as a CSV file, leaving out the dataframe index
merged.to_csv(outFC, index=False, encoding='utf8')