# Building grids to merge environmental data. 

In [None]:
import shapely
import geopandas as gdp
import pandas as pd
from geopandas.tools import sjoin
from shapely.geometry import Point
from shapely import speedups
speedups.enable()

In [None]:
# Load the gridded shapefile (update the path to where you stored the local files)
grid = gdp.read_file('D:/Documents/SpringBoard/capstone-1/datasets/gridded-shapefile.shp')

# CRS applied to the grid here and reused for every point dataset in the cells below
# NOTE(review): newer pyproj releases deprecate the {'init': ...} form in favour of
#     'EPSG:4326' -- confirm against the installed version before changing it
crs = {'init': 'epsg:4326'}

# Re-wrap the grid with that CRS, then keep only the polygon ID and geometry columns
# NOTE(review): passing crs= to the constructor does not appear to reproject any
#     coordinates -- confirm the shapefile is already stored in EPSG:4326
grid = gdp.GeoDataFrame(grid, crs=crs).loc[:, ['OBJECTID', 'geometry']]

In [None]:
# We'll do this first for SST data
# Read the SST CSV, skipping its first line so that the following line is used as the header
#     colnames are: UTC, degrees_north, degrees_east, degree_C
#     where degrees_north = latitude and degrees_east = longitude
# Housekeeping for SST: discard rows containing NAs, renumber the rows, and keep a
# single row per unique lat/lon pair
sst = (
    pd.read_csv('D:/Documents/SpringBoard/capstone-1/datasets/SST_2012.csv', skiprows=1)
    .dropna()
    .reset_index(drop=True)
    .drop_duplicates(subset=['degrees_east', 'degrees_north'])
)

# Turn the SST data into a geometric dataset: one point per row, built as (longitude, latitude)
geometry = [Point(lon, lat) for lon, lat in zip(sst['degrees_east'], sst['degrees_north'])]
sst_geom = gdp.GeoDataFrame(sst, crs=crs, geometry=geometry)

# Spatially join the points onto the grid polygons (left join keeps unmatched points for now),
# then reduce the result to the coordinates plus the polygon ID
sst_polygons = (
    sjoin(sst_geom, grid, how='left')
    .loc[:, ['degrees_north', 'degrees_east', 'OBJECTID']]
    # store the polygon ID as a string and cut it at the first '.' to remove the silly .0
    .assign(OBJECTID=lambda joined: joined['OBJECTID'].astype(str).str.split('.', n=1, expand=True)[0])
    .rename(columns={'OBJECTID': 'Zone'})
)

# Points that matched no polygon carry the string 'nan' as their zone; only keep data
# that have zone IDs
sst_polygons = sst_polygons[sst_polygons['Zone'] != 'nan'].reset_index(drop=True)

# and save
sst_polygons.to_csv('D:/Documents/SpringBoard/capstone-1/datasets/grid_merges/sst_locations.csv')

In [None]:
# Now let's move on to SSS
# Read the SSS CSV, skipping its first line so that the following line is used as the header
#     colnames are: UTC, degrees_north, degrees_east, sss-unit
#     where degrees_north = latitude and degrees_east = longitude
# Discard rows containing NAs, renumber the rows, and keep a single row per unique lat/lng pair
sss = (
    pd.read_csv('D:/Documents/SpringBoard/capstone-1/datasets/SSS_2012.csv', skiprows=1)
    .dropna()
    .reset_index(drop=True)
    .drop_duplicates(subset=['degrees_east', 'degrees_north'])
)

# Turn the data into a geometric dataset: one point per row, built as (longitude, latitude)
geometry = [Point(lon, lat) for lon, lat in zip(sss['degrees_east'], sss['degrees_north'])]
sss_geom = gdp.GeoDataFrame(sss, crs=crs, geometry=geometry)

# Spatially join the points onto the grid polygons (left join keeps unmatched points for now),
# then reduce the result to the coordinates plus the polygon ID
sss_polygons = (
    sjoin(sss_geom, grid, how='left')
    .loc[:, ['degrees_north', 'degrees_east', 'OBJECTID']]
    # store the polygon ID as a string and cut it at the first '.' to remove the silly .0
    .assign(OBJECTID=lambda joined: joined['OBJECTID'].astype(str).str.split('.', n=1, expand=True)[0])
    .rename(columns={'OBJECTID': 'Zone'})
)

# Points that matched no polygon carry the string 'nan' as their zone; only keep data
# that have zone IDs
sss_polygons = sss_polygons[sss_polygons['Zone'] != 'nan'].reset_index(drop=True)

# and save
sss_polygons.to_csv('D:/Documents/SpringBoard/capstone-1/datasets/grid_merges/sss_locations.csv')

In [None]:
# Now chl-a
# Read the chl-a CSV, skipping its first line so that the following line is used as the header
#     colnames are: UTC, degrees_north, degrees_east, chla-unit
#     where degrees_north = latitude and degrees_east = longitude
# Discard rows containing NAs, renumber the rows, and keep a single row per unique lat/lng pair
chla = (
    pd.read_csv('D:/Documents/SpringBoard/capstone-1/datasets/2012_chl-a.csv', skiprows=1)
    .dropna()
    .reset_index(drop=True)
    .drop_duplicates(subset=['degrees_east', 'degrees_north'])
)

# Turn the data into a geometric dataset: one point per row, built as (longitude, latitude)
geometry = [Point(lon, lat) for lon, lat in zip(chla['degrees_east'], chla['degrees_north'])]
chla_geom = gdp.GeoDataFrame(chla, crs=crs, geometry=geometry)

# Spatially join the points onto the grid polygons (left join keeps unmatched points for now),
# then reduce the result to the coordinates plus the polygon ID
chla_polygons = (
    sjoin(chla_geom, grid, how='left')
    .loc[:, ['degrees_north', 'degrees_east', 'OBJECTID']]
    # store the polygon ID as a string and cut it at the first '.' to remove the silly .0
    .assign(OBJECTID=lambda joined: joined['OBJECTID'].astype(str).str.split('.', n=1, expand=True)[0])
    .rename(columns={'OBJECTID': 'Zone'})
)

# Points that matched no polygon carry the string 'nan' as their zone; only keep data
# that have zone IDs
chla_polygons = chla_polygons[chla_polygons['Zone'] != 'nan'].reset_index(drop=True)

# and save
chla_polygons.to_csv('D:/Documents/SpringBoard/capstone-1/datasets/grid_merges/chla_locations.csv')

In [None]:
# Seafloor to gradient
# Read the seafloor depth CSV, skipping its first line so that the following line is used
# as the header
#     colnames are: UTC, degrees_north, degrees_east, m
#     where degrees_north = latitude and degrees_east = longitude
# Discard rows containing NAs, renumber the rows, and keep a single row per unique lat/lng pair
depth = (
    pd.read_csv('D:/Documents/SpringBoard/capstone-1/datasets/seafloor_depth_gradient.csv', skiprows=1)
    .dropna()
    .reset_index(drop=True)
    .drop_duplicates(subset=['degrees_east', 'degrees_north'])
)

# We have to convert the longitude because it's in a different format for this dataset
# (0 to 360 rather than -180 to 180). Only values of 180 and above are shifted by -360;
# subtracting 360 from every row would push any longitude below 180 under -180, which is
# not a valid coordinate and would silently fail to match any grid polygon.
depth['degrees_east_neg'] = depth['degrees_east'].where(
    depth['degrees_east'] < 180,  # keep values that are already below 180 ...
    depth['degrees_east'] - 360,  # ... and wrap the rest around
)

# Turn the data into a geometric dataset: one point per row, built from the converted
# longitude and the latitude
geometry = [Point(lon, lat) for lon, lat in zip(depth['degrees_east_neg'], depth['degrees_north'])]
depth_geom = gdp.GeoDataFrame(depth, crs=crs, geometry=geometry)

# Spatially join the points onto the grid polygons (left join keeps unmatched points for now),
# then reduce the result to the coordinates plus the polygon ID.
# Note that the degrees_east column kept here is the original, unconverted longitude.
depth_polygons = (
    sjoin(depth_geom, grid, how='left')
    .loc[:, ['degrees_north', 'degrees_east', 'OBJECTID']]
    # store the polygon ID as a string and cut it at the first '.' to remove the silly .0
    .assign(OBJECTID=lambda joined: joined['OBJECTID'].astype(str).str.split('.', n=1, expand=True)[0])
    .rename(columns={'OBJECTID': 'Zone'})
)

# Points that matched no polygon carry the string 'nan' as their zone; only keep data
# that have zone IDs
depth_polygons = depth_polygons[depth_polygons['Zone'] != 'nan'].reset_index(drop=True)

# and save
depth_polygons.to_csv('D:/Documents/SpringBoard/capstone-1/datasets/grid_merges/depth_locations.csv')

Nice! Now we've got our grids ready to merge with our environmental data. 
Next: Go to the Merging Environmental Data notebook