In [1]:
# import modules
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point

In [2]:
def percentile(n):
    def percentile_(x):
        return np.percentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_

In [3]:
# read in the river profile CSV
data_dir = '/media/TopographicData/TopographicData/san_andreas/SAF_combined/SAF_only/'
df = pd.read_csv(data_dir+'SAF_only_profiles_fault_dist_SO3.csv')
df = df[df['slope'] > 0]
df.columns

Index(['basin_id', 'id', 'node', 'distance_from_outlet', 'elevation',
       'drainage_area', 'stream_order', 'slope', 'latitude', 'longitude',
       'new_id', 'fault_dist', 'direction'],
      dtype='object')

In [14]:
# read in the hillslope metrics CSV
hs_df = pd.read_csv(data_dir+'SAF_only_hillslopes_SO3.csv')

In [15]:
# read in the hilltop metrics CSV
ht_df = pd.read_csv(data_dir+'SAF_only_RidgeData_SO3.csv')

In [4]:
# convert the river csv to a geodataframe. Remove the non-unique ID labels - these will be replaced by unique basin IDs
geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]
crs = {"init": "EPSG:4326"} #http://www.spatialreference.org/ref/epsg/2263/
river_gdf = gpd.GeoDataFrame(df.drop(['latitude','longitude','basin_id','id','new_id','node'], axis=1), crs=crs, geometry=geometry)

In [5]:
river_gdf

Unnamed: 0,distance_from_outlet,elevation,drainage_area,stream_order,slope,fault_dist,direction,geometry
0,472.956000,754.330000,1443,1,0.280238,524.210347,-0.000194,POINT (-120.77122 36.20501)
1,471.541504,754.309998,1445,1,0.284092,524.210347,-0.000194,POINT (-120.77121 36.20502)
2,470.541504,753.969971,1446,1,0.288058,524.210347,-0.000194,POINT (-120.77120 36.20503)
3,469.541504,753.500000,1449,1,0.289001,524.210347,-0.000195,POINT (-120.77119 36.20503)
4,468.541504,752.979980,1450,1,0.289479,524.210347,-0.000195,POINT (-120.77118 36.20503)
...,...,...,...,...,...,...,...,...
5993315,3.414214,19.459999,56283,3,0.034965,1061.679334,0.000049,POINT (-116.03499 33.61016)
5993316,2.414214,19.430000,56300,3,0.034903,1061.679334,0.000049,POINT (-116.03500 33.61016)
5993317,1.000000,19.369999,56363,3,0.035012,1061.679334,0.000049,POINT (-116.03501 33.61016)
5993318,0.000000,19.330000,56364,3,0.034542,1061.679334,0.000049,POINT (-116.03502 33.61017)


In [7]:
# convert the hillslope csv to a geodataframe. Remove the non-unique ID labels
geometry = [Point(xy) for xy in zip(hs_df.longitude, hs_df.latitude)]
hs_gdf = gpd.GeoDataFrame(hs_df.drop(['latitude','longitude','basin_id','new_id'], axis=1), crs=crs, geometry=geometry)

In [8]:
# convert the hilltop csv to a geodataframe. Remove the non-unique ID labels
geometry = [Point(xy) for xy in zip(ht_df.longitude, ht_df.latitude)]
ht_gdf = gpd.GeoDataFrame(ht_df.drop(['latitude','longitude','basin_id','new_id'], axis=1), crs=crs, geometry=geometry)

In [6]:
# add a unique id to the basin
basin_gdf = gpd.read_file(data_dir+'SAF_only_basins_deflection.shp', crs=crs)
basin_gdf = basin_gdf.drop(['basin_id'], axis=1)
basin_gdf['unique_id'] = basin_gdf.index

In [None]:
basin_gdf.crs

In [None]:
# merge the river and basins gdf and calculate the median channel slope in each basin
join = gpd.sjoin(river_gdf, basin_gdf, how='left', op='intersects')

  "(%s != %s)" % (left_df.crs, right_df.crs)


In [None]:
gr = join.groupby(['unique_id'])['slope'].agg(['median', 'std', percentile(16), percentile(84)]).rename(columns={'median': 'channel_slope_median', 'std': 'channel_slope_std', 'percentile_16': 'channel_slope_16th', 'percentile_84': 'channel_slope_84th'}).reset_index()

In [None]:
basin_gdf = basin_gdf.merge(gr, on='unique_id')

In [None]:
# now join the hillslope data
join = gpd.sjoin(basin_gdf, hs_gdf, how='left', op='contains')

In [None]:
join

In [None]:
# now join the hilltop data - find points within the basin and get the median curvature in each basin
ht_join = gpd.sjoin(ht_gdf, join, how='left', op='within')
gr = ht_join.groupby(['unique_id'])['curvature'].agg(['median', 'std', percentile(16), percentile(84)]).rename(columns={'median': 'ht_curv_median', 'std': 'ht_curv_std', 'percentile_16': 'ht_curv_16th', 'percentile_84': 'ht_curv_84th'}).reset_index()

In [None]:
gr

In [None]:
join = join.merge(gr, on='unique_id')

In [None]:
len(join.unique_id.unique())

In [None]:
join.to_file(data_dir+'SAF_only_channels_plus_hilltops_by_basin_SO3.shp')

In [None]:
len(join)