In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point

In [2]:
def percentile(n):
    """Build an aggregation callable that returns the n-th percentile.

    The returned function wraps ``np.percentile(x, n)`` and has its
    ``__name__`` set to ``percentile_<n>``, so several percentiles can be
    passed to the same ``.agg([...])`` call and still be told apart by name.

    Parameters
    ----------
    n : number
        Percentile to compute, forwarded unchanged to ``np.percentile``.

    Returns
    -------
    callable
        Function of one array-like argument returning its n-th percentile.
    """
    label = 'percentile_%s' % n

    def percentile_(x):
        return np.percentile(x, n)

    # Give each generated function a distinct, predictable name.
    percentile_.__name__ = label
    return percentile_

In [3]:
# Load the river profile nodes and keep only rows with a strictly positive
# slope (rows with zero, negative or missing slope are discarded).
df = (
    pd.read_csv('/home/bjdd72/san_andreas/SAF_only/SAF_only_profiles_fault_dist_SO3.csv')
    .loc[lambda profiles: profiles['slope'] > 0]
)
# Show the available columns as a quick schema check.
df.columns

Index(['basin_id', 'id', 'node', 'distance_from_outlet', 'elevation',
       'drainage_area', 'stream_order', 'slope', 'latitude', 'longitude',
       'new_id', 'fault_dist', 'direction'],
      dtype='object')

In [4]:
# Convert the river CSV to a GeoDataFrame of points. The non-unique ID labels
# are dropped here - they will be replaced by unique basin IDs.
crs = 'epsg:4326'  # WGS84 geographic lon/lat, see https://epsg.io/4326
# Use geopandas' dedicated constructor instead of building a Python list of
# shapely Points by hand; x is longitude and y is latitude. The result is
# positional, so the filtered (gappy) index of `df` does not matter.
geometry = gpd.points_from_xy(df.longitude, df.latitude)
river_gdf = gpd.GeoDataFrame(
    df.drop(['latitude', 'longitude', 'basin_id', 'id', 'new_id', 'node'], axis=1),
    crs=crs,
    geometry=geometry,
)

# Basin polygons to which the river points will be assigned.
# NOTE(review): the spatial join assumes these polygons share the points' CRS
# (epsg:4326) - confirm against the shapefile's .prj.
basin_gdf = gpd.read_file('/home/bjdd72/san_andreas/SAF_only_basins_deflection.shp')

In [5]:
# Replace the original basin_id label with a unique id taken from the
# GeoDataFrame's own row index.
basin_gdf = (
    basin_gdf
    .drop(columns=['basin_id'])
    .assign(unique_id=lambda basins: basins.index)
)

In [6]:
# Preview the river points; the ID and lat/lon columns are gone and each row
# now carries a point geometry.
river_gdf

Unnamed: 0,distance_from_outlet,elevation,drainage_area,stream_order,slope,fault_dist,direction,geometry
0,472.956000,754.330000,1443,1,0.280238,524.210347,-0.000194,POINT (-120.77122 36.20501)
1,471.541504,754.309998,1445,1,0.284092,524.210347,-0.000194,POINT (-120.77121 36.20502)
2,470.541504,753.969971,1446,1,0.288058,524.210347,-0.000194,POINT (-120.77120 36.20503)
3,469.541504,753.500000,1449,1,0.289001,524.210347,-0.000195,POINT (-120.77119 36.20503)
4,468.541504,752.979980,1450,1,0.289479,524.210347,-0.000195,POINT (-120.77118 36.20503)
...,...,...,...,...,...,...,...,...
5993315,3.414214,19.459999,56283,3,0.034965,1061.679334,0.000049,POINT (-116.03499 33.61016)
5993316,2.414214,19.430000,56300,3,0.034903,1061.679334,0.000049,POINT (-116.03500 33.61016)
5993317,1.000000,19.369999,56363,3,0.035012,1061.679334,0.000049,POINT (-116.03501 33.61016)
5993318,0.000000,19.330000,56364,3,0.034542,1061.679334,0.000049,POINT (-116.03502 33.61017)


In [7]:
# Preview the basin polygons together with their unique_id column.
basin_gdf

Unnamed: 0,basin_area,azimuth,deflection,latitude,longitude,fault_dist,direction,geometry,unique_id
0,1.000000,19.419021,65.075406,39.021498,-123.698779,117.191340,-0.000335,"POLYGON ((-123.69989 39.01899, -123.69988 39.0...",0
1,53309.000000,7.964627,50.894860,39.021498,-123.698779,117.191340,-0.000335,"POLYGON ((-123.69878 39.02150, -123.69877 39.0...",1
2,18131.000000,304.337516,21.060099,39.019370,-123.709397,117.191340,-0.000131,"POLYGON ((-123.70842 39.02008, -123.70840 39.0...",2
3,1.000000,359.822731,40.489295,39.019973,-123.703650,117.191340,-0.000233,"POLYGON ((-123.70362 39.01714, -123.70360 39.0...",3
4,69954.000000,346.132682,23.177698,39.019973,-123.703650,117.191340,-0.000233,"POLYGON ((-123.70366 39.01998, -123.70364 39.0...",4
...,...,...,...,...,...,...,...,...,...
21540,56975.857591,158.762914,28.661965,33.556380,-115.967462,1070.409964,-0.000002,"POLYGON ((-115.96984 33.56292, -115.96982 33.5...",21540
21541,57709.183178,156.197708,25.725476,33.556374,-115.967839,1070.409964,0.000003,"POLYGON ((-115.97017 33.56522, -115.97016 33.5...",21541
21542,23006.307740,143.018528,11.224846,33.556366,-115.967839,1070.409964,0.000003,"POLYGON ((-115.97072 33.56166, -115.97070 33.5...",21542
21543,31423.143546,159.512253,29.523176,33.554631,-115.971845,1070.409964,0.000088,"POLYGON ((-115.97383 33.56057, -115.97382 33.5...",21543


In [8]:
# Tag every river point with the attributes (including unique_id) of the basin
# polygon(s) it intersects. how='left' keeps all river points, so a point that
# falls in no basin is retained with a missing unique_id.
# NOTE(review): newer geopandas releases deprecate `op=` in favour of
# `predicate=`; rename the keyword when upgrading geopandas.
join = gpd.sjoin(
    river_gdf,
    basin_gdf,
    how='left',
    op='intersects',
)

In [9]:
# Per-basin summary of channel slope: median, standard deviation, and the
# 25th / 75th percentiles, returned as one row per unique_id.
# NOTE(review): 'slope_q2' holds the 75th percentile (conventionally the third
# quartile); the name is kept because it is written to the output file.
gr = (
    join
    .groupby(['unique_id'])['slope']
    .agg(['median', 'std', percentile(25), percentile(75)])
    .rename(columns={
        'median': 'slope_median',
        'std': 'slope_std',
        'percentile_25': 'slope_q1',
        'percentile_75': 'slope_q2',
    })
    .reset_index()
)

In [10]:
# Attach the slope statistics to the basin polygons. This is an inner merge,
# so basins without any joined river point are dropped (in the recorded run
# the table goes from 21,544 to 18,907 rows).
# NOTE: re-running this cell without re-running the cells above would merge
# the statistics a second time.
basin_gdf = basin_gdf.merge(gr, on='unique_id', how='inner')

In [11]:
# Preview the basins with the merged slope_median / slope_std / slope_q1 /
# slope_q2 columns.
basin_gdf

Unnamed: 0,basin_area,azimuth,deflection,latitude,longitude,fault_dist,direction,geometry,unique_id,slope_median,slope_std,slope_q1,slope_q2
0,53309.000000,7.964627,50.894860,39.021498,-123.698779,117.191340,-0.000335,"POLYGON ((-123.69878 39.02150, -123.69877 39.0...",1,0.000503,0.000016,0.000494,0.000512
1,18131.000000,304.337516,21.060099,39.019370,-123.709397,117.191340,-0.000131,"POLYGON ((-123.70842 39.02008, -123.70840 39.0...",2,0.000512,0.000450,0.000498,0.000563
2,69954.000000,346.132682,23.177698,39.019973,-123.703650,117.191340,-0.000233,"POLYGON ((-123.70366 39.01998, -123.70364 39.0...",4,0.000503,0.000034,0.000487,0.000525
3,76149.000000,314.561815,11.609073,39.019973,-123.703627,117.191340,-0.000234,"POLYGON ((-123.70208 39.02186, -123.70207 39.0...",5,0.000513,0.000781,0.000491,0.000541
4,20501.000000,87.725781,50.831639,39.016151,-123.694868,117.191340,-0.000322,"POLYGON ((-123.69568 39.01741, -123.69565 39.0...",7,0.000502,0.001032,0.000493,0.000773
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18903,56975.857591,158.762914,28.661965,33.556380,-115.967462,1070.409964,-0.000002,"POLYGON ((-115.96984 33.56292, -115.96982 33.5...",21540,0.026122,0.006340,0.019346,0.030307
18904,57709.183178,156.197708,25.725476,33.556374,-115.967839,1070.409964,0.000003,"POLYGON ((-115.97017 33.56522, -115.97016 33.5...",21541,0.026596,0.007118,0.022375,0.034969
18905,23006.307740,143.018528,11.224846,33.556366,-115.967839,1070.409964,0.000003,"POLYGON ((-115.97072 33.56166, -115.97070 33.5...",21542,0.021432,0.005368,0.017863,0.025450
18906,31423.143546,159.512253,29.523176,33.554631,-115.971845,1070.409964,0.000088,"POLYGON ((-115.97383 33.56057, -115.97382 33.5...",21543,0.019007,0.002977,0.016944,0.020878


In [12]:
# Write the basins with their slope statistics to disk.
# NOTE(review): the ESRI Shapefile format limits attribute names to 10
# characters, so 'slope_median' will presumably be truncated in the written
# file - confirm the field names after export or shorten the column first.
basin_gdf.to_file('/home/bjdd72/san_andreas/SAF_only/SAF_only_channel_slopes_by_basin_SO3.shp')