# In [3]:  (Jupyter notebook cell marker)
#Using NPS Lands Layer Package

import geopandas as gpd
import pandas as pd

    
# Input datasets, loaded once and shared by every function below.
# Crash table: the code below reads its 'Park', 'Longitude' and 'Latitude' columns.
_CRASHES_CSV_PATH = r"C:\Users\Christopher.Dettmer\Documents\TSP\Using New Data\IMARS_slim_clean_forChris.csv"
# Park boundary polygons: the code below filters them on the 'UNIT_CODE' column.
_BOUNDARIES_SHP_PATH = r"C:\Users\Christopher.Dettmer\Documents\TSP\Spatial Join Files\nps_boundary.shp"

ALLCRASHES = pd.read_csv(_CRASHES_CSV_PATH)
ALLBOUNDARIES = gpd.read_file(_BOUNDARIES_SHP_PATH)


def crashChooser(ALLCRASHES,ALLBOUNDARIES,parkCode):
    """Return the crash records of a single park as a point GeoDataFrame.

    Rows of ``ALLCRASHES`` whose 'Park' value equals ``parkCode`` are kept and
    given a point geometry built from their 'Longitude' / 'Latitude' columns.
    ``ALLBOUNDARIES`` is accepted for signature symmetry but is not used here.

    The result is tagged as EPSG:3857 with ``set_crs``. Note that ``set_crs``
    only assigns a CRS label so the later spatial join sees matching CRSs on
    both sides; it does not transform the coordinate values (``to_crs`` would).
    """
    # Boolean mask selecting this park's rows
    belongs_to_park = ALLCRASHES['Park'] == parkCode
    crash_rows = ALLCRASHES.loc[belongs_to_park]

    # One point per crash, x = longitude, y = latitude
    crash_points = gpd.points_from_xy(crash_rows['Longitude'], crash_rows['Latitude'])
    crash_gdf = gpd.GeoDataFrame(crash_rows, geometry=crash_points)

    return crash_gdf.set_crs(epsg=3857)

def boundaryChooser(ALLCRASHES,ALLBOUNDARIES,parkCode):
    """Return the boundary row(s) of a single park, tagged as EPSG:3857.

    Rows of ``ALLBOUNDARIES`` whose 'UNIT_CODE' equals ``parkCode`` are kept;
    a park may therefore come back as more than one polygon row.
    ``ALLCRASHES`` is accepted for signature symmetry but is not used here.

    ``set_crs(..., allow_override=True)`` replaces whatever CRS label the
    boundary layer carried with EPSG:3857 without transforming the coordinates,
    so that it matches the label placed on the crash points for the join.
    """
    # Boolean mask selecting this park's boundary rows
    is_this_unit = ALLBOUNDARIES['UNIT_CODE'] == parkCode
    unit_rows = ALLBOUNDARIES.loc[is_this_unit]

    return unit_rows.set_crs(epsg=3857, allow_override=True)
    

def sjoin_0(proj_park_crashes, proj_park_polygon):
    """Left-join the park boundary rows to the crash points they contain.

    The boundary rows are the left side, so a boundary row with no matching
    crash is still present in the result (with empty crash columns).
    """
    joined = gpd.sjoin(
        left_df=proj_park_polygon,
        right_df=proj_park_crashes,
        how='left',
    )
    return joined


def sjoin_1(proj_park_crashes, proj_park_polygon):
    """Left-join the buffered park boundary to the crash points it contains.

    Each boundary geometry is grown by a fixed distance and the attribute
    columns are dropped, leaving a geometry-only frame as the left side of the
    join. As with any left join, a buffered row with no matching crash is still
    present in the result.
    """
    # Buffer distance is expressed in the units of the geometry coordinates.
    # NOTE(review): presumably ~1 mile when the coordinates are decimal
    # degrees -- confirm against the boundary layer.
    buffer_distance = 0.0145055773

    grown_shapes = proj_park_polygon.buffer(buffer_distance)
    # Rewrap the buffered shapes as a GeoDataFrame so they can be spatially joined
    grown_frame = gpd.GeoDataFrame(geometry=gpd.GeoSeries(grown_shapes))

    return gpd.sjoin(grown_frame, proj_park_crashes, how='left')


def sjoin_10(proj_park_crashes, proj_park_polygon):
    """Left-join the buffered park boundary to the crash points it contains.

    Each boundary geometry is grown by a fixed distance and the attribute
    columns are dropped, leaving a geometry-only frame as the left side of the
    join. As with any left join, a buffered row with no matching crash is still
    present in the result.
    """
    # Buffer distance is expressed in the units of the geometry coordinates.
    # NOTE(review): presumably ~10 miles when the coordinates are decimal
    # degrees -- confirm against the boundary layer.
    buffer_distance = 0.1450557739

    grown_shapes = proj_park_polygon.buffer(buffer_distance)
    # Rewrap the buffered shapes as a GeoDataFrame so they can be spatially joined
    grown_frame = gpd.GeoDataFrame(geometry=gpd.GeoSeries(grown_shapes))

    return gpd.sjoin(grown_frame, proj_park_crashes, how='left')


def sjoin_100(proj_park_crashes, proj_park_polygon):
    """Left-join the buffered park boundary to the crash points it contains.

    Each boundary geometry is grown by a fixed distance and the attribute
    columns are dropped, leaving a geometry-only frame as the left side of the
    join. As with any left join, a buffered row with no matching crash is still
    present in the result.
    """
    # Buffer distance is expressed in the units of the geometry coordinates.
    # NOTE(review): presumably ~100 miles when the coordinates are decimal
    # degrees -- confirm against the boundary layer.
    buffer_distance = 1.45055774

    grown_shapes = proj_park_polygon.buffer(buffer_distance)
    # Rewrap the buffered shapes as a GeoDataFrame so they can be spatially joined
    grown_frame = gpd.GeoDataFrame(geometry=gpd.GeoSeries(grown_shapes))

    return gpd.sjoin(grown_frame, proj_park_crashes, how='left')


def _count_matched_crashes(joined):
    """Return how many distinct crashes appear in a polygon-to-crash sjoin result."""
    # gpd.sjoin stores the index label of the matched right-hand (crash) row in
    # the 'index_right' column (the default name when the crash index is
    # unnamed). With how='left', a polygon that matched no crash still yields
    # one row whose 'index_right' is NaN, and a crash lying in two overlapping
    # polygons/buffers yields two rows. nunique() skips NaN and collapses
    # repeats, so each crash is counted exactly once.
    return int(joined["index_right"].nunique())


def calculations(proj_park_crashes, proj_park_polygon, outputDataFrame, output_df_park, output_df_region):
    """Append one park's crash-distance breakdown to ``outputDataFrame``.

    The crashes are spatially joined against the park boundary and against the
    three progressively larger buffers produced by ``sjoin_1``, ``sjoin_10``
    and ``sjoin_100``. The cumulative counts are then differenced into
    mutually exclusive bands.

    Parameters:
        proj_park_crashes: point GeoDataFrame of the park's crashes.
        proj_park_polygon: GeoDataFrame of the park's boundary row(s).
        outputDataFrame: results table; a new row is appended in place.
        output_df_park: park code written to the first output column.
        output_df_region: region code written to the second output column.

    Returns:
        ``outputDataFrame`` with the appended row
        [park, region, in boundary, boundary-to-buffer-1, buffer-1-to-buffer-10,
        buffer-10-to-buffer-100, beyond buffer-100, total crashes].

    Counts are taken as the number of distinct crashes matched by each join
    rather than the number of joined rows, because a left join keeps a row for
    every unmatched polygon and repeats a crash for every polygon it touches.
    """
    # Cumulative counts: crashes inside the boundary / inside each buffer
    within0=_count_matched_crashes(sjoin_0(proj_park_crashes, proj_park_polygon))
    within1=_count_matched_crashes(sjoin_1(proj_park_crashes, proj_park_polygon))
    within10=_count_matched_crashes(sjoin_10(proj_park_crashes, proj_park_polygon))
    within100=_count_matched_crashes(sjoin_100(proj_park_crashes, proj_park_polygon))

    # Difference the cumulative counts into exclusive distance bands
    totalCrashes=len(proj_park_crashes)
    over100=totalCrashes-within100
    over10=within100-within10
    over1=within10-within1
    over0=within1-within0
    inBoundary=within0

    # Append as the next row of the results table
    outputDataFrame.loc[len(outputDataFrame.index)]=[output_df_park,output_df_region,inBoundary,over0,over1,over10,over100,totalCrashes]

    return outputDataFrame
    
    
def main():
    """Build the per-park crash-distance table and write it to an Excel sheet.

    For every row of ``ALLBOUNDARIES`` the park code is read from the second
    column, the park's boundary and crashes are selected, and one output row
    is produced: all zeros when the park has no crashes, otherwise the
    breakdown computed by ``calculations``. Each park code is written only
    once even if it occurs on several boundary rows.
    """
    outputDataFrame=pd.DataFrame(columns=["Park","Region","Within Boundary","<1mi Outside","1-10mi Outside",
                                          "10-100mi Outside",">100mi Outside","Total Crashes"])

    #Some AKR parks are recorded twice in an input dataset, must not record duplicates
    recorded_parks=set()

    # Park code is the second column of the boundary table; it is read
    # positionally with .iloc because integer keys on a label-indexed Series
    # (row[1]) are deprecated in recent pandas.
    for parkCode in ALLBOUNDARIES.iloc[:, 1]: #for every park in the full set of boundaries

        proj_park_polygon=boundaryChooser(ALLCRASHES,ALLBOUNDARIES,parkCode) #select park-specific boundary(ies)

        output_df_park=proj_park_polygon.iloc[0, 1] #select park code
        output_df_region=str(proj_park_polygon.iloc[0, 6])+"R" #select region code

        # Skip repeats before doing the (more expensive) crash selection and joins
        if output_df_park in recorded_parks:
            continue
        recorded_parks.add(output_df_park)

        proj_park_crashes=crashChooser(ALLCRASHES,ALLBOUNDARIES,parkCode) #select park-specific crashes

        if len(proj_park_crashes)==0: #if no crashes in a park, don't do spatial join calcs and add 0s to output df
            outputDataFrame.loc[len(outputDataFrame.index)]=[output_df_park,output_df_region,0,0,0,0,0,0]
        else:
            outputDataFrame=calculations(proj_park_crashes, proj_park_polygon, outputDataFrame, output_df_park, output_df_region)

    #Output spreadsheet code here: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    outputDataFrame.to_excel(r"C:\Users\Christopher.Dettmer\Documents\TSP\Spatial Join Files\All IMARS Crashes National and Region Coords Spatial Join Error Stats and Charts.xlsx",
                             sheet_name="Output Data", index = False)

# Run the full analysis (reads the inputs loaded above, writes the Excel output).
main()
