# In [3]:
#If running in Anaconda/Jupyter Notebook, create a new anaconda environment and install geopandas, otherwise it won't run

#Using NPS Lands Layer Package

import geopandas as gpd
import pandas as pd

    
# Crash table read from the IMARS CSV extract named in the path below.
ALLCRASHES_ALLCOORDS = pd.read_csv(
    r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\Output Data\IMARS_slim_clean_allcoords_noAKR.csv"
)

# Park boundary polygons read from the NPS boundary shapefile.
ALLBOUNDARIES = gpd.read_file(
    r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\GIS\Other shapefiles\nps_boundary.shp"
)

def NCR_Cleaner_ALLCOORDS(ALLCRASHES_ALLCOORDS):
    """Relabel NCR crashes whose park is not individually listed as "NACA".

    The park code is read from column position 47 and the region code from
    column position 48. Every row whose region is "NCR" and whose park code
    is not in ``nonNACAlist`` gets its park code overwritten with "NACA".

    Parameters:
        ALLCRASHES_ALLCOORDS: crash DataFrame; modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """

    #This is the list of parks that (1) are not assigned the NACA label in the NPS Lands Boundary and
    #(2) also have crashes with a park and coordinate filled; may not be entirely complete but should cover
    #any parks that would be in the top 15 for the NCR

    nonNACAlist=["GWMP","ROCR","LINC","MANA","JEFM","CATO","PRWI","CLBA","THIS","COGA","FRDE","GREE",
                 "ANTI","PAAV","PISC","MONO","WHHO","FOWA","HAFE","ARHO","WOTR","MLKM","DDEM","CHOH",
                 "BEPA","LYBA","MALL","MABE","WWII","FOTH","WAMO","KOWA","FRDO","CAWO","VIVE","WWIM"]

    # No rows means nothing to relabel; returning early also avoids the
    # positional column lookups below on a frame that may have no columns.
    if len(ALLCRASHES_ALLCOORDS) == 0:
        return ALLCRASHES_ALLCOORDS

    # Whole-column positional access replaces the per-row ``iloc[i][47]``
    # lookups, which relied on deprecated integer indexing of a labelled Series.
    park = ALLCRASHES_ALLCOORDS.iloc[:, 47]
    region = ALLCRASHES_ALLCOORDS.iloc[:, 48]

    # Missing park codes are "not in the list", so they are relabelled too.
    relabel = ((region == "NCR") & ~park.isin(nonNACAlist)).to_numpy()
    ALLCRASHES_ALLCOORDS.iloc[relabel, 47] = "NACA"

    return ALLCRASHES_ALLCOORDS


def crashChooser_ALLCOORDS(ALLCRASHES_ALLCOORDS_CLEAN,ALLBOUNDARIES,parkCode):
    """Return the crashes recorded for one park as a point GeoDataFrame.

    Rows whose 'Park' value equals ``parkCode`` are kept, and a point is
    built for each from its LONGITUDE (x) and LATITUDE (y) columns.
    ``ALLBOUNDARIES`` is accepted but not used here.

    NOTE(review): ``set_crs`` only attaches a CRS label; it does not
    reproject. The longitude/latitude values are therefore tagged as
    EPSG:3857 without being converted -- confirm this is intended.
    """
    in_park = ALLCRASHES_ALLCOORDS_CLEAN['Park'] == parkCode
    selected_rows = ALLCRASHES_ALLCOORDS_CLEAN.loc[in_park]

    crash_points = gpd.points_from_xy(selected_rows.LONGITUDE, selected_rows.LATITUDE)
    crashes_gdf = gpd.GeoDataFrame(selected_rows, geometry=crash_points)

    return crashes_gdf.set_crs(epsg=3857)

def boundaryChooser_ALLCOORDS(ALLCRASHES_ALLCOORDS_CLEAN,ALLBOUNDARIES,parkCode):
    """Return the boundary row(s) whose 'UNIT_CODE' equals ``parkCode``.

    ``ALLCRASHES_ALLCOORDS_CLEAN`` is accepted but not used here.

    NOTE(review): ``set_crs(..., allow_override=True)`` replaces the CRS
    label of the boundaries with EPSG:3857 without reprojecting the
    coordinates -- confirm this relabelling is intended.
    """
    matches_code = ALLBOUNDARIES['UNIT_CODE'] == parkCode
    selected_boundary = ALLBOUNDARIES.loc[matches_code]

    return selected_boundary.set_crs(epsg=3857, allow_override=True)
    

def sjoin_0_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the unbuffered park boundary (left) to the crashes (right).

    NOTE(review): per the geopandas documentation, ``how='left'`` keeps
    every boundary row even when no crash matches it, so the number of rows
    in the result is not by itself a count of matched crashes.
    """
    joined = gpd.sjoin(proj_park_polygon, proj_park_crashes_clean, how='left')
    return joined


def sjoin_1_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the smallest buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 0.0145055773 is presumably about one mile expressed in
    decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 0.0145055773

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def sjoin_10_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the middle buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 0.1450557739 is presumably about ten miles expressed in
    decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 0.1450557739

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def sjoin_100_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the largest buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 1.45055774 is presumably about one hundred miles expressed
    in decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 1.45055774

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def NoCoordsCleaner_ALLCOORDS(proj_park_crashes):
    """Count the crashes that are missing a latitude or a longitude.

    Despite the name, no rows are removed: the input is handed back
    untouched together with the count.

    Parameters:
        proj_park_crashes: crash table with 'LATITUDE' and 'LONGITUDE' columns.

    Returns:
        (proj_park_crashes, noCoords) where ``noCoords`` is the number of
        rows in which either coordinate is missing, as a plain int.
    """
    # Column-wise null test: one pass instead of building a row Series per
    # crash, and it also copes with None/object values that np.isnan rejects.
    missing_either = proj_park_crashes[['LATITUDE', 'LONGITUDE']].isna().any(axis=1)
    noCoords = int(missing_either.sum())

    return proj_park_crashes, noCoords
    

def calculations_ALLCOORDS(proj_park_crashes, proj_park_polygon, OutputDataFrame_AllCoords, output_df_park, output_df_region):
    """Bin one park's crashes by distance from its boundary and append the result.

    Crashes are matched against the raw boundary and against the three
    buffered boundaries; the differences between successive match counts
    give the distance bands. One row is appended to
    ``OutputDataFrame_AllCoords`` in the order: park, region, inBoundary,
    over0, over1, over10, over100, noCoords, totalCrashes.

    Parameters:
        proj_park_crashes: crash points for the park.
        proj_park_polygon: boundary row(s) for the park.
        OutputDataFrame_AllCoords: results table; a row is appended in place.
        output_df_park: park code written to the row.
        output_df_region: region code written to the row.

    Returns:
        ``OutputDataFrame_AllCoords`` with the new row appended.
    """

    proj_park_crashes_clean, noCoords=NoCoordsCleaner_ALLCOORDS(proj_park_crashes)

    def matched_crashes(joined):
        # The joins are left joins from boundaries to crashes, so their row
        # count includes boundary rows that matched nothing and repeats a
        # crash once per boundary row it falls in. 'index_right' holds the
        # crash row label (NaN when unmatched); nunique() ignores NaN and
        # collapses repeats, giving the number of distinct matched crashes.
        return int(joined["index_right"].nunique())

    within0=matched_crashes(sjoin_0_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within1=matched_crashes(sjoin_1_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within10=matched_crashes(sjoin_10_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within100=matched_crashes(sjoin_100_ALLCOORDS(proj_park_crashes_clean, proj_park_polygon))

    #Unable to drop crashes without coordinates in NoCoordsCleaner_ALLCOORDS, receiving errors
    #Workaround by subtracting crashes without coordinates from crashes over 100 miles outside of park boundary

    totalCrashes=len(proj_park_crashes_clean)
    over100=totalCrashes-within100-noCoords
    over10=within100-within10
    over1=within10-within1
    over0=within1-within0
    inBoundary=within0

    OutputDataFrame_AllCoords.loc[len(OutputDataFrame_AllCoords.index)]=[output_df_park,output_df_region,inBoundary,over0,over1,over10,over100,noCoords,totalCrashes]

    return OutputDataFrame_AllCoords
    
    
def main_ALLCOORDS():
    """Build the per-park crash-distance table and write it to Excel.

    Reads the module-level ``ALLCRASHES_ALLCOORDS`` and ``ALLBOUNDARIES``,
    relabels NCR crashes, then walks every boundary row. Each park code is
    recorded once: parks with no crashes get a row of zeros, the others are
    handed to ``calculations_ALLCOORDS``. The table is sorted by park and
    saved to the hard-coded workbook path.
    """

    OutputDataFrame_AllCoords=pd.DataFrame(columns=["Park","Region","Within Boundary","<1mi Outside","1-10mi Outside",
                                          "10-100mi Outside",">100mi Outside","No Coordinates","Total Crashes"])

    ALLCRASHES_ALLCOORDS_CLEAN=NCR_Cleaner_ALLCOORDS(ALLCRASHES_ALLCOORDS)

    # Column position 1 holds the park code. Reading the column positionally
    # avoids integer-keyed lookups on a label-indexed row Series, which
    # pandas has deprecated.
    for parkCode in ALLBOUNDARIES.iloc[:, 1]: #for every park in the full set of boundaries

        proj_park_polygon=boundaryChooser_ALLCOORDS(ALLCRASHES_ALLCOORDS_CLEAN,ALLBOUNDARIES,parkCode) #select park-specific boundary(ies)

        output_df_park=proj_park_polygon.iat[0,1] #select park code
        output_df_region=str(proj_park_polygon.iat[0,6])+"R" #select region code

        #Some AKR parks are recorded twice in an input dataset, must not record duplicates
        if output_df_park in OutputDataFrame_AllCoords["Park"].values:
            continue

        # Only select crashes once the park is known to be new.
        proj_park_crashes=crashChooser_ALLCOORDS(ALLCRASHES_ALLCOORDS_CLEAN,ALLBOUNDARIES,parkCode) #select park-specific crashes

        if len(proj_park_crashes)==0: #if no crashes in a park, don't do spatial join calcs and add 0s to output df

            OutputDataFrame_AllCoords.loc[len(OutputDataFrame_AllCoords.index)]=[output_df_park,output_df_region,0,0,0,0,0,0,0]

        else:

            OutputDataFrame_AllCoords=calculations_ALLCOORDS(proj_park_crashes, proj_park_polygon, OutputDataFrame_AllCoords, output_df_park, output_df_region)

    #Output spreadsheet here: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    OutputDataFrame_AllCoords = OutputDataFrame_AllCoords.sort_values(by="Park")
    OutputDataFrame_AllCoords.to_excel(r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\Output Data\Final Coordinate Stats and Charts AllCoords No AKR.xlsx", sheet_name="Output Data", index = False)
    
# Run the pipeline when executed as a script or notebook cell, not on import.
if __name__ == "__main__":
    main_ALLCOORDS()

# In [5]:
# Crash table read from the IMARS CSV extract named in the path below.
ALLCRASHES_NOCOORDS = pd.read_csv(
    r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\Output Data\IMARS_slim_clean_noAKR.csv"
)

# Park boundary polygons read from the NPS boundary shapefile.
ALLBOUNDARIES = gpd.read_file(
    r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\GIS\Other shapefiles\nps_boundary.shp"
)

def NCR_Cleaner_NOCOORDS(ALLCRASHES_NOCOORDS):
    """Relabel NCR crashes whose park is not individually listed as "NACA".

    The park code is read from column position 47 and the region code from
    column position 48. Every row whose region is "NCR" and whose park code
    is not in ``nonNACAlist`` gets its park code overwritten with "NACA".

    Parameters:
        ALLCRASHES_NOCOORDS: crash DataFrame; modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """

    #This is the list of parks that (1) are not assigned the NACA label in the NPS Lands Boundary and
    #(2) also have crashes with a park and coordinate filled; may not be entirely complete but should cover
    #any parks that would be in the top 15 for the NCR

    nonNACAlist=["GWMP","ROCR","LINC","MANA","JEFM","CATO","PRWI","CLBA","THIS","COGA","FRDE","GREE",
                 "ANTI","PAAV","PISC","MONO","WHHO","FOWA","HAFE","ARHO","WOTR","MLKM","DDEM","CHOH",
                 "BEPA","LYBA","MALL","MABE","WWII","FOTH","WAMO","KOWA","FRDO","CAWO","VIVE","WWIM"]

    # No rows means nothing to relabel; returning early also avoids the
    # positional column lookups below on a frame that may have no columns.
    if len(ALLCRASHES_NOCOORDS) == 0:
        return ALLCRASHES_NOCOORDS

    # Whole-column positional access replaces the per-row ``iloc[i][47]``
    # lookups, which relied on deprecated integer indexing of a labelled Series.
    park = ALLCRASHES_NOCOORDS.iloc[:, 47]
    region = ALLCRASHES_NOCOORDS.iloc[:, 48]

    # Missing park codes are "not in the list", so they are relabelled too.
    relabel = ((region == "NCR") & ~park.isin(nonNACAlist)).to_numpy()
    ALLCRASHES_NOCOORDS.iloc[relabel, 47] = "NACA"

    return ALLCRASHES_NOCOORDS


def crashChooser_NOCOORDS(ALLCRASHES_NOCOORDS_CLEAN,ALLBOUNDARIES,parkCode):
    """Return the crashes recorded for one park as a point GeoDataFrame.

    Rows whose 'Park' value equals ``parkCode`` are kept, and a point is
    built for each from its LONGITUDE (x) and LATITUDE (y) columns.
    ``ALLBOUNDARIES`` is accepted but not used here.

    NOTE(review): ``set_crs`` only attaches a CRS label; it does not
    reproject. The longitude/latitude values are therefore tagged as
    EPSG:3857 without being converted -- confirm this is intended.
    """
    in_park = ALLCRASHES_NOCOORDS_CLEAN['Park'] == parkCode
    selected_rows = ALLCRASHES_NOCOORDS_CLEAN.loc[in_park]

    crash_points = gpd.points_from_xy(selected_rows.LONGITUDE, selected_rows.LATITUDE)
    crashes_gdf = gpd.GeoDataFrame(selected_rows, geometry=crash_points)

    return crashes_gdf.set_crs(epsg=3857)

def boundaryChooser_NOCOORDS(ALLCRASHES_NOCOORDS_CLEAN,ALLBOUNDARIES,parkCode):
    """Return the boundary row(s) whose 'UNIT_CODE' equals ``parkCode``.

    ``ALLCRASHES_NOCOORDS_CLEAN`` is accepted but not used here.

    NOTE(review): ``set_crs(..., allow_override=True)`` replaces the CRS
    label of the boundaries with EPSG:3857 without reprojecting the
    coordinates -- confirm this relabelling is intended.
    """
    matches_code = ALLBOUNDARIES['UNIT_CODE'] == parkCode
    selected_boundary = ALLBOUNDARIES.loc[matches_code]

    return selected_boundary.set_crs(epsg=3857, allow_override=True)
    

def sjoin_0_NOCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the unbuffered park boundary (left) to the crashes (right).

    NOTE(review): per the geopandas documentation, ``how='left'`` keeps
    every boundary row even when no crash matches it, so the number of rows
    in the result is not by itself a count of matched crashes.
    """
    joined = gpd.sjoin(proj_park_polygon, proj_park_crashes_clean, how='left')
    return joined


def sjoin_1_NOCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the smallest buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 0.0145055773 is presumably about one mile expressed in
    decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 0.0145055773

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def sjoin_10_NOCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the middle buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 0.1450557739 is presumably about ten miles expressed in
    decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 0.1450557739

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def sjoin_100_NOCOORDS(proj_park_crashes_clean, proj_park_polygon):
    """Spatially join the park boundary, grown by the largest buffer, to the crashes.

    The buffered shapes are wrapped in a fresh GeoDataFrame that carries
    only a geometry column, then left-joined to the crash points.

    NOTE(review): 1.45055774 is presumably about one hundred miles expressed
    in decimal degrees -- confirm against the units of the input geometries.
    """
    buffer_distance = 1.45055774

    buffered_shapes = gpd.GeoSeries(proj_park_polygon.buffer(buffer_distance))
    buffered_parks = gpd.GeoDataFrame(geometry=buffered_shapes)

    return gpd.sjoin(buffered_parks, proj_park_crashes_clean, how='left')


def noCoordsCleaner_NOCOORDS(proj_park_crashes):
    """Count the crashes that are missing a latitude or a longitude.

    Despite the name, no rows are removed: the input is handed back
    untouched together with the count.

    Parameters:
        proj_park_crashes: crash table with 'LATITUDE' and 'LONGITUDE' columns.

    Returns:
        (proj_park_crashes, noCoords) where ``noCoords`` is the number of
        rows in which either coordinate is missing, as a plain int.
    """
    # Column-wise null test: one pass instead of building a row Series per
    # crash, and it also copes with None/object values that np.isnan rejects.
    missing_either = proj_park_crashes[['LATITUDE', 'LONGITUDE']].isna().any(axis=1)
    noCoords = int(missing_either.sum())

    return proj_park_crashes, noCoords
    

def calculations_NOCOORDS(proj_park_crashes, proj_park_polygon, OutputDataFrame_NoCoords, output_df_park, output_df_region):
    """Bin one park's crashes by distance from its boundary and append the result.

    Crashes are matched against the raw boundary and against the three
    buffered boundaries; the differences between successive match counts
    give the distance bands. One row is appended to
    ``OutputDataFrame_NoCoords`` in the order: park, region, inBoundary,
    over0, over1, over10, over100, noCoords, totalCrashes.

    Parameters:
        proj_park_crashes: crash points for the park.
        proj_park_polygon: boundary row(s) for the park.
        OutputDataFrame_NoCoords: results table; a row is appended in place.
        output_df_park: park code written to the row.
        output_df_region: region code written to the row.

    Returns:
        ``OutputDataFrame_NoCoords`` with the new row appended.
    """

    proj_park_crashes_clean, noCoords=noCoordsCleaner_NOCOORDS(proj_park_crashes)

    def matched_crashes(joined):
        # The joins are left joins from boundaries to crashes, so their row
        # count includes boundary rows that matched nothing and repeats a
        # crash once per boundary row it falls in. 'index_right' holds the
        # crash row label (NaN when unmatched); nunique() ignores NaN and
        # collapses repeats, giving the number of distinct matched crashes.
        return int(joined["index_right"].nunique())

    within0=matched_crashes(sjoin_0_NOCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within1=matched_crashes(sjoin_1_NOCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within10=matched_crashes(sjoin_10_NOCOORDS(proj_park_crashes_clean, proj_park_polygon))
    within100=matched_crashes(sjoin_100_NOCOORDS(proj_park_crashes_clean, proj_park_polygon))

    #Unable to drop crashes without coordinates in noCoordsCleaner_NOCOORDS, receiving errors
    #Workaround by subtracting crashes without coordinates from crashes over 100 miles outside of park boundary

    totalCrashes=len(proj_park_crashes_clean)
    over100=totalCrashes-within100-noCoords
    over10=within100-within10
    over1=within10-within1
    over0=within1-within0
    inBoundary=within0

    OutputDataFrame_NoCoords.loc[len(OutputDataFrame_NoCoords.index)]=[output_df_park,output_df_region,inBoundary,over0,over1,over10,over100,noCoords,totalCrashes]

    return OutputDataFrame_NoCoords
    
    
def main_NOCOORDS():
    """Build the per-park crash-distance table and write it to Excel.

    Reads the module-level ``ALLCRASHES_NOCOORDS`` and ``ALLBOUNDARIES``,
    relabels NCR crashes, then walks every boundary row. Each park code is
    recorded once: parks with no crashes get a row of zeros, the others are
    handed to ``calculations_NOCOORDS``. The table is sorted by park and
    saved to the hard-coded workbook path.
    """

    OutputDataFrame_NoCoords=pd.DataFrame(columns=["Park","Region","Within Boundary","<1mi Outside","1-10mi Outside",
                                          "10-100mi Outside",">100mi Outside","No Coordinates","Total Crashes"])

    ALLCRASHES_NOCOORDS_CLEAN=NCR_Cleaner_NOCOORDS(ALLCRASHES_NOCOORDS)

    # Column position 1 holds the park code. Reading the column positionally
    # avoids integer-keyed lookups on a label-indexed row Series, which
    # pandas has deprecated.
    for parkCode in ALLBOUNDARIES.iloc[:, 1]: #for every park in the full set of boundaries

        proj_park_polygon=boundaryChooser_NOCOORDS(ALLCRASHES_NOCOORDS_CLEAN,ALLBOUNDARIES,parkCode) #select park-specific boundary(ies)

        output_df_park=proj_park_polygon.iat[0,1] #select park code
        output_df_region=str(proj_park_polygon.iat[0,6])+"R" #select region code

        #Some AKR parks are recorded twice in an input dataset, must not record duplicates
        if output_df_park in OutputDataFrame_NoCoords["Park"].values:
            continue

        # Only select crashes once the park is known to be new.
        proj_park_crashes=crashChooser_NOCOORDS(ALLCRASHES_NOCOORDS_CLEAN,ALLBOUNDARIES,parkCode) #select park-specific crashes

        if len(proj_park_crashes)==0: #if no crashes in a park, don't do spatial join calcs and add 0s to output df

            OutputDataFrame_NoCoords.loc[len(OutputDataFrame_NoCoords.index)]=[output_df_park,output_df_region,0,0,0,0,0,0,0]

        else:

            OutputDataFrame_NoCoords=calculations_NOCOORDS(proj_park_crashes, proj_park_polygon, OutputDataFrame_NoCoords, output_df_park, output_df_region)

    #Output spreadsheet here: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    OutputDataFrame_NoCoords = OutputDataFrame_NoCoords.sort_values(by="Park")
    OutputDataFrame_NoCoords.to_excel(r"C:\Users\Sophie.Kaye\DOT OST\volpe-proj-VU16A100 - Transportation Safety Program\Region Briefing\Data\Output Data\Final Coordinate Stats and Charts No AKR.xlsx", sheet_name="Output Data", index = False)
        
# Run the pipeline when executed as a script or notebook cell, not on import.
if __name__ == "__main__":
    main_NOCOORDS()