# Exposure Analysis with Point Overlaying Background Layer (Raster)

////////////////////////////////////////////////////////////////////////////////////
##### Author: Jay (Jiue-An) Yang, @JiueAnYang
##### Organization: Health Data at Scale Collaboratory, City of Hope
##### Last Updated: December 20, 2023
////////////////////////////////////////////////////////////////////////////////////
***

### Requirements:
##### 1. A file directory with .csv files containing GPS points
##### 2. A file directory with raster files of the environmental variables that need to be processed

***
### Model Workflow as built in ArcGIS ModelBuilder

![Alt text](exposure_PointOverlay_v5.svg)

## Step 1: Parameter Setup

In [None]:
## Import required modules
import arcpy
from arcpy import env
from arcpy.sa import *
arcpy.CheckOutExtension("Spatial")

import glob, os, time
import numpy as np
import pandas as pd
from IPython.display import clear_output

## Set environment options
arcpy.env.overwriteOutput = True   # allow geoprocessing tools to overwrite outputs that already exist

## Set environment variables
env.workspace = r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb"  # workspace: your project .gdb name
project_dir = r"C:/Users/Jay-PC/Desktop/Test/"                                   # directory

## Directory with the GPS point .csv files.
## Keep the trailing slash: the file search pattern is appended directly to this string.
## Only ONE assignment should be active; the alternative is kept commented out so it
## cannot be silently overridden by a later line.
# gps_data_dir = r"C:/Users/Jay-PC/Desktop/Test/Testing/test_data_csv/"          # directory (test data)
gps_data_dir = r"C:/Users/Jay-PC/Documents/UCSD/PQ/Outputs/from_PY/0918_2020/Stationary/"  # directory  Testing entire Folder

x_cord_name = 'lng'                                            # column name for x coordinates in point csv
y_cord_name = 'lat'                                            # column name for y coordinates in point csv
final_output_table_temp = env.workspace + "/table_temp"        # temporary table each loop
final_output_table = env.workspace + "/final_output_table_PO"  # table name for final output in .gdb
final_output_table_output_dir = r"C:/Users/Jay-PC/Desktop/Test/"         # directory
final_output_table_csv_name = "Exposure_PointOverlay.csv"      # filename for final output in .csv format

## Set path to some specific layers
research_area = r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb/SD_County_Boundary_proj"        # shapefile or feature class

## Set the list of input Raster that will be overlay: format as [Raster, {Output Field Name}]
exposure_rasters = [[r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb/FastFood2014","FastFood2014"],
                    [r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb/BikeRoutes","BikeRoutes"],
                    [r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb/ParkArea2016","ParkArea2016"],
                    [r"C:/Users/Jay-PC/Desktop/Test/Point-Overlay-Input-Rasters.gdb/NO2Mean2019","NO2Mean2019"]]

## Field names to summarise = the output field names declared above.
## Derived from exposure_rasters so the two lists can never get out of sync.
exposure_fields = [field_name for _, field_name in exposure_rasters]


## Specify spatial reference for the analysis
spatial_ref = arcpy.SpatialReference('North America Albers Equal Area Conic')

## Specify where the participant ID sits inside each csv file path (slice start / end)
## getting ID from filename, example: "SD_points_PQ010122.csv" --> "PQ010122"
pt_ID_start = -12
pt_ID_end  = -4

## Step 2: Calculate Exposure

In [None]:
### Start from a clean slate: remove the final and the temporary output table if they already exist
for stale_table in (final_output_table, final_output_table_temp):
    if arcpy.Exists(stale_table):
        arcpy.Delete_management(stale_table)


### Open a time-stamped log file (append mode, path relative to the current working directory)
from datetime import datetime
now = datetime.now()
dt_string = format(now, "%d-%m-%Y-%H-%M-%S")
log_file_name = "log_%s.txt" % dt_string
f = open(log_file_name, "a")


### Accumulators filled while the participant files are processed
stats_frames = []       ## one summary-statistics dataframe per processed PT
non_processed_pts = []  ## IDs of the PTs that could not be processed

### Loop through .csv files in the data directory
###
### For every participant (PT) csv: convert the points to a feature class, re-project,
### clip to the research area, extract the raster values at each point and summarise
### them. One dataframe of summary statistics per PT is appended to stats_frames;
### PTs that yield no data are recorded in non_processed_pts and in the log file.

def _record_non_processed(pt_id):
    """Remember a participant that produced no results and note it in the log file."""
    non_processed_pts.append(pt_id)
    f.write("Participant ID: {} was not processed.\n".format(pt_id))


## The statistics to compute are the same for every PT, so build the list once
stat_categories = ['SUM',"MEAN","MIN","MAX","RANGE","STD","MEDIAN","VARIANCE","COUNT"]
stat_fields = [[exp, st] for exp in exposure_fields for st in stat_categories]

start = time.time()

## List the csv files once so that the progress total matches the files actually processed
csv_files = glob.glob(gps_data_dir + "*.csv")
# csv_files = csv_files[:1]   # This will only run the first PT in the directory, for testing
total_i = len(csv_files)

for i, file in enumerate(csv_files, start=1):

    clear_output(wait=True)

    pt_ID = file[pt_ID_start:pt_ID_end]
    msg = "Working on : {pt} ({index}/{total})".format(pt = pt_ID, index = i, total= total_i)
    print (msg)

    ### make sure there are points in the .csv
    points_df = pd.read_csv(file)
    if len(points_df) == 0:
        ### Write to log file to record this pt without points
        _record_non_processed(pt_ID)
        continue

    ### --------------------------------------------------------
    ### Step 1: Create feature class from CSV points
    ### --------------------------------------------------------

    ## coordinates in the csv are read as WGS84 (EPSG:4326)
    arcpy.management.XYTableToPoint(file, "point_fc", x_cord_name, y_cord_name, "", arcpy.SpatialReference(4326))
    print ("Step 1: csv converted to feature class")

    ### --------------------------------------------------------
    ### Step 2: Re-project feature class
    ### --------------------------------------------------------

    arcpy.Project_management("point_fc", "point_fc_proj", spatial_ref)
    print ("Step 2: feature class re-projected to - ", spatial_ref.name)

    ### --------------------------------------------------------
    ### Step 3: Clip feature class by analysis extent
    ### --------------------------------------------------------

    arcpy.Clip_analysis("point_fc_proj", research_area, "point_fc_cliped")
    print ("Step 3: feature class clipped by research area")

    ### --------------------------------------------------------
    ### Step 4: Extract Overlay Values from Raster
    ### method will update existing point FC with new columns
    ### --------------------------------------------------------

    ExtractMultiValuesToPoints("point_fc_cliped", exposure_rasters, "NONE")
    print ("Step 4: extracte raster values to points")

    ### --------------------------------------------------------
    ### Step 5: Run Summary Statistics for the exposure fields
    ### --------------------------------------------------------

    ## run summary statistics
    arcpy.Statistics_analysis("point_fc_cliped", final_output_table_temp, stat_fields)
    print ("Step 5: calculate summary statistics on exposure fields")

    ## Get a list of field names to display (the object ID column is not a result)
    field_names = [fld.name for fld in arcpy.ListFields(final_output_table_temp) if fld.type != 'OID']

    ## Read the stats table into a pandas dataframe; the with-block releases the
    ## cursor as soon as the rows are read
    with arcpy.da.SearchCursor(final_output_table_temp, field_names) as cursor:
        stats_df = pd.DataFrame(data=list(cursor), columns=field_names)

    ## A stats table without rows would silently vanish from the final output,
    ## so record the PT as non-processed instead
    if stats_df.empty:
        _record_non_processed(pt_ID)
        continue

    stats_df['PT_ID'] = pt_ID
    stats_frames.append(stats_df)

        
### ------------------------------------------------------------------------------------
### Step 6: Concat PT outputs to one dataframe, write to .csv and write to table in gdb
### ------------------------------------------------------------------------------------

### Check if there are results to be processed first
if stats_frames:
    frames = list(stats_frames)

    ## add non-processed PTs to the final table (they only carry a PT_ID, all stats stay empty)
    if non_processed_pts:
        frames.append(pd.DataFrame(non_processed_pts, columns=['PT_ID']))

    ## pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
    ## ignore_index gives the combined table a unique row index
    final_df = pd.concat(frames, ignore_index=True)

    ## move PT_ID column to the front
    col_name = "PT_ID"
    first_col = final_df.pop(col_name)
    final_df.insert(0, col_name, first_col)

    ## write results to .csv file
    final_output_csv = os.path.join(final_output_table_output_dir, final_output_table_csv_name)
    final_df.to_csv(final_output_csv, index = False)

    ## Close the log file
    f.close()

    ## write results into workspace gdb
    ## Note !!  NumPyArrayToTable will not overwrite an existing table
    ## so check if there is already a table with the same filename there
    x = np.array(np.rec.fromrecords(final_df.values))
    names = final_df.columns.tolist()
    x.dtype.names = tuple(names)     # label the record fields with the dataframe column names
    arcpy.da.NumPyArrayToTable(x, final_output_table)

    end = time.time()
    print ("-"*30)
    print ("Exposure Analysis with PointOverlay Completed, total time spent : ", end - start)
    print ("Output table available at : ", final_output_csv)
    print ("Please also check the log file at : {}".format(log_file_name))

    ## Final output table is saved as a .csv file in the defined [final_output_table_output_dir] location.
    ## Final output table is also written to your workspace gdb

### if there are no results to be processed, write to the log file.
else:
    ### Note the empty run in the log, then close the log file
    f.write("-"*30)
    f.write("\nThere are no exposure resutls for the entire run.")
    f.close()
    print ("-"*30)
    print ("There are no exposure resutls for the entire run.")
    print ("Please also check the log file at : {}".format(log_file_name))