In [3]:
import arcpy
import requests
import os
from zipfile import ZipFile
import json
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [4]:
# Project data folder: the CSV inputs are read from here and the derived CSVs are written here.
local_directory = r"E:\coursework\ARCGISI\FinalProject\FP\data"

# Create the folder (and any missing parents) if it doesn't exist yet.
# exist_ok=True replaces the separate os.path.exists() test, which was a
# check-then-create race and an extra branch for no benefit.
os.makedirs(local_directory, exist_ok=True)

# Use the same folder as the arcpy workspace. Reusing local_directory (instead of
# repeating the literal) keeps the pandas paths and the arcpy workspace in sync.
arcpy.env.workspace = local_directory

### Read Pick-up Data

In [5]:
# Load the raw pickup records.
# NOTE(review): the file name says "apr14" but the rendered output below shows
# 5/1/2014 - 5/31/2014 dates -- confirm which month this file actually holds.
raw_pickup_path = os.path.join(local_directory, 'uber-raw-data-apr14.csv')
pickup_df = pd.read_csv(raw_pickup_path)

# 'Date/Time' is "<date> <time>"; split it on the space into two text columns.
pickup_df[['Date', 'Time']] = pickup_df['Date/Time'].str.split(' ', expand=True)

# Hour of day taken from the parsed time-of-day text.
parsed_times = pd.to_datetime(pickup_df['Time'])
pickup_df['Hour'] = parsed_times.dt.hour

# The combined timestamp and the 'Base' column are not used downstream.
pickup_df = pickup_df.drop(columns=['Date/Time', 'Base'])

# Remove outliers: keep only points with longitude below -72.70.
is_in_range = pickup_df['Lon'] < -72.70
pickup_df = pickup_df[is_in_range]

pickup_df

Unnamed: 0,Lat,Lon,Date,Time,Hour
0,40.7521,-73.9914,5/1/2014,0:02:00,0
1,40.6965,-73.9715,5/1/2014,0:06:00,0
2,40.7464,-73.9838,5/1/2014,0:15:00,0
3,40.7463,-74.0011,5/1/2014,0:17:00,0
4,40.7594,-73.9734,5/1/2014,0:17:00,0
...,...,...,...,...,...
652430,40.7309,-74.0014,5/31/2014,23:45:00,23
652431,40.7528,-73.9798,5/31/2014,23:52:00,23
652432,40.7158,-73.9519,5/31/2014,23:55:00,23
652433,40.6961,-73.8997,5/31/2014,23:56:00,23


In [8]:
# Spatial reference shared by the pickup points and the reprojected boundaries.
sr = arcpy.SpatialReference(4326)  # WGS 1984

# Create an empty point feature class in the workspace. The workspace is a folder,
# so the dataset is addressed as "PickUp_Apr.shp" below.
arcpy.CreateFeatureclass_management(arcpy.env.workspace, "PickUp_Apr", "POINT", spatial_reference=sr)
pickup_fc = "PickUp_Apr.shp"

# Attribute schema, in the same order the values are inserted.
pickup_fields = [
    ("Lat", "Double"),
    ("Lon", "Double"),
    ("Date", "DATE"),
    ("Time", "TEXT"),
    ("Hour", "Short"),
]
for field_name, field_type in pickup_fields:
    arcpy.AddField_management(pickup_fc, field_name, field_type)

attribute_names = [field_name for field_name, _ in pickup_fields]

# Insert one point per pickup.
# - Columns are selected by name, so the insert no longer depends on the DataFrame's
#   column order (the old row[0]/row[1] positional lookups on a label-indexed Series
#   are deprecated in pandas 2.x).
# - itertuples avoids building a Series per row, which matters for ~650k rows.
# - The `with` block releases the cursor (and its lock on the shapefile) even if an
#   insert raises.
# - SHAPE@XY takes an (x, y) tuple, i.e. (longitude, latitude).
with arcpy.da.InsertCursor(pickup_fc, ["SHAPE@XY"] + attribute_names) as cursor:
    for lat, lon, date_text, time_text, hour in pickup_df[attribute_names].itertuples(index=False, name=None):
        cursor.insertRow([(lon, lat), lat, lon, date_text, time_text, hour])

### Read Boundary Data

In [39]:
# reproject county data
arcpy.management.Project("US_county_2014.shp", r"county.shp", sr)

# reproject sub-county data78U
arcpy.management.Project("US_cty_sub_2014.shp", r"subcounty.shp", sr)

In [76]:
# Define the bounding box coordinates
xmin, ymin, xmax, ymax = (pickup_df['Lon'].min(), pickup_df['Lat'].min(), pickup_df['Lon'].max(), pickup_df['Lat'].max())

# Create a bounding box polygon
bounding_box = arcpy.Polygon(
    arcpy.Array([arcpy.Point(xmin, ymin),
                 arcpy.Point(xmax, ymin),
                 arcpy.Point(xmax, ymax),
                 arcpy.Point(xmin, ymax)]),sr)

arcpy.SelectLayerByLocation_management("county", "INTERSECT", bounding_box)
arcpy.CopyFeatures_management("county", "selected_county.shp")

arcpy.SelectLayerByLocation_management("subcounty", "INTERSECT", bounding_box)
arcpy.CopyFeatures_management("subcounty", "selected_subcounty.shp")

### Read ACS Data

In [25]:
# County-level ACS extract: keep the join key, the state, and four estimate columns,
# give the estimates short names, and save the trimmed table next to the source file.
acs_county_source = os.path.join(local_directory, 'nhgis0036_ds239_20185_county.csv')
acs_county_columns = ['GISJOIN', 'STATE', 'AJWME001', 'AJW6E001', 'AJ1CE001', 'AJ1SE001']
acs_county_names = {
    "AJWME001": "POP",
    "AJW6E001": "WPLACE",
    "AJ1CE001": "WPOP",
    "AJ1SE001": "HOUSE",
}

ACS_county = (
    pd.read_csv(acs_county_source, encoding='latin-1')
    .loc[:, acs_county_columns]
    .rename(columns=acs_county_names)
)
ACS_county.to_csv(os.path.join(local_directory, 'ACS_county.csv'), index=False)
ACS_county

Unnamed: 0,GISJOIN,STATE,POP,WPLACE,WPOP,HOUSE
0,G0100010,Alabama,55200,24428.0,43368.0,23315
1,G0100030,Alabama,208107,91420.0,167712.0,111945
2,G0100050,Alabama,25782,0.0,20948.0,11937
3,G0100070,Alabama,22527,7946.0,18470.0,9161
4,G0100090,Alabama,57645,21148.0,45915.0,24222
...,...,...,...,...,...,...
3215,G7201450,Puerto Rico,53371,14194.0,43815.0,24596
3216,G7201470,Puerto Rico,8771,0.0,7042.0,4934
3217,G7201490,Puerto Rico,22993,6357.0,18663.0,9089
3218,G7201510,Puerto Rico,34149,7513.0,28146.0,14736


In [77]:
# Sub-county ACS extract: same trimming as the county table, plus the COUNTY name column.
acs_subcounty_source = os.path.join(local_directory, 'nhgis0037_ds239_20185_cty_sub.csv')
acs_subcounty_columns = ['GISJOIN', 'STATE', 'COUNTY', 'AJWME001', 'AJW6E001', 'AJ1CE001', 'AJ1SE001']
acs_subcounty_names = {
    "AJWME001": "POP",
    "AJW6E001": "WPLACE",
    "AJ1CE001": "WPOP",
    "AJ1SE001": "HOUSE",
}

ACS_subcounty = (
    pd.read_csv(acs_subcounty_source, encoding='latin-1')
    .loc[:, acs_subcounty_columns]
    .rename(columns=acs_subcounty_names)
)
ACS_subcounty.to_csv(os.path.join(local_directory, 'ACS_subcounty.csv'), index=False)
ACS_subcounty

Unnamed: 0,GISJOIN,STATE,COUNTY,POP,WPLACE,WPOP,HOUSE
0,G010001090171,Alabama,Autauga County,2945,1025.0,2477.0,1749
1,G010001090315,Alabama,Autauga County,2550,1098.0,2018.0,1338
2,G010001092106,Alabama,Autauga County,6401,3106.0,5105.0,2472
3,G010001092628,Alabama,Autauga County,43304,19199.0,33768.0,17756
4,G010003090207,Alabama,Baldwin County,26204,10318.0,20214.0,9976
...,...,...,...,...,...,...,...
36625,G720153080166,Puerto Rico,Yauco Municipio,779,119.0,689.0,421
36626,G720153081069,Puerto Rico,Yauco Municipio,4380,1305.0,3525.0,1611
36627,G720153081155,Puerto Rico,Yauco Municipio,11864,3087.0,9538.0,4722
36628,G720153085541,Puerto Rico,Yauco Municipio,112,9.0,55.0,71


### QAQC

In [4]:
# Trim selected_county.shp down to the two attributes used later:
# NAMELSAD and the GISJOIN join key (KEEP_FIELDS drops everything else).
county_fields_to_keep = 'NAMELSAD;GISJOIN'
arcpy.management.DeleteField('selected_county.shp', county_fields_to_keep, 'KEEP_FIELDS')

In [9]:
# Attach county attributes to every pickup point (default spatial-join options).
arcpy.SpatialJoin_analysis(
    'PickUp_Apr',           # target: pickup points
    'selected_county',      # join: counties inside the bounding box
    'PickUp_Apr_county',    # output
)

In [10]:
# Define a SQL query to select features (adjust as needed)
# Rows with an empty GISJOIN (presumably pickups that matched no county in the
# spatial join -- confirm against the join output).
query = "GISJOIN = ''"
joined_points_layer = "PickUp_Apr_county"
unmatched_points_name = "PickUp_Apr_county_selection"

# Select those rows, then copy the layer (with the selection applied) to a new shapefile.
arcpy.SelectLayerByAttribute_management(joined_points_layer, "NEW_SELECTION", query)
arcpy.CopyFeatures_management(joined_points_layer, unmatched_points_name)

# Strip the fields added by the first spatial join so the copy can be joined again.
arcpy.management.DeleteField(
    unmatched_points_name + '.shp',
    'Join_Count;TARGET_FID;NAMELSAD;GISJOIN',
    'DELETE_FIELDS',
)

In [11]:
# Complement of the previous cell: select the empty-GISJOIN rows, flip the selection,
# and copy what remains (rows that do have a GISJOIN) to its own dataset.
query = "GISJOIN = ''"
joined_points_layer = "PickUp_Apr_county"

arcpy.SelectLayerByAttribute_management(joined_points_layer, "NEW_SELECTION", query)
arcpy.SelectLayerByAttribute_management(joined_points_layer, "SWITCH_SELECTION")

arcpy.CopyFeatures_management(joined_points_layer, "PickUp_Apr_county_revselection")

In [12]:
# Spatial join new 
arcpy.SpatialJoin_analysis('PickUp_Apr_county_selection', 'selected_county', 'PickUp_Apr_county_sub',match_option='CLOSEST')

In [13]:
# Merge back
arcpy.Merge_management(['PickUp_Apr_county_revselection', 'PickUp_Apr_county_sub'], 'PickUp_Apr_county_qaqc')

In [14]:
# count summary
arcpy.Statistics_analysis('PickUp_Apr_county_qaqc.shp', 'PickUp_Apr_county_Count.dbf', [["TARGET_FID", "COUNT"]], ["GISJOIN","Hour"])
arcpy.management.DeleteField('PickUp_Apr_county_Count.dbf','COUNT_TARG','DELETE_FIELDS')
arcpy.TableToTable_conversion('PickUp_Apr_county_Count.dbf', arcpy.env.workspace, "PickUp_Apr_county_Count.csv")

In [15]:
#delete unnecessary shapefiles
arcpy.Delete_management('PickUp_Apr_county.shp')
arcpy.Delete_management('PickUp_Apr_county_revselection.shp')
arcpy.Delete_management('PickUp_Apr_county_selection.shp')
arcpy.Delete_management('PickUp_Apr_county_sub.shp')

### EDA & Visualization 

In [16]:
#Time line plot
pickup_count = pd.read_csv(os.path.join(local_directory,'PickUp_Apr_county_Count.csv'))

# Round the timestamp to the nearest 15-minute interval
#pickup_count['Time15MIN'] = pd.to_datetime(pickup_count['Interval']).dt.time

time_summary = pickup_count.groupby('Hour',as_index=False).agg({"FREQUENCY":"mean"})

In [17]:
# Line plot of the mean pickup count per county for each hour of the day.
#
# The x values are built from the hours that are actually present in time_summary.
# The previous version hard-coded 24 hourly timestamps, so an hour with no pickups
# would make x and y different lengths and ax.plot would raise. It also used the
# 'T' frequency alias, which pandas has deprecated in favour of 'min'.
hourly = time_summary.sort_values('Hour')

# Arbitrary anchor date; only the time of day is shown on the axis.
anchor_day = pd.Timestamp('2023-11-27', tz='UTC')
x = pd.DatetimeIndex(anchor_day + pd.to_timedelta(hourly['Hour'], unit='h'))

y = hourly["FREQUENCY"].to_list()

myFmt = mdates.DateFormatter('%H:%M')

# plot
fig, ax = plt.subplots()
ax.plot(x, y)

ax.xaxis.set_major_formatter(myFmt)
ax.set_xlabel('Hour of day')
ax.set_ylabel('Mean pickups per county')
ax.set_title('Mean pickups per county by hour of day')
fig.autofmt_xdate()

plt.show()

In [14]:
#Spatial map
space_summary = pickup_count.groupby('GISJOIN',as_index=False).agg({"FREQUENCY":"mean"})
space_summary.to_csv(os.path.join(local_directory,'space_summary.csv'),index=False)
arcpy.management.AddJoin('selected_county.shp', 'GISJOIN', 'space_summary.csv', 'GISJOIN')


In [15]:
# Persist the joined county layer as its own dataset.
# NOTE(review): "selected_county_Layer1" looks like the layer name auto-generated by the
# AddJoin call in the previous cell; it will differ if that cell has been run more than
# once in the session -- confirm the name before re-running.
joined_county_layer = "selected_county_Layer1"
arcpy.CopyFeatures_management(joined_county_layer, "PickUp_Apr_county_space")

In [27]:
#spatial-temporal maps
space_time_summary = pickup_count.groupby(['Hour','GISJOIN'],as_index=False).agg({"FREQUENCY":"mean"})
space_time_summary_3 = space_time_summary[space_time_summary['Hour']==3]
space_time_summary_8 = space_time_summary[space_time_summary['Hour']==7]
space_time_summary_12 = space_time_summary[space_time_summary['Hour']==12]
space_time_summary_18 = space_time_summary[space_time_summary['Hour']==17]

space_time_summary_3.to_csv(os.path.join(local_directory,'space_time_summary_3.csv'),index=False)
space_time_summary_8.to_csv(os.path.join(local_directory,'space_time_summary_7.csv'),index=False)
space_time_summary_12.to_csv(os.path.join(local_directory,'space_time_summary_12.csv'),index=False)
space_time_summary_18.to_csv(os.path.join(local_directory,'space_time_summary_17.csv'),index=False)


In [17]:
# Join the hour-3 summary to the county polygons and save the joined layer.
# The AddJoin result is passed straight to CopyFeatures instead of the hard-coded
# "selected_county_Layer5": that auto-generated layer name depends on how many layers
# the session has already created, so it breaks on a fresh kernel or a re-run.
joined_hour_3 = arcpy.management.AddJoin('selected_county.shp', 'GISJOIN', 'space_time_summary_3.csv', 'GISJOIN')
arcpy.CopyFeatures_management(joined_hour_3, "PickUp_Apr_county_spacetime_3")

In [28]:
# Join the hour-7 summary to the county polygons and save the joined layer.
# The AddJoin result is passed straight to CopyFeatures instead of the hard-coded
# "selected_county_Layer11": that auto-generated layer name depends on how many layers
# the session has already created, so it breaks on a fresh kernel or a re-run.
joined_hour_7 = arcpy.management.AddJoin('selected_county.shp', 'GISJOIN', 'space_time_summary_7.csv', 'GISJOIN')
arcpy.CopyFeatures_management(joined_hour_7, "PickUp_Apr_county_spacetime_7")

In [21]:
# Join the hour-12 summary to the county polygons and save the joined layer.
# The AddJoin result is passed straight to CopyFeatures instead of the hard-coded
# "selected_county_Layer7": that auto-generated layer name depends on how many layers
# the session has already created, so it breaks on a fresh kernel or a re-run.
joined_hour_12 = arcpy.management.AddJoin('selected_county.shp', 'GISJOIN', 'space_time_summary_12.csv', 'GISJOIN')
arcpy.CopyFeatures_management(joined_hour_12, "PickUp_Apr_county_spacetime_12")

In [30]:
# Join the hour-17 summary to the county polygons and save the joined layer.
# The AddJoin result is passed straight to CopyFeatures instead of the hard-coded
# "selected_county_Layer13": that auto-generated layer name depends on how many layers
# the session has already created, so it breaks on a fresh kernel or a re-run.
# NOTE(review): the output is named "..._spacetime_18" although it holds the hour-17
# table; the name is left unchanged in case other artifacts reference it -- confirm.
joined_hour_17 = arcpy.management.AddJoin('selected_county.shp', 'GISJOIN', 'space_time_summary_17.csv', 'GISJOIN')
arcpy.CopyFeatures_management(joined_hour_17, "PickUp_Apr_county_spacetime_18")

In [3]:
# Export the county neighbor table to CSV in the workspace.
# NOTE(review): Neighbor_county.dbf is not created anywhere in the visible cells;
# presumably it was produced outside this notebook -- confirm its origin.
neighbor_table = 'Neighbor_county.dbf'
arcpy.TableToTable_conversion(neighbor_table, arcpy.env.workspace, "Neighbor_county.csv")