# Spatial Analysis

The expected input is the processed TSDC data, stored as a CSV file.

In [None]:
# Data folder, where the composite data was written from the TSDC_data file
to_data_folder = "../Data"
# Where the municipality boundary files are published (a sub-folder of the data folder)
to_boundary_folder = to_data_folder + "/muni_boundaries"

In [None]:
# %conda install geopandas

If importing geopandas fails with the error `ImportError: libtiff.so.5: cannot open shared object file: No such file or directory`, you can resolve it by running the command `apt install libtiff5` in the root of the notebook container. If you are using Docker Desktop, you can open a terminal directly from the container; otherwise use `docker exec -it [container id] /bin/sh`. You might need to run `apt-get update` before you can perform the install.

In [None]:
# Spatial Analysis
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import shapely as shp
from shapely.geometry import shape
from shapely.geometry import MultiPoint
from shapely.geometry import Point

def parse_geom(geom_str):
    """Parse a single-quoted GeoJSON-like string into a shapely geometry.

    Single quotes are swapped for double quotes so the text can be read as
    JSON, and the resulting mapping is handed to shapely's ``shape``.

    Parameters
    ----------
    geom_str : str
        Text such as ``"{'type': 'Point', 'coordinates': [x, y]}"``.

    Returns
    -------
    shapely geometry or None
        ``None`` when the value is missing (NaN / None) or is not valid JSON
        (for example an empty string or a string with no coordinates).
    """
    try:
        geom_dict = json.loads(geom_str.replace("'", '"'))
        return shape(geom_dict)
    except (TypeError, AttributeError, json.JSONDecodeError):
        # TypeError / AttributeError: NaN or None has no usable .replace().
        # JSONDecodeError: empty or malformed text -- previously uncaught, so
        # one bad row aborted the whole .apply().
        return None

In [None]:
# Load the processed trips.
# Known issue: column names and data formatting can be off when the file comes from the TSDC.
trips_csv_path = f"{to_data_folder}/tsdc_filtered_merged_trips.csv"
trip_program_df = pd.read_csv(trips_csv_path)

# The `distance` column is divided by 1000 to give kilometres
trip_program_df["distance_km"] = trip_program_df["distance"] / 1000

# Reproducible 50-row preview of the columns used below
preview_columns = [
    "program",
    "perno",
    "_id",
    "data_start_fmt_time",
    "data_end_fmt_time",
    "distance_km",
    "Mode_confirm",
    "data_start_loc_coordinates",
]
trip_program_df.sample(n=50, random_state=123)[preview_columns]

In [None]:
# Assemble the points: wrap each raw coordinate string in a GeoJSON-style
# Point description, then parse it into a geometry with parse_geom.
point_prefix = "{'type': 'Point', 'coordinates': "
start_geojson = point_prefix + trip_program_df['data_start_loc_coordinates'] + "}"
end_geojson = point_prefix + trip_program_df['data_end_loc_coordinates'] + "}"

trip_program_df["start_loc"] = start_geojson.apply(parse_geom)
trip_program_df["end_loc"] = end_geojson.apply(parse_geom)
trip_program_df.head()

In [None]:
def _start_end_multipoint(row):
    """Return a MultiPoint of the trip's start and end, or None if either is missing.

    parse_geom yields None for rows without usable coordinates; MultiPoint
    cannot be built from None, so such rows get a missing geometry instead of
    aborting the whole apply().
    """
    start, end = row['start_loc'], row['end_loc']
    if start is None or end is None:
        return None
    return MultiPoint([start, end])


# Create a multipoint column holding both endpoints of every trip
trip_program_df['start_end'] = trip_program_df.apply(_start_end_multipoint, axis=1)

# Use the start/end multipoint as the active geometry and tag it with EPSG:4269
trip_program_df_gdf = gpd.GeoDataFrame(trip_program_df, geometry="start_end")
trip_program_df_gdf = trip_program_df_gdf.set_crs(4269, allow_override=True)
trip_program_df_gdf.crs

## Data Preparation

In [None]:
# Rebind instead of renaming in place so re-running the cell is side-effect free
trip_program_df_gdf = trip_program_df_gdf.rename(columns={"data_user_input_mode_confirm": "mode_confirm"})

e_bike_trips = trip_program_df_gdf[trip_program_df_gdf.mode_confirm == 'pilot_ebike']
print(len(e_bike_trips), "ebike trips")

denver_boundary = gpd.read_file(to_boundary_folder + "/denver_uza_akcicek.shp")

# Stack every start point and every end point of the e-bike trips into one series
e_bike_start_end_points = pd.concat([e_bike_trips.start_loc, e_bike_trips.end_loc])
print(len(e_bike_start_end_points))

# Missing endpoints (None from parse_geom) are dropped first: None has no
# .coords, so converting them would raise.
e_bike_geo_start_end_points = gpd.GeoSeries(
    e_bike_start_end_points.dropna().apply(lambda p: shp.geometry.Point(p.coords)),
    crs="EPSG:4269",
)
# Keep only the points that fall inside the first geometry of the Denver boundary file
e_bike_start_or_end_within = e_bike_geo_start_end_points[
    e_bike_geo_start_end_points.within(denver_boundary.geometry.iloc[0])
]

In [None]:
# Car-like trips: confirmed mode is drove_alone, shared_ride, or taxi
car_like_trips = trip_program_df_gdf.query('mode_confirm == "drove_alone" | mode_confirm == "shared_ride" | mode_confirm == "taxi"')
print(len(car_like_trips))

# Stack every start point and every end point of the car-like trips into one series
car_like_start_end_points = pd.concat([car_like_trips.start_loc, car_like_trips.end_loc])

# Missing endpoints (None from parse_geom) are dropped first: None has no
# .coords, so converting them would raise.
car_like_geo_start_end_points = gpd.GeoSeries(
    car_like_start_end_points.dropna().apply(lambda p: shp.geometry.Point(p.coords)),
    crs="EPSG:4269",
)
# Keep only the points that fall inside the first geometry of the Denver boundary file
car_like_start_or_end_within = car_like_geo_start_end_points[
    car_like_geo_start_end_points.within(denver_boundary.geometry.iloc[0])
]

### preparing the boundaries

In [None]:
def prepare_boundary(filename):
    """Read a boundary file from the boundary folder, plot it, and return it.

    `filename` is appended directly to `to_boundary_folder`, so it must carry
    its own leading "/" (e.g. "/boulder.shp").
    """
    boundary_path = to_boundary_folder + filename
    area = gpd.read_file(boundary_path)
    area.plot()  # quick visual check of what was loaded
    return area

In [None]:
# Load (and plot) the boundary of every program area
boulder_boundary = prepare_boundary(filename="/boulder.shp")
durango_boundary = prepare_boundary(filename="/durango.shp")
fort_collins_boundary = prepare_boundary(filename="/fort_collins.shp")
vail_boundary = prepare_boundary(filename="/vail.shp")
pueblo_boundary = prepare_boundary(filename="/pueblo.shp")
denver_boundary = prepare_boundary(filename="/denver_uza_akcicek.shp")

### gathering data within the boundaries

In [None]:
# Denver is particular to Smart Commute -- we don't want to cloud it with
# people who visited Denver from other places, so also require program == "sc".
denver_polygon = denver_boundary.geometry.iloc[0]
inside_denver = trip_program_df_gdf.within(denver_polygon)
in_smart_commute = trip_program_df_gdf.program == "sc"
all_sc_within_denver = trip_program_df_gdf[inside_denver & in_smart_commute]

# Every other area: any trip whose geometry lies within the first boundary geometry
all_within_boulder = trip_program_df_gdf.loc[
    trip_program_df_gdf.within(boulder_boundary.geometry.iloc[0])
]
all_within_durango = trip_program_df_gdf.loc[
    trip_program_df_gdf.within(durango_boundary.geometry.iloc[0])
]
all_within_fortcollins = trip_program_df_gdf.loc[
    trip_program_df_gdf.within(fort_collins_boundary.geometry.iloc[0])
]
all_within_vail = trip_program_df_gdf.loc[
    trip_program_df_gdf.within(vail_boundary.geometry.iloc[0])
]
all_within_pueblo = trip_program_df_gdf.loc[
    trip_program_df_gdf.within(pueblo_boundary.geometry.iloc[0])
]

### splitting out car trips and e-bike trips to compare

In [None]:
def trips_in_border(boundary):
    """Split the car-like and e-bike trips down to those within a boundary.

    Only the first geometry of `boundary` is used. Returns a
    `(car_trips, bike_trips)` tuple filtered from the notebook-level
    `car_like_trips` and `e_bike_trips` frames.
    """
    area_polygon = boundary.geometry.iloc[0]
    cars_inside = car_like_trips.within(area_polygon)
    bikes_inside = e_bike_trips.within(area_polygon)
    return car_like_trips[cars_inside], e_bike_trips[bikes_inside]

In [None]:
# Denver -- only Smart Commute data!! Filter the Smart Commute subset by mode
# instead of going through trips_in_border (which uses all programs).
car_like_start_or_end_within_denver = all_sc_within_denver.query('mode_confirm == "drove_alone" | mode_confirm == "shared_ride" | mode_confirm == "taxi"')
e_bike_start_or_end_within_denver = all_sc_within_denver[all_sc_within_denver.mode_confirm == 'pilot_ebike']

# Boulder
car_like_start_or_end_within_boulder, e_bike_start_or_end_within_boulder = trips_in_border(boulder_boundary)

# Durango
car_like_start_or_end_within_durango, e_bike_start_or_end_within_durango = trips_in_border(durango_boundary)

# Fort Collins
car_like_start_or_end_within_fortcollins, e_bike_start_or_end_within_fortcollins = trips_in_border(fort_collins_boundary)

# Vail -- pass the boundary frame itself, like every other area
# (previously `vail_boundary.geometry` was passed here)
car_like_start_or_end_within_vail, e_bike_start_or_end_within_vail = trips_in_border(vail_boundary)

# Pueblo
car_like_start_or_end_within_pueblo, e_bike_start_or_end_within_pueblo = trips_in_border(pueblo_boundary)

### bonus plots [all, e-bike, car]

In [None]:
def bonus_plots(area_boundary, all_within, e_bikes, cars):
    """Draw three side-by-side maps of an area: all trips, e-bike trips, car trips.

    Each panel shows the area outline in faint black with one trip layer on
    top; e-bike trips are green and car trips are red. The panels share both
    axes.
    """
    fig, ax_arr = plt.subplots(nrows=1, ncols=3, figsize=(15,15), sharex=True, sharey=True)
    # (trip layer, extra styling) for the left, middle, and right panels
    panels = [
        (all_within, {}),
        (e_bikes, {"color": "#28a745"}),
        (cars, {"color": "#dc3545"}),
    ]
    for panel_ax, (trips, styling) in zip(ax_arr, panels):
        area_boundary.boundary.plot(ax=panel_ax, alpha=0.2, color="black", edgecolor='black')
        trips.plot(ax=panel_ax, markersize=1, **styling)

In [None]:
# Boulder: all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=boulder_boundary,
    all_within=all_within_boulder,
    e_bikes=e_bike_start_or_end_within_boulder,
    cars=car_like_start_or_end_within_boulder,
)

In [None]:
# Durango: all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=durango_boundary,
    all_within=all_within_durango,
    e_bikes=e_bike_start_or_end_within_durango,
    cars=car_like_start_or_end_within_durango,
)

In [None]:
# Fort Collins: all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=fort_collins_boundary,
    all_within=all_within_fortcollins,
    e_bikes=e_bike_start_or_end_within_fortcollins,
    cars=car_like_start_or_end_within_fortcollins,
)

In [None]:
# Vail: all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=vail_boundary,
    all_within=all_within_vail,
    e_bikes=e_bike_start_or_end_within_vail,
    cars=car_like_start_or_end_within_vail,
)

In [None]:
# Pueblo: all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=pueblo_boundary,
    all_within=all_within_pueblo,
    e_bikes=e_bike_start_or_end_within_pueblo,
    cars=car_like_start_or_end_within_pueblo,
)

In [None]:
# Denver (Smart Commute trips only): all trips, e-bike trips, car-like trips
bonus_plots(
    area_boundary=denver_boundary,
    all_within=all_sc_within_denver,
    e_bikes=e_bike_start_or_end_within_denver,
    cars=car_like_start_or_end_within_denver,
)

### Preparing for block/pixel analysis

In [None]:
def prepare_blocks_pixels(block_filename, pixel_filename):
    """Read an area's block file and pixel file from the boundary folder.

    Both names are appended directly to `to_boundary_folder`, so each must
    carry its own leading "/". Returns a `(blocks, pixels)` tuple.
    """
    blocks, pixels = (
        gpd.read_file(to_boundary_folder + name)
        for name in (block_filename, pixel_filename)
    )
    return blocks, pixels

In [None]:
# Read in the block and pixel files for every program area
denver_blocks, denver_pixels = prepare_blocks_pixels(
    block_filename="/denver_blocks.shp", pixel_filename="/denver_pixels_ca.shp"
)
boulder_blocks, boulder_pixels = prepare_blocks_pixels(
    block_filename="/boulder_blocks.shp", pixel_filename="/boulder_pixels.shp"
)
fort_collins_blocks, fort_collins_pixels = prepare_blocks_pixels(
    block_filename="/fort_collins_blocks.shp", pixel_filename="/fortcollins_pixels.shp"
)
vail_blocks, vail_pixels = prepare_blocks_pixels(
    block_filename="/vail_blocks.shp", pixel_filename="/vail_pixels.shp"
)
durango_blocks, durango_pixels = prepare_blocks_pixels(
    block_filename="/durango_blocks.shp", pixel_filename="/durango_pixels.shp"
)
pueblo_blocks, pueblo_pixels = prepare_blocks_pixels(
    block_filename="/pueblo_blocks.shp", pixel_filename="/pueblo_pixels.shp"
)

### data processing for e-bike : cars ratios [pixels and blocks]

In [None]:
## Set up the counting helper -- note which "within place" frames are passed in:
## Denver's only include Smart Commute
def get_counts(pixel_polygon, all_within, all_bikes, all_cars):
    """Count how many trips of each kind lie within one polygon.

    Returns a three-element Series: [all trips, e-bike trips, car-like trips],
    in that order.
    """
    trip_sets = (all_within, all_bikes, all_cars)
    counts = [np.count_nonzero(trips.within(pixel_polygon)) for trips in trip_sets]
    return pd.Series(counts)

In [None]:
def get_denver_counts(pixel_polygon):
    """Trip counts for one polygon, using the Smart Commute-only Denver frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_sc_within_denver,
        all_bikes=e_bike_start_or_end_within_denver,
        all_cars=car_like_start_or_end_within_denver,
    )


def get_counts_boulder(pixel_polygon):
    """Trip counts for one polygon, using the Boulder frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_within_boulder,
        all_bikes=e_bike_start_or_end_within_boulder,
        all_cars=car_like_start_or_end_within_boulder,
    )


def get_counts_fortcollins(pixel_polygon):
    """Trip counts for one polygon, using the Fort Collins frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_within_fortcollins,
        all_bikes=e_bike_start_or_end_within_fortcollins,
        all_cars=car_like_start_or_end_within_fortcollins,
    )


def get_counts_vail(pixel_polygon):
    """Trip counts for one polygon, using the Vail frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_within_vail,
        all_bikes=e_bike_start_or_end_within_vail,
        all_cars=car_like_start_or_end_within_vail,
    )


def get_counts_durango(pixel_polygon):
    """Trip counts for one polygon, using the Durango frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_within_durango,
        all_bikes=e_bike_start_or_end_within_durango,
        all_cars=car_like_start_or_end_within_durango,
    )


def get_counts_pueblo(pixel_polygon):
    """Trip counts for one polygon, using the Pueblo frames."""
    return get_counts(
        pixel_polygon,
        all_within=all_within_pueblo,
        all_bikes=e_bike_start_or_end_within_pueblo,
        all_cars=car_like_start_or_end_within_pueblo,
    )

In [None]:
# Get the counts (blocks): one row of [all, e-bike, car-like] counts per block geometry
block_count_columns = ["all_trip_count", "e_bike_trip_count", "car_like_trip_count"]

denver_blocks[block_count_columns] = denver_blocks.geometry.apply(get_denver_counts)
boulder_blocks[block_count_columns] = boulder_blocks.geometry.apply(get_counts_boulder)
fort_collins_blocks[block_count_columns] = fort_collins_blocks.geometry.apply(get_counts_fortcollins)
vail_blocks[block_count_columns] = vail_blocks.geometry.apply(get_counts_vail)
durango_blocks[block_count_columns] = durango_blocks.geometry.apply(get_counts_durango)
pueblo_blocks[block_count_columns] = pueblo_blocks.geometry.apply(get_counts_pueblo)

In [None]:
# Get the counts (pixels): one row of [all, e-bike, car-like] counts per pixel geometry
pixel_count_columns = ["all_trip_count", "e_bike_trip_count", "car_like_trip_count"]

denver_pixels[pixel_count_columns] = denver_pixels.geometry.apply(get_denver_counts)
boulder_pixels[pixel_count_columns] = boulder_pixels.geometry.apply(get_counts_boulder)
fort_collins_pixels[pixel_count_columns] = fort_collins_pixels.geometry.apply(get_counts_fortcollins)
vail_pixels[pixel_count_columns] = vail_pixels.geometry.apply(get_counts_vail)
durango_pixels[pixel_count_columns] = durango_pixels.geometry.apply(get_counts_durango)
pueblo_pixels[pixel_count_columns] = pueblo_pixels.geometry.apply(get_counts_pueblo)

In [None]:
def process_ratios(blocks, pixels, area_boundary):
    """Compute e-bike : car-like trip ratios for blocks and pixels and map the pixels.

    For each of `blocks` and `pixels` (which must already carry
    `e_bike_trip_count` and `car_like_trip_count` columns) this adds, on the
    frame that was passed in:

    * `e_bike_2_car_like` -- e-bike count divided by car-like count. Infinite
      ratios (e-bike trips but no car-like trips) are capped at the largest
      finite ratio in that frame; 0/0 stays NaN.
    * `e_bike_better` -- categorical flag, True where the ratio is >= 1.

    The flag is taken from the ratio *before* the infinite values are capped.
    Otherwise a cell with e-bike trips and no car trips would be flagged as
    "e-bike < car" whenever the largest finite ratio is below 1.

    Parameters
    ----------
    blocks, pixels : frames with the two count columns (modified by adding
        the two columns described above).
    area_boundary : object whose `.boundary.plot()` draws the area outline.

    Returns
    -------
    (blocks_2, pixels_2)
        The rows of each frame whose ratio is not NaN. In `pixels_2` the
        `e_bike_better` categories are relabelled "e-bike < car" /
        "e-bike > car"; in `blocks_2` they stay False / True.
    """
    for frame in (blocks, pixels):
        ratio = frame.e_bike_trip_count / frame.car_like_trip_count
        # Decide the winner on the raw ratio, where inf still means "no cars at all"
        e_bike_wins = ratio >= 1
        # Replace infinity with the largest finite ratio (0 if there is none)
        finite_max = ratio.replace(np.inf, 0).max()
        frame["e_bike_2_car_like"] = ratio.replace(np.inf, finite_max)
        frame["e_bike_better"] = pd.Categorical(e_bike_wins)

    # Remove rows with a NaN ratio (neither e-bike nor car-like trips)
    blocks_2 = blocks[blocks["e_bike_2_car_like"].notna()]
    # Explicit copy: the relabelling below must not touch the caller's frame
    pixels_2 = pixels[pixels["e_bike_2_car_like"].notna()].copy()

    # Relabel only the flag column, rather than replacing True/False frame-wide
    pixels_2["e_bike_better"] = pixels_2["e_bike_better"].cat.rename_categories(
        {False: "e-bike < car", True: "e-bike > car"}
    )

    # Make the plot. figsize is given where the figure is created; passing it
    # together with an existing `ax` has no effect.
    ax = area_boundary.boundary.plot(figsize=(12, 6))
    pixels_2.plot(
        column="e_bike_better",
        ax=ax,
        legend=True,
        legend_kwds={"loc": "lower left"},
        categorical=True,
        cmap="Paired",
    )

    return blocks_2, pixels_2
    

In [None]:
# Denver
denver_blocks_2, denver_pixels_2 = process_ratios(
    blocks=denver_blocks, pixels=denver_pixels, area_boundary=denver_boundary
)

In [None]:
# Boulder
boulder_blocks_2, boulder_pixels_2 = process_ratios(
    blocks=boulder_blocks, pixels=boulder_pixels, area_boundary=boulder_boundary
)

In [None]:
# Fort Collins
fort_collins_blocks_2, fort_collins_pixels_2 = process_ratios(
    blocks=fort_collins_blocks, pixels=fort_collins_pixels, area_boundary=fort_collins_boundary
)

In [None]:
# Vail
vail_blocks_2, vail_pixels_2 = process_ratios(
    blocks=vail_blocks, pixels=vail_pixels, area_boundary=vail_boundary
)

In [None]:
# Durango
durango_blocks_2, durango_pixels_2 = process_ratios(
    blocks=durango_blocks, pixels=durango_pixels, area_boundary=durango_boundary
)

In [None]:
# Pueblo
pueblo_blocks_2, pueblo_pixels_2 = process_ratios(
    blocks=pueblo_blocks, pixels=pueblo_pixels, area_boundary=pueblo_boundary
)

## Draw up the pixel plots

Figure 21

# Pixel Proportions

Figure #22

In [None]:
def orgainze_ratios(pixels, program_name):
    """Summarise which share of an area's pixels falls in each `e_bike_better` group.

    Rows containing any NaN are dropped, the rest are counted per
    `e_bike_better` value, and a column named `program_name` is added holding
    each group's share of the counted pixels (based on the `id` column).

    Parameters
    ----------
    pixels : frame with at least `id` and `e_bike_better` columns.
    program_name : str
        Name of the share column to add.

    Returns
    -------
    DataFrame indexed by `e_bike_better`, with the per-column counts plus the
    `program_name` share column.
    """
    # observed=False is spelled out so that a category with no pixels still
    # gets a row (share 0.0) regardless of the pandas default; code that later
    # looks up both labels depends on both rows existing.
    area = pixels.dropna().groupby("e_bike_better", observed=False).count()
    area[program_name] = area["id"] / area["id"].sum()

    return area

In [None]:
# Share of pixels per e_bike_better group for every program; the second
# argument becomes the program's label in the summary table and the bar chart.
denver = orgainze_ratios(denver_pixels_2, 'Smart Commute\n(Denver North)')
# Label spelling fixed: "Comunity" -> "Community"
boulder = orgainze_ratios(boulder_pixels_2, 'Community Cycles\n(Boulder)')
collins = orgainze_ratios(fort_collins_pixels_2,'Fort Collins')
durango = orgainze_ratios(durango_pixels_2, 'Four Corners\n(Durango)')
vail = orgainze_ratios(vail_pixels_2, 'Vail')
pueblo = orgainze_ratios(pueblo_pixels_2,'Pueblo')

In [None]:
# Columns that only hold per-group counts; dropping them leaves one share
# column per program
count_only_columns = [
    'id', 'geometry', 'left', 'top', 'right', 'bottom',
    'all_trip_count', 'e_bike_trip_count', 'car_like_trip_count', 'e_bike_2_car_like',
]

# Side-by-side program summaries -> shares only -> one row per program
pixels = (
    pd.concat([denver, boulder, collins, durango, vail, pueblo], axis=1)
    .drop(columns=count_only_columns)
    .transpose()
)

# Convert to %
pixels['e-bike < car'] *= 100
pixels['e-bike > car'] *= 100

# Put the "e-bike > car" share first
pixels = pixels[['e-bike > car', 'e-bike < car']]

pixels

In [None]:
# Stacked bar per program: share of pixels where e-bikes beat cars vs. the rest
ax = pixels.plot.bar(
    stacked=True,
    title="Where E-bikes Used More Than Cars",
    ylabel="Proportion of Pixels (%)",
    xlabel="Program",
)

# Write each segment's height, rounded to one decimal, in the middle of the segment
for c in ax.containers:
    labels = [f'{round(segment.get_height(),1)}' for segment in c]
    ax.bar_label(c, labels=labels, label_type='center')

ax.set_xticklabels(pixels.index, rotation=45, ha='right')