In [1]:
import numpy as np
import pandas as pd
import overpy
import geopandas as gpd
import ee
from geetools import batch
import requests
from shapely.geometry import Polygon, box
import os
import rasterio
import rasterio.plot
import matplotlib.pyplot as plt


## OpenStreetMap API

In [2]:
api = overpy.Overpass()

# Overpass QL: select every way tagged man_made=wastewater_plant inside the
# admin_level=4 area named "California", then recurse down to the nodes of
# those ways so their coordinates are part of the response.
query = """
    area[admin_level=4]["name"="California"]->.searchArea;
    (
      way["man_made"="wastewater_plant"](area.searchArea);
    );
    (._;>;);
    out body;
    """

result = api.query(query)

# plants maps a plant label to its outline as a list of (lon, lat) float pairs.
# NOTE(review): the label is the dict key, so ways that share the same "name"
# tag overwrite one another here (the last one processed wins).
plants = {}

for way in result.ways:
    # Ways without a "name" tag get the synthetic label "Plant_<way id>".
    plant_name = way.tags.get("name", f"Plant_{way.id}")

    # Node coordinates arrive as Decimal; store plain floats in (lon, lat) order.
    nodes_coords = [(float(node.lon), float(node.lat)) for node in way.nodes]

    plants[plant_name] = nodes_coords


In [5]:
# Build one polygon per plant from its coordinate ring, then assemble a
# GeoDataFrame (one row per plant) tagged as EPSG:4326.
geoms = [Polygon(ring) for ring in plants.values()]
df = gpd.GeoDataFrame(
    {'WWTP_name': list(plants), 'geometry': geoms},
    crs="EPSG:4326",
)
df

Unnamed: 0,WWTP_name,geometry
0,Plant_24298754,"POLYGON ((-121.80940 37.69305, -121.80582 37.6..."
1,Plant_24396910,"POLYGON ((-121.78360 36.80423, -121.78363 36.8..."
2,EBMUD Wastewater Treatment Plant,"POLYGON ((-122.29310 37.82285, -122.29238 37.8..."
3,Hyperion Wastewater Treatment Plant,"POLYGON ((-118.42914 33.91957, -118.43162 33.9..."
4,San José–Santa Clara Regional Wastewater Facility,"POLYGON ((-121.95539 37.43062, -121.95452 37.4..."
...,...,...
3101,Plant_1188491467,"POLYGON ((-121.52373 38.56623, -121.52238 38.5..."
3102,Plant_1201733978,"POLYGON ((-118.94698 37.64148, -118.94744 37.6..."
3103,Plant_1205552620,"POLYGON ((-121.96382 38.68125, -121.96450 38.6..."
3104,Plant_1213883271,"POLYGON ((-120.46290 38.12344, -120.46263 38.1..."


In [6]:
# Compute each centroid in the '+proj=cea' projected CRS instead of directly on
# lon/lat degrees, then convert the resulting points back to EPSG:4326.
df["centroid"] = (
    df.to_crs('+proj=cea')
    .centroid
    .to_crs(epsg=4326)
)

In [7]:
# Display the plants table, which now carries the "centroid" column.
df

Unnamed: 0,WWTP_name,geometry,centroid
0,Plant_24298754,"POLYGON ((-121.80940 37.69305, -121.80582 37.6...",POINT (-121.80762 37.69120)
1,Plant_24396910,"POLYGON ((-121.78360 36.80423, -121.78363 36.8...",POINT (-121.78025 36.79990)
2,EBMUD Wastewater Treatment Plant,"POLYGON ((-122.29310 37.82285, -122.29238 37.8...",POINT (-122.29525 37.82512)
3,Hyperion Wastewater Treatment Plant,"POLYGON ((-118.42914 33.91957, -118.43162 33.9...",POINT (-118.43006 33.92574)
4,San José–Santa Clara Regional Wastewater Facility,"POLYGON ((-121.95539 37.43062, -121.95452 37.4...",POINT (-121.94663 37.43156)
...,...,...,...
3101,Plant_1188491467,"POLYGON ((-121.52373 38.56623, -121.52238 38.5...",POINT (-121.52255 38.56391)
3102,Plant_1201733978,"POLYGON ((-118.94698 37.64148, -118.94744 37.6...",POINT (-118.94450 37.63989)
3103,Plant_1205552620,"POLYGON ((-121.96382 38.68125, -121.96450 38.6...",POINT (-121.96297 38.67961)
3104,Plant_1213883271,"POLYGON ((-120.46290 38.12344, -120.46263 38.1...",POINT (-120.46110 38.12511)


In [11]:
# Read the CSV and keep only the rows whose 'Bucket' column equals "Yes".
df_yj_list = pd.read_csv('Yuanjing_image_filenames.csv')
df_yj_yes = df_yj_list[df_yj_list['Bucket'] == "Yes"]
df_yj_yes

Unnamed: 0,plant name,Bucket,comment,solar,All Black
22,Plant_668191389,Yes,,,
33,Redding Stillwater Treatment Plant,Yes,,,
40,Susanville CSD Wastewater Treatment Plant,Yes,,,
50,Plant_630524137,Yes,,,
90,Wastewater Treatment Plant,Yes,,,
110,Plant_827921406,Yes,,Yes,
117,Plant_906341229,Yes,,,
156,Plant_896974123,Yes,,Yes,
163,Plant_438360048,Yes,,Yes,
181,Plant_723852046,Yes,,,


In [13]:
# Restrict the plants table to rows whose WWTP_name appears in the
# 'plant name' column of the "Yes" subset (exact string match on the name).
gdf_yj_list = df[df['WWTP_name'].isin(df_yj_yes['plant name'])]
gdf_yj_list

Unnamed: 0,WWTP_name,geometry,centroid
5,Sunnyvale Water Pollution Control Plant,"POLYGON ((-122.01815 37.41911, -122.01795 37.4...",POINT (-122.01527 37.41897)
11,Wastewater Treatment,"POLYGON ((-122.03363 36.96278, -122.03345 36.9...",POINT (-122.03101 36.96173)
13,Wastewater Treatment Plant,"POLYGON ((-121.53986 36.98641, -121.53838 36.9...",POINT (-121.53261 36.98365)
14,Regional Water Quality Control Plant,"POLYGON ((-122.11392 37.45325, -122.11300 37.4...",POINT (-122.11103 37.45224)
31,Sewer Plant #3,"POLYGON ((-119.07450 35.27432, -119.07805 35.2...",POINT (-119.08379 35.27696)
33,Sewer Plant #2,"POLYGON ((-118.96754 35.32524, -118.97646 35.3...",POINT (-118.96802 35.33247)
2344,Plant_87774803,"POLYGON ((-121.55257 39.06739, -121.55224 39.0...",POINT (-121.55049 39.06513)
2609,Plant_118108148,"POLYGON ((-117.26675 33.86623, -117.26193 33.8...",POINT (-117.26431 33.86801)
2826,Ukiah Wastewater Treatment Plant,"POLYGON ((-123.19353 39.11405, -123.18446 39.1...",POINT (-123.18812 39.11196)
2856,Ryder Street treatment plant,"POLYGON ((-122.24931 38.09061, -122.24949 38.0...",POINT (-122.25123 38.09189)


In [14]:
# Show the centroid of each selected plant as a clustered marker on a map.
import folium
import branca
from folium.plugins import FastMarkerCluster, MarkerCluster

# Base map; initial view is centred on lat 37, lon -120 at zoom level 6.
m = folium.Map(location=[37, -120], zoom_start=6)

# One marker per centroid. folium takes [lat, lon], i.e. [y, x] of the point.
mc = MarkerCluster()
for centroid in gdf_yj_list['centroid']:
    folium.Marker(location=[centroid.y, centroid.x]).add_to(mc)
mc.add_to(m)

# The map is the cell's last expression, so the notebook renders it.
m


In [15]:
# Write the interactive map to an HTML file in the current working directory.
m.save('WWTP_centroid.html')