In [9]:
import colorsys

import folium
import geopandas as gpd
import numpy as np
import pydeck as pdk
from folium.plugins import MarkerCluster
from sklearn.cluster import DBSCAN

In [17]:
# Read the wind turbine shapefile into a GeoDataFrame
wind_turbine_shapefile_path = "../data/uswtdbSHP/uswtdb_v7_0_20240510.shp"
wind_turbine_shapefile = gpd.read_file(wind_turbine_shapefile_path)

# Keep only the rows whose state code is 'TX'
is_texas = wind_turbine_shapefile['t_state'] == 'TX'
wind_turbines_in_texas = wind_turbine_shapefile[is_texas]

# One semi-transparent red dot per turbine, positioned by its xlong/ylat columns
layer = pdk.Layer(
    'ScatterplotLayer',
    data=wind_turbines_in_texas,
    get_radius=500,
    get_color=[200, 30, 0, 160],
    get_position='[xlong, ylat]',
)

# Centre the initial viewport on the mean turbine coordinate
texas_center_longitude = wind_turbines_in_texas['xlong'].mean()
texas_center_latitude = wind_turbines_in_texas['ylat'].mean()
view_state = pdk.ViewState(
    latitude=texas_center_latitude,
    longitude=texas_center_longitude,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
)

# Build the deck and display it in the notebook
r = pdk.Deck(initial_view_state=view_state, layers=[layer])
r.show()

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…

In [18]:
def safe_convert(val):
    """Parse the first four characters of ``val`` as an integer.

    Parameters
    ----------
    val : str or any
        Value to parse. Expected to be a string whose first four characters
        form an integer (presumably a year prefix -- confirm against the data).

    Returns
    -------
    int or float
        The parsed integer, or ``np.nan`` when the value cannot be parsed.
    """
    try:
        return int(val[:4])
    except (ValueError, TypeError):
        # ValueError: the 4-character prefix is not an integer literal.
        # TypeError: val cannot be sliced (e.g. None or a float NaN from a
        # missing cell) or the slice is not something int() accepts.
        return np.nan

# Work on a copy so the Texas subset built earlier is left untouched
filtered_df = wind_turbines_in_texas.copy()

# Replace each 'faa_asn' value with the integer parsed from its first four
# characters (NaN when it cannot be parsed) -- presumably the filing year.
filtered_df['faa_asn'] = filtered_df['faa_asn'].apply(safe_convert)

# Keep rows whose parsed value is at most 2012; NaN compares False, so rows
# that could not be parsed are dropped here as well.
filtered_df = filtered_df[filtered_df['faa_asn'] <= 2012]


# One pickable red dot per remaining turbine
layer = pdk.Layer(
    'ScatterplotLayer',
    filtered_df,
    get_position='[xlong, ylat]',
    get_radius=50,
    get_fill_color='[200, 30, 0]',
    pickable=True,
    auto_highlight=True
)

# Centre the viewport on the mean coordinate of the filtered turbines
view_state = pdk.ViewState(
    longitude=filtered_df['xlong'].mean(),
    latitude=filtered_df['ylat'].mean(),
    zoom=6,
    min_zoom=5,
    max_zoom=15)

# Build and display the deck once
r = pdk.Deck(layers=[layer], initial_view_state=view_state)
r.show()

DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…

In [20]:
# Extract (x, y) coordinate pairs from the GeoDataFrame's point geometries
points = filtered_df.geometry.apply(lambda point: (point.x, point.y)).tolist()

# Run DBSCAN algorithm
# NOTE(review): eps is measured in the units of the geometry coordinates with
# the default Euclidean metric -- presumably degrees of lon/lat; confirm the CRS.
epsilon = 0.1  # Distance threshold for clustering
min_samples = 5  # Minimum number of points required to form a cluster
dbscan = DBSCAN(eps=epsilon, min_samples=min_samples)
labels = dbscan.fit_predict(points)

# Add the cluster labels to the GeoDataFrame
filtered_df['cluster_label'] = labels

# Print the number of clusters found (label -1 marks noise, not a cluster)
num_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print(f"Number of clusters found: {num_clusters}")

# Give every cluster its own RGB colour. Scaling the label directly
# (cluster_label*15) goes negative for noise and above 255 from label 18 on,
# so many clusters would end up with the same colour. Instead, step the hue by
# the golden-ratio fraction so consecutive cluster ids get well-separated hues,
# and draw noise points in grey.
GOLDEN_RATIO_FRACTION = 0.618033988749895
cluster_colors = {-1: [128, 128, 128]}
for cluster_id in range(num_clusters):
    hue = (cluster_id * GOLDEN_RATIO_FRACTION) % 1.0
    red, green, blue = colorsys.hsv_to_rgb(hue, 0.9, 0.9)
    cluster_colors[cluster_id] = [int(red * 255), int(green * 255), int(blue * 255)]
filtered_df['cluster_color'] = [cluster_colors[int(label)] for label in labels]

# Scatter plot coloured by cluster, reusing the viewport defined earlier
layer = pdk.Layer(
    'ScatterplotLayer',
    filtered_df,
    get_position='[xlong, ylat]',
    get_radius=50,
    get_fill_color='cluster_color',
    pickable=True,
    auto_highlight=True
)

r = pdk.Deck(layers=[layer], initial_view_state=view_state)
r.show()

Number of clusters found: 34


DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…