# DSC 180A Quarter 1 Project Code

Wildfire Mitigation

By: Gloria Kao, Shentong Li

Outputs (tables, aggregated data, graphs, etc.) are commented out and not shown because of NDA.

## 1. EDA

EDA and merging of weather station datasets 

In [None]:
import numpy as np 
import pandas as pd
import networkx as nx

# packages for geospatial analysis and plotting
import geopandas as gpd
from shapely.geometry import Point

import folium
from folium.plugins import HeatMap

import seaborn as sns
import matplotlib.pyplot as plt

We have 5 datasets in total. We focus on 3 of them first: 

1. `gis_weatherstation_shape_2024_10_04.csv`: Information of weather stations such as names, location, structure details, etc.
2. `src_wings_meteorology_station_summary_snapshot_2023_08_02.csv`: Meteorology data for each weather station, such as max gust and alert windspeed. 
3. `src_wings_meteorology_windspeed_snapshot_2023_08_02.csv`: Windspeed snapshots collected from weather stations, ranging from years 2012 to 2022. 

In [None]:
gis_2024_1004 = pd.read_csv('data/gis_weatherstation_shape_2024_10_04.csv')
station_summary_2023_08_02 = pd.read_csv('data/src_wings_meteorology_station_summary_snapshot_2023_08_02.csv')
windspeed_2023_08_02 = pd.read_csv('data/src_wings_meteorology_windspeed_snapshot_2023_08_02.csv')

### 1.1 Table 1 - GIS 2024_10_04
#### 1.1.1 Basic Summary Stats

In [None]:
gis_2024_1004

In [None]:
gis_2024_1004.columns

In [None]:
station_location = gis_2024_1004[['weatherstationcode', 'latitude', 'longitude']]
station_location

In [None]:
gis_2024_1004.describe()

In [None]:
# check null
gis_2024_1004.isnull().sum()

In [None]:
# num of stations contained
gis_2024_1004['weatherstationname'].nunique()

In [None]:
duplicate_stations = gis_2024_1004[gis_2024_1004.duplicated(subset=['weatherstationname'], keep=False)]
duplicate_stations

In [None]:
# count of each values in 'nwszone'
gis_2024_1004['nwszone'].value_counts()

#### 1.1.2 Geospatial Analysis
Show the details of each station by clicking on the icon in the map.

In [None]:
map_center = [gis_2024_1004['latitude'].mean(), gis_2024_1004['longitude'].mean()]
m1 = folium.Map(location=map_center, zoom_start=10)

# Add weather station points to the map
for _, row in gis_2024_1004.iterrows():
    # Create a popup with relevant information
    popup_text = f"""
    Weather Station: {row['weatherstationname']}<br>
    Elevation: {row['elevation']} m<br>
    NWS Zone: {row['nwszone']}<br>
    Structure ID: {row['structureid']}<br>
    """
    
    # Add a marker for each weather station
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(popup_text, max_width=300),
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(m1)


boundary_coords = [
    (gis_2024_1004['latitude'].min(), gis_2024_1004['longitude'].min()),
    (gis_2024_1004['latitude'].min(), gis_2024_1004['longitude'].max()),
    (gis_2024_1004['latitude'].max(), gis_2024_1004['longitude'].max()),
    (gis_2024_1004['latitude'].max(), gis_2024_1004['longitude'].min())
]

# boundary box
# folium.Polygon(locations=boundary_coords, color='green', fill=True, fill_opacity=0.2).add_to(m1)

# m1.save('weather_stations_with_area_map.html')
m1

### 1.2 Table 2 - Station Summary 2023_08_02
#### 1.2.1 Basic Summary Statistics

In [None]:
station_summary_2023_08_02

In [None]:
station_summary_2023_08_02.describe()

In [None]:
# distribution graphs: one histogram (with a KDE overlay) per gust statistic
gust_histograms = [
    ('max_gust', 'Distribution of Maximum Gusts', 'Max Gust (mph)'),
    ('99th', 'Distribution of 99th Percentile Gusts', '99th Percentile Gust (mph)'),
    # this plot's x-axis used to be labelled "99th Percentile Gust (mph)"
    ('95th', 'Distribution of 95th Percentile Gusts', '95th Percentile Gust (mph)'),
]

for gust_column, gust_title, gust_xlabel in gust_histograms:
    sns.histplot(station_summary_2023_08_02[gust_column], bins=10, kde=True)
    plt.title(gust_title)
    plt.xlabel(gust_xlabel)
    plt.ylabel('Frequency')
    plt.show()
    # plt.close()

In [None]:
# Bar plots of the two categorical columns: risk classification first, then alert level
for category_column, plot_title, axis_label in (
    ('vri', 'VRI (Risk Classification) Distribution', 'VRI (H = High, M = Medium, L = Low)'),
    ('alert', 'Alert Level Distribution', 'Alert Level'),
):
    sns.countplot(data=station_summary_2023_08_02, x=category_column)
    plt.title(plot_title)
    plt.xlabel(axis_label)
    plt.ylabel('Count of Stations')
    plt.show()
    # plt.close()

### 1.2.2 Merging datasets

In [None]:
# merging the two datasets about weather stations together
merged_df = pd.merge(station_summary_2023_08_02, gis_2024_1004, right_on= 'weatherstationcode', left_on='station', how='left')
merged_df

In [None]:
# Elevation bin edges: 0, 400, ..., 5600 (same units as the 'elevation' column)
bins = range(0, 5800, 400)
# One label per pair of neighbouring edges, e.g. 'Group1: 0-400'
labels = [
    f'Group{group_no}: {low}-{high}'
    for group_no, (low, high) in enumerate(zip(bins[:-1], bins[1:]), start=1)
]

# Assign the binned elevation groups.
# pd.cut intervals are right-closed by default, so an elevation of exactly 0 or
# above the last edge (5600) is left unbinned (NaN).
merged_df['elevation_group'] = pd.cut(merged_df['elevation'], bins=bins, labels=labels)

# Count stations per (elevation group, VRI level). observed=False is passed
# explicitly: 'elevation_group' is categorical, and relying on the implicit
# default emits a FutureWarning on recent pandas. Empty groups stay on the axis.
elevation_vri_grouped = (
    merged_df.groupby('elevation_group', observed=False)['vri']
    .value_counts()
    .unstack()
    .fillna(0)
)
elevation_vri_grouped.plot(kind='bar', stacked=True, cmap='viridis')
plt.title('VRI (Risk Classification) Across Elevation Groups', fontsize=14)
plt.xlabel('Elevation Groups', fontsize=12)
plt.ylabel('Number of Stations', fontsize=12)
plt.legend(title='VRI Levels', loc='upper right')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
# plt.close()

In [None]:
# Numeric weight per risk class so the heatmap intensity can follow the VRI level
vri_weights = {'H': 3, 'M': 2, 'L': 1}
merged_df['vri_weight'] = merged_df['vri'].map(vri_weights)

# Keep only the rows that have a position and a mapped weight
cleaned_df = merged_df.dropna(subset=['latitude', 'longitude', 'vri_weight'])

# One [latitude, longitude, weight] triple per remaining row
heat_data = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        cleaned_df['latitude'], cleaned_df['longitude'], cleaned_df['vri_weight']
    )
]

# Map centered on the mean position of the remaining rows, with the weighted heat layer on top
m = folium.Map(location=[cleaned_df['latitude'].mean(), cleaned_df['longitude'].mean()], zoom_start=10)
HeatMap(heat_data, min_opacity=0.2, radius=20, blur=15, max_zoom=1).add_to(m)

# Optionally save the map to an HTML file
# m.save('geospatial_risk_heatmap.html')

# Last expression of the cell: displays the map inline in Jupyter
m

### 1.3 Table 3 - Windspeed 2023_08_02
#### 1.3.1 Basic Summary Stat

In [None]:
# Working copy of the windspeed table with a fresh 0..n-1 index and 'date'
# parsed from month/day/year strings into datetimes
windspeed_2023_08_02_edit = windspeed_2023_08_02.reset_index(drop=True).assign(
    date=lambda frame: pd.to_datetime(frame['date'], format='%m/%d/%Y')
)
windspeed_2023_08_02_edit

In [None]:
station_summary = windspeed_2023_08_02_edit.groupby('station')['wind_speed'].describe()
station_summary_edit = station_summary.reset_index()
station_summary_edit

In [None]:
# Attach station coordinates to the per-station wind-speed statistics.
# `station_summary` is the groupby/describe result, so 'station' is its index
# level name rather than a column; how='right' keeps every station that has
# wind-speed records, even when no coordinates are found for it.
# NOTE(review): `station_summary_edit` (the reset_index version) is built in
# the previous cell but not used here — confirm which of the two was intended.
location_wind_speed_merge = pd.merge(station_location, station_summary, left_on='weatherstationcode', right_on='station', how='right')
# the station code is only the join key, so it is dropped before the correlation step
location_wind_speed_merge_edit = location_wind_speed_merge.drop(columns=['weatherstationcode'])
location_wind_speed_merge_edit

In [None]:
# Pairwise correlations between coordinates and wind-speed statistics.
# Restricted to numeric columns first: DataFrame.corr no longer skips
# non-numeric columns silently on pandas >= 2.0 and raises instead.
matrix = location_wind_speed_merge_edit.select_dtypes(include='number').corr()
sns.heatmap(matrix, cmap="Greens", annot=True)

> Seems that there is a correlation between the wind speed and the longitude. 

In [None]:
windspeed_2023_08_02_group = windspeed_2023_08_02.groupby('station')['wind_speed'].mean()
windspeed_2023_08_02_group

In [None]:
# Histogram for wind speed distribution
plt.figure(figsize=(10, 6))
sns.histplot(windspeed_2023_08_02_edit['wind_speed'], bins=20, kde=True)
plt.title('Distribution of Wind Speeds')
plt.xlabel('Wind Speed (mph)')
plt.ylabel('Frequency')
plt.show()
# plt.close()

> We have an outlier of windspeed over 600mph.

In [None]:
windspeed_2023_08_02[windspeed_2023_08_02['wind_speed'] > 600]

In [None]:
plt.figure(figsize=(10, 6))
sns.boxplot(x='station', y='wind_speed', data=windspeed_2023_08_02_edit)
plt.title('Wind Speed Distribution by Station')
plt.xticks(rotation=90)
plt.xlabel('Station')
plt.ylabel('Wind Speed (mph)')
plt.show()
# plt.close()

#### 1.3.2 Analysis of windspeed over time 

In [None]:
windspeed_2023_08_02_edit['month'] = windspeed_2023_08_02_edit['date'].dt.month

month_summary = windspeed_2023_08_02_edit.groupby('month')['wind_speed'].describe()
month_summary

In [None]:
plt.figure(figsize=(14, 6))
sns.lineplot(x='date', y='wind_speed', data=windspeed_2023_08_02_edit)
plt.title('Wind Speed Over Time (All Stations)')
plt.xlabel('Date')
plt.ylabel('Wind Speed (mph)')
plt.show()
# plt.close()

In [None]:
windspeed_2023_08_02_edit['date'] = pd.to_datetime(windspeed_2023_08_02_edit['date'])

# Extract month and year from the date
windspeed_2023_08_02_edit['month'] = windspeed_2023_08_02_edit['date'].dt.month
windspeed_2023_08_02_edit['year'] = windspeed_2023_08_02_edit['date'].dt.year

# Boxplot to show wind speed by month
plt.figure(figsize=(12, 6))
sns.boxplot(x='month', y='wind_speed', data=windspeed_2023_08_02_edit)
plt.title('Wind Speed by Month')
plt.xlabel('Month')
plt.ylabel('Wind Speed (mph)')
plt.show()
# plt.close()

In [None]:
seasonal_corr = windspeed_2023_08_02_edit.groupby('month')['wind_speed'].mean()

# Plot the average wind speed for each month
plt.figure(figsize=(12, 6))
seasonal_corr.plot(kind='bar')
plt.title('Average Wind Speed by Month')
plt.xlabel('Month')
plt.ylabel('Average Wind Speed (mph)')
plt.show()
# plt.close()

## 2. Probability of Weather Stations

Calculating PSPS Probability of weather stations and displaying results

In [None]:
# not all stations have the same number of windspeed records
windspeed_grouped_count = windspeed_2023_08_02.groupby(by='station').count()
windspeed_grouped_count

In [None]:
station_codes = np.array(gis_2024_1004['weatherstationcode'])
merged_station_df = gis_2024_1004.merge(station_summary_2023_08_02, left_on='weatherstationcode', right_on='station', how='left')

In [None]:
# example: showing the windspeed alert threshold for the station "AMO"
merged_df[merged_df['weatherstationcode']=='AMO']['alert'].iloc[0]

In [None]:
# getting the PSPS probabilities of all weather stations
# each entry: [station, windspeeds, threshold, count, mean, probability (%)]
prob_lst = []

for station in station_codes:
    station_windspeeds = np.array(windspeed_2023_08_02[windspeed_2023_08_02['station'] == station]['wind_speed'])

    # Alert threshold of this station. It is reset to NaN on every iteration, so
    # a station missing from merged_df can no longer inherit the previous
    # station's threshold (or hit an undefined name on the first iteration).
    alert_rows = merged_df.loc[merged_df['weatherstationcode'] == station, 'alert']
    threshold = alert_rows.iloc[0] if not alert_rows.empty else np.nan

    # number of non-missing readings; the mean is only defined when there is at least one
    count = np.count_nonzero(~np.isnan(station_windspeeds))
    mean = np.nanmean(station_windspeeds) if count > 0 else np.nan

    if pd.isna(threshold) or station_windspeeds.size == 0:
        # without a threshold or without any records the probability is unknown, not 0
        prob = np.nan
    else:
        # share of records at or above the threshold, in percent; missing
        # readings still count in the denominator as non-exceedances
        prob = np.mean(station_windspeeds >= threshold) * 100

    prob_lst.append([station, station_windspeeds, threshold, count, mean, prob])

In [None]:
# viewing the probabilities as a dataframe
# column names are passed at construction so that an empty prob_lst still
# yields a correctly labelled (empty) frame instead of a length-mismatch error
prob_df = pd.DataFrame(
    prob_lst,
    columns=['station', 'windspeeds', 'threshold', 'count', 'mean', 'probability (%)'],
)
prob_df

In [None]:
print('max prob: ' + str(prob_df['probability (%)'].max()))
print('min prob: ' + str(prob_df['probability (%)'].min()))

In [None]:
# station mismatches between table 1 and table 2
prob_mismatch = prob_df[prob_df['count'] == 0]

In [None]:
# sort probability high to low
# NOTE(review): the trailing [:-5] removes the last five rows after sorting.
# sort_values places NaN last, so these are presumably the stations without a
# computable probability — confirm the count is always five, or drop NaN rows
# explicitly instead of relying on a fixed slice.
prob_sorted = prob_df.sort_values(by='probability (%)', ascending=False)[:-5]

In [None]:
# stations with less than 50 windspeed records
prob_less50 = prob_df[prob_df['count'] <50].sort_values(by='count', ascending=True)

In [None]:
def dist_boxplot(station):
    """Show a box plot of one station's wind speeds with its alert threshold.

    Reads the module-level `windspeed_2023_08_02` and `prob_df` tables. The
    threshold is drawn as a horizontal line and the station's PSPS probability
    (the 'probability (%)' column of `prob_df`) is printed in red on the plot.

    Raises IndexError if `station` has no row in `prob_df`.
    """
    plt.figure(figsize=(4, 4))

    # all wind-speed records of this station
    is_station = windspeed_2023_08_02['station'] == station
    speeds = np.array(windspeed_2023_08_02[is_station]['wind_speed'])
    sns.boxplot(speeds, width=0.2)

    # threshold and probability come from the same prob_df row(s)
    station_stats = prob_df[prob_df['station'] == station]
    alert_level = station_stats['threshold'].iloc[0]
    plt.axhline(alert_level)

    shutoff_prob = station_stats['probability (%)'].iloc[0]
    # the label sits at a fixed height of y=38 on the wind-speed axis
    plt.text(x=0, y=38, s='probability: ' + str(shutoff_prob), color='red')

    plt.title(station)
    plt.show()

In [None]:
# can run a loop to show all stations distribtuion
# for station in station_codes:
#     dist_boxplot(station)

# showing an example for station "AMO"
dist_boxplot("AMO")

## 3. Geospatial Visualization

Merging weather station data to conductor spans and displaying PSPS Probabilities across all 3 layers geospatially

### 3.1 New datasets

Here we introduce the 2 other datasets. They have GeoPandas "shape" attributes. 

4. `src_vri_snapshot_2024_03_20.csv`: Geospatial data and risk category for the Vegetation Risk Index (VRI) polygons.
5. `dev_wings_agg_span_2024_01_01.csv`: Information of conductor spans such as location, structure details, associated weather station, etc.

In [None]:
vri_df = pd.read_csv('data/src_vri_snapshot_2024_03_20.csv')
span_df = pd.read_csv('data/dev_wings_agg_span_2024_01_01.csv')

In [None]:
vri_df.head()

In [None]:
vri_df.columns

#### 3.1.1 Changing shape columns to geometry type

Currently, the `shape` column datatype is `str` when it should be geometry

We also need to reproject every dataset to the same CRS, EPSG:4326 (see each table's `shape_srid` column).

In [None]:
# changing the 'shape' columns from WKT strings to geometry type and bringing every layer to EPSG:4326
# set_crs only declares which CRS the stored coordinates are in; to_crs then reprojects them

gis_2024_1004['shape'] = gpd.GeoSeries.from_wkt(gis_2024_1004['shape'])
gis_gdf = gpd.GeoDataFrame(gis_2024_1004, geometry='shape').set_crs(epsg=4431).to_crs(epsg=4326)

vri_df['shape'] = gpd.GeoSeries.from_wkt(vri_df['shape'])
vri_gdf = gpd.GeoDataFrame(vri_df, geometry='shape').set_crs(epsg=4326)

span_df['shape'] = gpd.GeoSeries.from_wkt(span_df['shape'])
span_gdf = gpd.GeoDataFrame(span_df, geometry='shape').set_crs(epsg=2230).to_crs(epsg=4326)

In [None]:
# drop the shape_srid columns since we have reprojected and they are no longer correct/meaningful 
gis_gdf = gis_gdf.drop(columns=['shape_srid'])
vri_gdf = vri_gdf.drop(columns=['shape_srid'])
span_gdf = span_gdf.drop(columns=['shape_srid'])

#### 3.1.2 Merging datasets 

In [None]:
# merge on weather station codes, not yet spatial join using gpd
gis_vri_merge = gis_gdf.merge(vri_gdf, left_on='weatherstationcode', right_on='anemometercode')
gis_vri_merge

In [None]:
# find polygon centroids then merge with points
# stored as an extra column; 'shape' remains the active geometry of vri_gdf
# NOTE(review): vri_gdf is in EPSG:4326 (degrees), and geopandas warns that
# centroids computed in a geographic CRS may be inaccurate — confirm this is acceptable
vri_gdf['centroid'] = vri_gdf['shape'].centroid
vri_gdf.head()

In [None]:
# spatial join
vri_gis_sjoin = vri_gdf.sjoin(gis_gdf, how='inner')
vri_gis_sjoin.head()

> Found some anomalies in the dataframe sizes; there seem to be duplicates with the same station name

In [None]:
gis_gdf.shape

In [None]:
vri_gdf.shape

In [None]:
vri_gis_sjoin.shape
# one extra row?

In [None]:
vri_gis_sjoin.index.nunique()
# duplicates

In [None]:
# another spatial join
vri_wingspan_sjoin = vri_gdf.sjoin(span_gdf)
vri_wingspan_sjoin.head()

In [None]:
vri_wingspan_sjoin.shape

In [None]:
span_gdf.shape
# significantly less rows (intersections)

### 3.2 Visualization with probabilities

#### 3.2.1 Folium map with different layers

Weather station markers, VRI risks (heatmap), VRI areas (polygons), PSPS probability (heatmap)

In [None]:
# merge prob_df with the new spatially joined df
prob_merge = vri_gis_sjoin.merge(prob_df, left_on='weatherstationcode', right_on='station').merge(station_summary_2023_08_02, left_on='weatherstationcode', right_on='station')
prob_merge

In [None]:
## VRI risk heatmap
vri_weights = {'H': 3, 'M': 2, 'L': 1}
prob_merge['vri_weight'] = prob_merge['vri'].map(vri_weights)

## PSPS probability heatmap 
# FIXME: heatmap weights not displaying correctly
prob_quantiles = prob_merge['probability (%)'].quantile([0.25, 0.5, 0.75]).tolist()
psps_probs = prob_merge['probability (%)']
# Weight tiers: 1 below the 25th percentile, 2 below the median, 3 otherwise.
# A missing probability stays missing: every comparison with NaN is False, so
# it would otherwise fall through to the top tier (3) and never be removed by
# the dropna on 'psps_weight' that follows.
prob_weights = np.select(
    [psps_probs.isna(), psps_probs < prob_quantiles[0], psps_probs < prob_quantiles[1]],
    [np.nan, 1, 2],
    default=3,
).tolist()
prob_merge['psps_weight'] = prob_weights

# Drop rows lacking a position or either heatmap weight
cleaned_df = prob_merge.dropna(subset=['latitude', 'longitude', 'vri_weight', 'psps_weight'])

# [latitude, longitude, weight] triples for the VRI layer
heat_data = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        cleaned_df['latitude'], cleaned_df['longitude'], cleaned_df['vri_weight']
    )
]

# Base map centered on the mean position of the cleaned rows
middle_point = [cleaned_df['latitude'].mean(), cleaned_df['longitude'].mean()]
m = folium.Map(location=middle_point, zoom_start=10)

# Each heatmap lives in its own FeatureGroup so it can be toggled from the layer control
heatmap_layer = folium.FeatureGroup(name='VRI risk')
HeatMap(heat_data, min_opacity=0.2, radius=20, blur=15, max_zoom=1, name='VRI risk').add_to(heatmap_layer)
heatmap_layer.add_to(m)

# Same positions, weighted by the PSPS probability tier instead
psps_prob = folium.FeatureGroup(name='PSPS probability')
heat_data2 = [
    [lat, lon, weight]
    for lat, lon, weight in zip(
        cleaned_df['latitude'], cleaned_df['longitude'], cleaned_df['psps_weight']
    )
]
HeatMap(heat_data2, min_opacity=0.2, radius=20, blur=15, max_zoom=1, name='PSPS prob').add_to(psps_prob)
psps_prob.add_to(m)

## Add weather station points to the map
marker_group = folium.FeatureGroup(name="Weather stations")
for _, row in prob_merge.iterrows():
    # Create a popup with relevant information
    popup_text = f"""
    Weather Station: {row['weatherstationname']} ({row['weatherstationcode']})<br>
    Elevation: {row['elevation']} m<br>
    NWS Zone: {row['nwszone']}<br>
    PSPS Probability: {row['probability (%)']}<br>
    """
    
    # Add a marker for each weather station
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(popup_text, max_width=300),
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(marker_group)
marker_group.add_to(m)


## Add VRI polygons layer
vri_polygons = folium.FeatureGroup(name='VRI polygons')
for i in vri_gdf['shape']:
    folium.GeoJson(i).add_to(vri_polygons)
vri_polygons.add_to(m)


# Create a layer control object and add it to our map instance
folium.LayerControl().add_to(m)

# Save the map to an HTML file and display it
# m.save('layered_map.html')

# Display interactive map in Jupyter
m

#### 3.2.2 Conductor spans 

In [None]:
span_gdf.groupby(by='psps_station').count()
# each psps station has a different number of conductor spans

In [None]:
# create folium map object
conductor_map = folium.Map(location=middle_point)

## add weather station points to the map
marker_group = folium.FeatureGroup(name="Weather stations")
for _, row in gis_gdf.iterrows():
    # Create a popup with relevant information
    popup_text = f"""
    Weather Station: {row['weatherstationname']}<br>
    Structure ID: {row['structureid']}<br>
    """
    
    # Add a marker for each weather station
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(popup_text, max_width=300),
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(marker_group)
marker_group.add_to(conductor_map)

# add hlines (each line is blue and very short)
line_group = folium.FeatureGroup(name='Conductor spans')
# only using the first 1000 lines as examples so the map/file isn't too large
for i in span_gdf['shape'][:1000]:
    folium.GeoJson(i).add_to(line_group)
line_group.add_to(conductor_map)

# add lines, grouped by the psps station it is tied to 
# commented out bc the full map/file becomes too large to be uploaded to github
# for group_name, group_data in span_gdf.groupby('psps_station'):
#     feature_group = folium.FeatureGroup(name=str(group_name))
#     for _, row in group_data.head(1000).iterrows():
#         folium.GeoJson(
#             row['shape'],
#             name=str(group_name)
#         ).add_to(feature_group)
#     feature_group.add_to(conductor_map)


# add layer (to show the difference of added objects more clearly)
folium.LayerControl().add_to(conductor_map)

# Save the map to an HTML file and display it
# conductor_map.save('conductor_span_map.html')

conductor_map

## 4. Network Graph

Creating a network graph of spans to trace upstream/downstream through the grid and collect the list of weather stations that could cause a shut-off to any given span.

### 4.1 Making the Network Graph

In [None]:
# Normalise the two id columns used to build the graph: cast to str, then
# drop the first and last character of every value (the wrapping around the id).
# NOTE(review): astype(str) turns a missing value into the text 'nan', which
# the slice shortens to 'a' — confirm that spans without an upstream id are
# not meant to share a synthetic 'a' node once edges are added.

span_gdf['globalid'] = span_gdf['globalid'].astype(str).str[1:-1]

span_gdf['upstream_span_id'] = span_gdf['upstream_span_id'].astype(str).str[1:-1]

In [None]:
# upstream matches around half
span_gdf['upstream_span_id'].isin(span_gdf['globalid']).sum()

In [None]:
# empty upstream 
span_gdf['seg_upstream_trace'].isna().sum()

In [None]:
# create graph
down_g = nx.DiGraph()
up_g = nx.DiGraph()

In [None]:
# add nodes
down_g.add_nodes_from(span_gdf['globalid'])
up_g.add_nodes_from(span_gdf['globalid'])

In [None]:
# add node attributes
# per-span attributes: upstream id, upstream trace, tied weather station and that station's alert threshold
# (reset_index keeps the old index as an extra column; it is discarded by the column selection below)
partial_span_gdf = span_gdf[['globalid', 'upstream_span_id', 'seg_upstream_trace', 'psps_station']].reset_index()
# left merge so spans whose station has no summary row are kept (their 'alert' is NaN)
temp_merge = partial_span_gdf.merge(station_summary_2023_08_02, how='left', left_on='psps_station', right_on='station')
partial_span_gdf = temp_merge[['globalid', 'upstream_span_id', 'seg_upstream_trace', 'psps_station', 'alert']].set_index('globalid')
# {globalid: {attribute: value}}; assumes one row per globalid after the merge — TODO confirm
partial_span_dict = partial_span_gdf.to_dict('index')
# attributes are attached to down_g only; the nodes of up_g carry none
nx.set_node_attributes(down_g, partial_span_dict)

In [None]:
# match and add edges 
# .add_edge(u, v)  u -> v
# direction should go from up to down stream so searching for impact will be easier
down_edges = list(zip(span_gdf['upstream_span_id'], span_gdf['globalid']))
down_g.add_edges_from(down_edges);

In [None]:
# add the reversed edges (span -> its upstream span) to up_g,
# so a traversal of up_g from a span walks towards the source
up_edges = list(zip(span_gdf['globalid'], span_gdf['upstream_span_id']))
up_g.add_edges_from(up_edges);

### 4.2 Graph Functions

Functions that will be useful later, such as searching for upstream/downstream spans.

In [None]:
# using the network graph down_g

def downstream_spans(span):
    """Return `span` followed by every span reachable from it in `down_g`.

    The graph is walked breadth-first, so the result starts with the span
    itself and then lists the reachable spans in BFS order.
    """
    reached = [span]
    reached.extend(child for _, child in nx.bfs_edges(down_g, source=span))
    return reached

In [None]:
# using the network graph up_g

def upstream_spans(span):
    """Return `span` followed by every span reachable from it in `up_g`.

    `up_g` holds the span -> upstream-span edges, so the breadth-first walk
    lists the span itself and then its upstream spans in BFS order.
    """
    reached = [span]
    reached.extend(parent for _, parent in nx.bfs_edges(up_g, source=span))
    return reached

In [None]:
# function test
test_span = '23877069-F148-45BA-9EB8-CBE1DF9A6D87'
print('The downstream spans of test span are:')
print(downstream_spans(test_span))
print('The upstream spans of test span are:')
print(upstream_spans(test_span))

In [None]:
# look up a single stored attribute of a span
def span_attribute(span, attr):
    """Return the value stored under `attr` on node `span` of `down_g`."""
    node_data = down_g.nodes[span]
    return node_data[attr]

In [None]:
# which stations would call for a PSPS at the given windspeed
def trigger_psps(windspeed):
    """Return the stations whose 'alert' threshold is at or below `windspeed`.

    Reads the module-level `station_summary_2023_08_02` table.
    """
    reaches_alert = station_summary_2023_08_02['alert'] <= windspeed
    return station_summary_2023_08_02.loc[reaches_alert, 'station'].tolist()

In [None]:
# search nodes with this attribute = key, and give its downstream spans
# usage: you know the station name and wants to check the psps impact if that span is turned off

def attr_search_node(attr, key):
    """Find the nodes of `down_g` whose attribute `attr` equals `key`.

    Returns a DataFrame with one row per matching node and two columns:
    'globalid' (the node) and 'downstream_spans' (the list returned by
    downstream_spans() for that node, which starts with the node itself).
    When nothing matches, an empty frame with the same two columns is returned.
    """
    matching_nodes = [
        node
        for node, value in nx.get_node_attributes(down_g, attr).items()
        if value == key
    ]
    downstream_lists = [downstream_spans(node) for node in matching_nodes]

    # Built from named columns so that an empty result keeps both columns
    # instead of failing when the names are assigned afterwards.
    return pd.DataFrame({'globalid': matching_nodes, 'downstream_spans': downstream_lists})

### 4.3 Mapping Functions

Functions that will help us visualize and validate our data. Creating a map can take minutes due to the number of spans, so test/example calls are not included.

In [None]:
# maps all the spans associated with a given weather station code
# takes a long time to run because of the large number of spans
# example use: map_station_spans('VLC')

def map_station_spans(station):
    """Map a weather station together with the spans associated with it.

    The map shows a marker for the station (looked up in `merged_df` by
    weather station code) and every span tied to it through the
    'psps_station' node attribute, plus the downstream spans of those spans.

    Returns the folium map. Raises IndexError if `station` has no row in
    `merged_df`.
    """
    # create map
    m = folium.Map(location=middle_point, zoom_start=10)

    # add station marker to map
    station_rows = merged_df[merged_df['weatherstationcode'] == station]
    # Create a popup with relevant information
    popup_text = f"""
    Weather Station: {station_rows['weatherstationname'].iloc[0]}<br>
    Structure ID: {station_rows['structureid'].iloc[0]}<br>
    Alert Windspeed: {station_rows['alert'].iloc[0]}<br>
    """
    # scalar coordinates of the first matching row (not whole Series), like
    # the popup fields above
    station_location = [station_rows['latitude'].iloc[0], station_rows['longitude'].iloc[0]]
    folium.Marker(location=station_location,
                  popup=folium.Popup(popup_text, max_width=300),
                  icon=folium.Icon(color='blue', icon='info-sign')).add_to(m)

    # find related spans: the station's own spans plus everything downstream of them
    spans_df = attr_search_node('psps_station', station)
    span_ids = set(spans_df['globalid'])
    for downstream_ids in spans_df['downstream_spans']:
        span_ids.update(downstream_ids)

    # add spans to map: select all of them in one pass and draw them as one layer
    station_spans = span_gdf[span_gdf['globalid'].isin(span_ids)]
    if not station_spans.empty:
        folium.GeoJson(station_spans).add_to(m)

    return m

In [None]:
# maps the downstream spans of a given span globalid
# example use: map_downstream_spans('23877069-F148-45BA-9EB8-CBE1DF9A6D87')

def map_downstream_spans(span):
    """Map a span and all spans downstream of it.

    Returns a tuple (map, span layer, marker layer). The layers are returned
    separately because map_whole_stream() re-uses them on its own map.
    Raises IndexError if `span` has no row in `span_gdf`.
    """
    # create map
    m = folium.Map(location=middle_point, zoom_start=10)

    # find downstream spans (the list starts with the span itself)
    downstream_ids = downstream_spans(span)

    # add spans to map: one selection over span_gdf instead of one scan per span
    downstream_shapes = span_gdf.loc[span_gdf['globalid'].isin(downstream_ids), 'shape']
    span_layer = folium.FeatureGroup(name='spans')
    if not downstream_shapes.empty:
        folium.GeoJson(downstream_shapes).add_to(span_layer)
    span_layer.add_to(m)

    # add marker because span might be too small
    own_shape = span_gdf.loc[span_gdf['globalid'] == span, 'shape']
    coords = own_shape.iloc[0].coords
    center = (coords[0][1], coords[0][0]) # geometry stores (lon, lat); folium expects (lat, lon)
    marker_layer = folium.FeatureGroup(name='span marker')
    folium.Marker(location=center, icon=folium.Icon(color='red', icon='info-sign')).add_to(marker_layer)
    marker_layer.add_to(m)

    # returns multiple layers because this function is used inside another mapping function
    return m, span_layer, marker_layer

In [None]:
# maps the upstream spans of a given span globalid
# example use: map_upstream_spans('23877069-F148-45BA-9EB8-CBE1DF9A6D87')

def map_upstream_spans(span):
    """Map a span and all spans upstream of it, drawn in red.

    Returns a tuple (map, span layer, marker layer). The layers are returned
    separately because map_whole_stream() re-uses them on its own map.
    Upstream ids that have no row in `span_gdf` are skipped.
    Raises IndexError if `span` itself has no row in `span_gdf`.
    """
    # create map
    m = folium.Map(location=middle_point, zoom_start=10)

    # find upstream spans (the list starts with the span itself)
    upstream_ids = upstream_spans(span)

    # add spans to map: one selection over span_gdf instead of one scan per
    # span; ids absent from span_gdf simply select nothing
    upstream_shapes = span_gdf.loc[span_gdf['globalid'].isin(upstream_ids), 'shape']
    style = {'color': 'red'}
    span_layer = folium.FeatureGroup(name='spans')
    if not upstream_shapes.empty:
        folium.GeoJson(upstream_shapes, style_function=lambda x: style).add_to(span_layer)
    span_layer.add_to(m)

    # add marker because span might be too small
    own_shape = span_gdf.loc[span_gdf['globalid'] == span, 'shape']
    coords = own_shape.iloc[0].coords
    center = (coords[0][1], coords[0][0]) # geometry stores (lon, lat); folium expects (lat, lon)
    marker_layer = folium.FeatureGroup(name='span marker')
    folium.Marker(location=center, icon=folium.Icon(color='red', icon='info-sign')).add_to(marker_layer)
    marker_layer.add_to(m)

    # returns multiple layers because this function is used inside another mapping function
    return m, span_layer, marker_layer

In [None]:
# maps the whole stream of a given span
# example use: map_whole_stream('23877069-F148-45BA-9EB8-CBE1DF9A6D87')

def map_whole_stream(span):
    """Map the downstream and upstream spans of `span` on a single map.

    The two single-direction mapping functions are called for their layers
    only; the maps they build themselves are discarded.
    """
    # build both directions first; the marker layer of the upstream call is the one kept
    _, down_layer, marker_layer = map_downstream_spans(span)
    _, up_layer, marker_layer = map_upstream_spans(span)

    # fresh map that receives the downstream, upstream and marker layers in that order
    m = folium.Map(location=middle_point, zoom_start=10)
    for layer in (down_layer, up_layer, marker_layer):
        layer.add_to(m)

    return m

## 5. Probability of Conductor Spans

Computing PSPS Probability of every conductor span.

In [None]:
def calculate_station_psps_probability(windspeed_data, station_summary_data):
    """Compute, per station, the fraction of wind records at or above its alert speed.

    Args:
        windspeed_data: DataFrame with 'station' and 'wind_speed' columns.
        station_summary_data: DataFrame with 'station' and 'alert' columns.

    Returns:
        dict mapping station -> fraction (0.0 to 1.0) of that station's
        records whose wind speed is >= the station's alert speed. Stations
        without any wind records, or without an alert speed (NaN), are left
        out rather than reported as 0.
    """
    # split the wind records once instead of filtering the full table per station
    speeds_by_station = dict(iter(windspeed_data.groupby('station')['wind_speed']))

    station_psps_probabilities = {}
    for station, alert_speed in zip(station_summary_data['station'], station_summary_data['alert']):
        if pd.isna(alert_speed):
            # every comparison with NaN is False, which would read as probability 0
            continue

        station_speeds = speeds_by_station.get(station)
        if station_speeds is None:
            continue

        # the denominator counts every record of the station, including missing speeds
        count_above_alert = int((station_speeds >= alert_speed).sum())
        station_psps_probabilities[station] = count_above_alert / len(station_speeds)

    return station_psps_probabilities

station_psps_probabilities = calculate_station_psps_probability(windspeed_2023_08_02, station_summary_2023_08_02)

In [None]:
# first weather station psps
first_key = next(iter(station_psps_probabilities))
first_value = station_psps_probabilities[first_key]
first_value

In [None]:
max_psps_prob_station = max(station_psps_probabilities, key=station_psps_probabilities.get)
print(max_psps_prob_station, ":",station_psps_probabilities[max_psps_prob_station])

Weight the contributions from downstream spans based on the distance to the given span. Total weight starts at 1 for the given span and then adds the weights of all downstream spans.

In [None]:
import networkx.algorithms.shortest_paths.weighted as nx_shortest_path

def _graph_node_id(span_id, graph):
    """Return the node of `graph` that corresponds to `span_id`, or None.

    The id is tried as given first; a string id is then retried without
    surrounding braces, so '{ABC}' finds the node 'ABC'.
    """
    if span_id in graph:
        return span_id
    if isinstance(span_id, str):
        bare_id = span_id.strip('{}')
        if bare_id in graph:
            return bare_id
    return None


def calculate_psps_probability(conductor_span_data, windspeed_data, down_g, station_psps_probabilities):
    """Compute a PSPS probability for every conductor span.

    A span starts from the probability of its own weather station (weight 1).
    Each direct successor in `down_g` whose probability was already computed
    by an earlier row is blended in with weight 1 / (distance + 1); the
    result is the weighted average. Because only already-computed successors
    contribute, the result depends on the row order of `conductor_span_data`.

    Args:
        conductor_span_data: DataFrame with 'globalid' and 'psps_station' columns.
        windspeed_data: unused; kept so existing calls keep working.
        down_g: directed graph whose edges point from upstream to downstream span.
        station_psps_probabilities: dict mapping station -> probability.

    Returns:
        dict mapping each span's 'globalid' (exactly as it appears in
        `conductor_span_data`) to its probability. Spans with a missing or
        unknown station are left out.
    """
    psps_probabilities = {}
    # the same probabilities keyed by graph node id, used to look up successors;
    # needed because the ids in the table may be brace-wrapped while the nodes are not
    node_probabilities = {}

    for _, row in conductor_span_data.iterrows():
        span_id = row['globalid']

        psps_station = row['psps_station']
        if pd.isna(psps_station) or psps_station not in station_psps_probabilities:
            continue

        span_psps_prob = station_psps_probabilities[psps_station]

        node_id = _graph_node_id(span_id, down_g)
        successors = list(down_g.successors(node_id)) if node_id is not None else []
        total_weight = 1.0  # initiate the weight for the given span
        weighted_sum = span_psps_prob

        for successor in successors:
            if successor not in node_probabilities:
                continue
            # distance between the given span and downstream span
            try:
                distance = nx_shortest_path.dijkstra_path_length(down_g, node_id, successor, weight='weight')
            except nx.NetworkXNoPath:
                continue
            weight = 1 / (distance + 1)  # weight is inversely proportional to distance
            total_weight += weight
            weighted_sum += node_probabilities[successor] * weight

        psps_prob = weighted_sum / total_weight
        psps_probabilities[span_id] = psps_prob
        if node_id is not None:
            node_probabilities[node_id] = psps_prob

    return psps_probabilities

# PSPS probabilities for all spans
# NOTE(review): this passes span_df, whose 'globalid' values did not go through
# the [1:-1] trimming applied to span_gdf before the graph was built (Task 2
# below trims the id again before querying down_g) — confirm that these ids
# actually match the node ids of down_g.
psps_probabilities = calculate_psps_probability(span_df, windspeed_2023_08_02, down_g, station_psps_probabilities)

### Task 1: Span(s) impacted by the greatest number of weather stations

In [None]:
# number of spans tied to each weather station
station_counts = span_df['psps_station'].value_counts()
# the largest span count among all stations
max_station_count = station_counts.max()
# unique ids of every span tied to the station(s) with that largest count
# NOTE(review): this lists the spans of the station that serves the most
# spans; it does not count how many stations affect each span — confirm this
# matches the task stated in the heading.
spans_with_max_stations = span_df[span_df['psps_station'].isin(station_counts[station_counts == max_station_count].index)]['globalid'].unique()
print(spans_with_max_stations)

### Task 2: Span with the highest probability of being shut off

In [None]:
max_psps_prob_span = max(psps_probabilities, key=psps_probabilities.get)
print(max_psps_prob_span, ":",psps_probabilities[max_psps_prob_span])

In [None]:
# details of the span with the highest PSPS probability
span_id = max_psps_prob_span
span_details = span_df[span_df['globalid'] == span_id]
print(span_details['station'])
# The successors are stored under their own name: rebinding `downstream_spans`
# here would replace the downstream_spans() helper and break every later call
# to it (attr_search_node, the mapping functions).
# span_id[1:-1] trims the first and last character so the id matches the graph's node ids.
max_span_successors = list(down_g.successors(span_id[1:-1]))
has_downstream = len(max_span_successors) > 0
print(has_downstream)

### Task 3: Probability of any span within parent feederid 222 being shut off

In [None]:
# the same feederid 222 is recorded as both an int and str in the dataset
# so we concatenate them into one dataset with 3024 spans
df1 = span_df[span_df['parent_feederid'] == '222']
df2 = span_df[span_df['parent_feederid'] == 222]
feeder222_df = pd.concat([df1, df2], ignore_index=True)
feeder222_df

In [None]:
feeder_222_spans = feeder222_df['globalid']
feeder_222_probs = [psps_probabilities[span] for span in feeder_222_spans if span in psps_probabilities]
feeder_222_prob = sum(feeder_222_probs) / len(feeder_222_probs) if feeder_222_probs else 0
feeder_222_prob

## 6. Estimating the Future

Estimate expected customers that'll be shut-off over the next 10 years at the span/segment/circuit granularity.

The code below focuses on feederid 222.

In [None]:
# Expected number of customers shut off across feeder 222: for every span with
# a known PSPS probability, probability * downstream customer total, summed.
# The customer totals are indexed by span id once (first row per id) instead
# of scanning the whole span table for every span.
first_customer_count = (
    span_df.drop_duplicates(subset='globalid')
    .set_index('globalid')['downstream_cust_total']
)

expected_customers_shutoff = 0
for span in feeder_222_spans:
    if span in psps_probabilities and span in first_customer_count.index:
        expected_customers_shutoff += psps_probabilities[span] * first_customer_count[span]
print(expected_customers_shutoff)