In [99]:
import geopandas as gpd

# Data provenance: U.S. Energy Information Administration (EIA) Atlas website,
# Regional Transmission Organizations (RTO) dataset. For more details, visit:
# https://atlas.eia.gov/datasets/b286c693074045b3ac9b5d7300162e99_256/explore

# Read the RTO/ISO polygons from the local GeoJSON source
rto_regions = gpd.read_file("src/RTO_Regions.geojson")

# Keep only rows whose map type is 'Region' and whose RTO/ISO is 'PJM'
is_region = rto_regions['Map_Type'] == 'Region'
is_pjm = rto_regions['RTO_ISO'] == 'PJM'
pjm_zones = rto_regions[is_region & is_pjm]

# Retain just the zone name and its polygon, on a fresh 0-based index
pjm_zones = pjm_zones[['LOC_NAME', 'geometry']].copy().reset_index(drop=True)

# Number the zones 1..N in row order
pjm_zones['zone_ID'] = list(range(1, len(pjm_zones) + 1))

# Persist the zone polygons together with their IDs
pjm_zones.to_file('data/pjm_zone.geojson', driver='GeoJSON')

# Persist the zone_ID / LOC_NAME lookup table (no geometry) as CSV;
# `gdf` is left bound to this lookup table at the end of the cell
gdf = pjm_zones[['zone_ID', 'LOC_NAME']].copy()
gdf.to_csv('data/pjm_zone.csv')


In [100]:
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString, Point

# Set the target voltage level for filtering lines
# NOTE(review): presumably expressed in the same unit as the VOLTAGE column (kV?) — confirm against the dataset docs
Target_voltage_level = 69

# Load the geographic data for electric power transmission lines and PJM zones
gdf = gpd.read_file('src/Electric__Power_Transmission_Lines.geojson')
gdf_zone = gpd.read_file("data/pjm_zone.geojson")

# Filter lines where the voltage is greater than or equal to the target level
gdf = gdf[gdf['VOLTAGE'] >= Target_voltage_level]

# Keep only the columns needed downstream. The explicit .copy() makes this an
# independent frame, so the column assignments that follow do not operate on a
# slice of the loaded data (avoids pandas' SettingWithCopyWarning).
gdf = gdf[['ID', 'SHAPE_Length', 'VOLTAGE', 'geometry']].copy()

# Function to extract start or end points from line geometries
def extract_endpoints(geometry, endpoint_type):
    """Return the first or last coordinate of a line geometry.

    Parameters
    ----------
    geometry : object
        A shapely ``LineString`` or ``MultiLineString``. Any other value
        (including ``None``) is treated as unsupported.
    endpoint_type : str
        ``'start'`` selects the first coordinate; any other value selects
        the last coordinate.

    Returns
    -------
    tuple or None
        The coordinate tuple of the requested endpoint. For a
        ``MultiLineString`` the start is taken from its first non-empty part
        and the end from its last non-empty part. ``None`` is returned for
        unsupported inputs and for geometries that contain no coordinates.
    """
    if isinstance(geometry, LineString):
        parts = [geometry]
    elif isinstance(geometry, MultiLineString):
        parts = list(geometry.geoms)
    else:
        return None

    # Empty geometries have no coordinates; indexing them would raise IndexError.
    parts = [part for part in parts if not part.is_empty]
    if not parts:
        return None

    if endpoint_type == 'start':
        return parts[0].coords[0]
    return parts[-1].coords[-1]

# Apply the function to extract endpoints (coordinate tuples, or None when the
# geometry is missing or is not a LineString / MultiLineString)
gdf['start_point'] = gdf['geometry'].apply(lambda geom: extract_endpoints(geom, 'start'))
gdf['end_point'] = gdf['geometry'].apply(lambda geom: extract_endpoints(geom, 'end'))
gdf = gdf.drop(columns='geometry')

# Discard rows without usable endpoints: Point(None) is not a valid
# construction, and such rows could never be matched to a zone anyway
gdf = gdf.dropna(subset=['start_point', 'end_point'])

# Convert the endpoint coordinates into Point geometries
gdf['start_point'] = gdf['start_point'].apply(Point)
gdf['end_point'] = gdf['end_point'].apply(Point)

# Convert DataFrame to GeoDataFrame and set the geometry to start points
# NOTE(review): assumes the source coordinates are lon/lat in EPSG:4326 — confirm
gdf = gpd.GeoDataFrame(gdf, geometry='start_point', crs="EPSG:4326")

# Spatial join with zones to determine the start zone of each line
# (left join: lines starting outside every zone keep a missing start_zone)
gdf = gpd.sjoin(gdf, gdf_zone[['zone_ID', 'geometry']], how="left", predicate="within")
gdf = gdf.rename(columns={'zone_ID': 'start_zone'}).drop(columns='index_right')

# Set the geometry to end points and repeat the spatial join for the end zones
gdf = gdf.set_geometry('end_point', crs="EPSG:4326")
gdf = gpd.sjoin(gdf, gdf_zone[['zone_ID', 'geometry']], how="left", predicate="within")
gdf = gdf.rename(columns={'zone_ID': 'end_zone'}).drop(columns='index_right')

# Remove lines for which neither end falls inside a PJM zone
gdf = gdf.dropna(subset=['start_zone', 'end_zone'], how='all')

# Keep only lines whose two ends are not in the same zone
# (a line with exactly one end inside PJM also passes this comparison)
gdf = gdf[gdf['start_zone'] != gdf['end_zone']]

# Separate the remaining lines into:
#   - interface lines: exactly one end inside a PJM zone (the other zone is missing)
#   - inter-zone lines: both ends inside PJM, in two different zones
gdf_interface = gdf[gdf['start_zone'].isna() | gdf['end_zone'].isna()].copy()
gdf_lines = gdf.dropna(subset=['start_zone', 'end_zone'], how='any').copy()

# Remove point geometries as they are no longer needed in the output
gdf_lines = gdf_lines.drop(columns=['start_point', 'end_point'])
gdf_interface = gdf_interface.drop(columns=['start_point', 'end_point'])

# Output the processed data to CSV files
gdf_lines.to_csv('data/pjm_line.csv')
gdf_interface.to_csv('data/pjm_interface.csv')

In [101]:
import geopandas as gpd

# Load power plant data and PJM zone data from GeoJSON files
gdf = gpd.read_file("src/Power_Plants.geojson")
gdf_zone = gpd.read_file("data/pjm_zone.geojson")

# Select relevant columns for power plant capacities and location
gdf = gdf[['Bat_MW', 'Bio_MW', 'Coal_MW', 'Geo_MW', 'Hydro_MW', 'HydroPS_MW', 'NG_MW', 
           'Nuclear_MW', 'Crude_MW', 'Solar_MW', 'Wind_MW', 'Other_MW', 'Longitude', 'Latitude']]

# Create GeoDataFrame with point geometry from longitude and latitude columns
gdf = gpd.GeoDataFrame(
    gdf,
    geometry=gpd.points_from_xy(gdf.Longitude, gdf.Latitude),
    crs="EPSG:4326"  # WGS 84 coordinate reference system
)

# Perform spatial join with PJM zones to find power plants within each zone
# (inner join: plants that fall inside no zone are discarded)
gdf = gpd.sjoin(
    gdf,
    gdf_zone[["zone_ID", "geometry"]],
    how="inner", 
    predicate="within"
)

# Drop the index column created by spatial join
gdf = gdf.drop(columns=["index_right"])

# Output the count of power plants located within the PJM zones; the zone
# count is read from the zone file instead of being hard-coded in the message
zone_count = gdf_zone["zone_ID"].nunique()
print(f"Number of power plants within the {zone_count} PJM zones:", len(gdf))

# Reset index for the DataFrame and drop the geometry column
gdf = gdf.reset_index(drop=True)
gdf = gdf.drop(columns='geometry')

# Save the processed data to a CSV file
gdf.to_csv('data/pjm_power.csv')


Number of power plants within the 22 PJM zones: 1806


In [1]:
import pandas as pd
import h5py

# Converts a line-ratings HDF5 file into a flat CSV table.
# Input: hourly dynamic line ratings for existing transmission across the
# contiguous United States, available from:
# https://catalog.data.gov/dataset/hourly-dynamic-line-ratings-for-existing-transmission-across-the-contiguous-united-states

# Point this at the .h5 file to convert ('SLR_A-75C.h5' by default).
file_path = 'src/SLR_A-75C.h5'

# Pull both datasets fully into memory while the file is open
with h5py.File(file_path, 'r') as h5_file:
    line_ids = h5_file['index'][:]  # contents of the 'index' dataset
    ratings = h5_file['data'][:]    # contents of the 'data' dataset

# Assemble the table column by column
df = pd.DataFrame()
df['line_ID'] = line_ids  # Line IDs corresponding to transmission lines
df['FMAX'] = ratings      # Maximum permissible flow in amperes (A)

# Write the table to CSV
df.to_csv('data/line_ratings.csv')
