In [1]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO

## ISO-NY and ERCOT vibe programming

In [2]:
# Define the URL for the 2023 NYISO Day-Ahead LMP data
url = "https://www.eia.gov/electricity/wholesalemarkets/csv/nyiso_lmp_da_hr_zones_2023.csv"

# Create a directory to save the data
save_dir = "nyiso_data"
os.makedirs(save_dir, exist_ok=True)

# Define the path to save the CSV file
file_path = os.path.join(save_dir, "nyiso_lmp_da_hr_zones_2023.csv")

# Download the CSV file
print(f"🔍 Attempting to download data from {url}")
response = requests.get(url)
if response.status_code == 200:
    with open(file_path, "wb") as f:
        f.write(response.content)
    print(f"✅ Data successfully saved to {file_path}")
else:
    print(f"❌ Failed to download data. Status code: {response.status_code}")

# Optional: Load and display the first few rows of the data
try:
    df = pd.read_csv(file_path)
    print("📊 Preview of the downloaded data:")
    print(df.head())
except Exception as e:
    print(f"⚠️ An error occurred while reading the CSV file: {e}")

🔍 Attempting to download data from https://www.eia.gov/electricity/wholesalemarkets/csv/nyiso_lmp_da_hr_zones_2023.csv
✅ Data successfully saved to nyiso_data/nyiso_lmp_da_hr_zones_2023.csv
⚠️ An error occurred while reading the CSV file: Error tokenizing data. C error: Expected 1 fields in line 4, saw 38



In [9]:
# --------------------------
# STEP 1: Load and Clean Data
# --------------------------

# Path to the CSV file (replace with your own file path if needed)
file_path = "nyiso_data/nyiso_lmp_da_hr_zones_2023.csv"

# Skip the first 3 metadata rows, parse dates
df = pd.read_csv(file_path, skiprows=3)

# Convert Local Date to datetime format
df['Local Date'] = pd.to_datetime(df['Local Date'])

# --------------------------
# STEP 2: Filter for 2023 to Present
# --------------------------
df = df[df['Local Date'] >= '2023-01-01']

# --------------------------
# STEP 3: Select Zone LMP Columns
# --------------------------
# Note: Columns 5 to 15 contain LMPs for Zones A–K (based on structure from preview)
lmp_columns = [
    'A - West LMP', 'B - Genessee LMP', 'C - Central LMP', 'D - North LMP',
    'E - Mohawk Valley LMP', 'F - Capital LMP', 'G - Hudson Valley LMP',
    'H - Millwood LMP', 'I - Dunwoodie LMP', 'J - New York City LMP', 'K - Long Island LMP'
]

# Set index for time-based resampling
df.set_index('Local Date', inplace=True)

# --------------------------
# STEP 4: Resample Quarterly and Aggregate
# --------------------------
# Calculate quarterly average LMPs
quarterly_lmp = df[lmp_columns].resample('Q').mean()

# --------------------------
# STEP 5: Save or Display Results
# --------------------------
# Option 1: Display as a table
print("Quarterly Average LMPs for ISO-NY (2023–Present):")
print(quarterly_lmp)

# Option 2: Save to a CSV file
quarterly_lmp.to_csv("nyiso_quarterly_LMP_averages.csv")

Quarterly Average LMPs for ISO-NY (2023–Present):
            A - West LMP  B - Genessee LMP  C - Central LMP  D - North LMP  \
Local Date                                                                   
2023-03-31     27.168958         28.231000        29.929328      25.662214   
2023-06-30     19.971639         20.157711        20.834418      18.401658   
2023-09-30     32.848649         32.929444        33.374268      32.248876   
2023-12-31     26.686222         26.764671        27.621269      25.326964   

            E - Mohawk Valley LMP  F - Capital LMP  G - Hudson Valley LMP  \
Local Date                                                                  
2023-03-31              30.803881        50.129430              42.091834   
2023-06-30              21.244290        29.697436              27.129849   
2023-09-30              34.280960        38.640038              36.757386   
2023-12-31              28.445845        33.011626              31.999057   

            H - Mi

  quarterly_lmp = df[lmp_columns].resample('Q').mean()


In [21]:
data_dir = Path("/Users/alanwang/STAT390/nyiso_data")  
csv_files = list(data_dir.glob("nyiso_lmp_da_hr_zones_*.csv"))

output_dir = data_dir / "quarterly_outputs"
output_dir.mkdir(exist_ok=True)

for file in csv_files:
    print(f"📄 Processing {file.name}")
    try:
        df = pd.read_csv(file, skiprows=3)

        # Parse datetime from 'Local Date'
        df['Local Date'] = pd.to_datetime(df['Local Date'])
        df['Quarter'] = df['Local Date'].dt.to_period('Q').astype(str)

        # Filter for LMP columns only (exclude Congestion/Loss)
        lmp_cols = [col for col in df.columns if 'LMP' in col and 'Congestion' not in col and 'Loss' not in col]
        selected_cols = ['Local Date', 'Quarter'] + lmp_cols

        df = df[selected_cols]

        # Split and write each quarter to its own file
        for quarter, group in df.groupby('Quarter'):
            filename = f"NYISO_LMP_{quarter}.csv"
            group.drop(columns='Quarter').to_csv(output_dir / filename, index=False)
            print(f"✅ Saved: {filename}")

    except Exception as e:
        print(f"⚠️ Failed to process {file.name}: {e}")


📄 Processing nyiso_lmp_da_hr_zones_2025.csv
✅ Saved: NYISO_LMP_2025Q1.csv
✅ Saved: NYISO_LMP_2025Q2.csv
📄 Processing nyiso_lmp_da_hr_zones_2024.csv
✅ Saved: NYISO_LMP_2024Q1.csv
✅ Saved: NYISO_LMP_2024Q2.csv
✅ Saved: NYISO_LMP_2024Q3.csv
✅ Saved: NYISO_LMP_2024Q4.csv
📄 Processing nyiso_lmp_da_hr_zones_2023.csv
✅ Saved: NYISO_LMP_2023Q1.csv
✅ Saved: NYISO_LMP_2023Q2.csv
✅ Saved: NYISO_LMP_2023Q3.csv
✅ Saved: NYISO_LMP_2023Q4.csv
📄 Processing nyiso_lmp_da_hr_zones_2022.csv
✅ Saved: NYISO_LMP_2022Q1.csv
✅ Saved: NYISO_LMP_2022Q2.csv
✅ Saved: NYISO_LMP_2022Q3.csv
✅ Saved: NYISO_LMP_2022Q4.csv


In [18]:
print(f"🧠 Columns in {file.name}: {df.columns.tolist()}")

🧠 Columns in nyiso_lmp_da_hr_zones_2022.csv: ['UTC Timestamp (Interval Ending)', 'Local Timestamp Eastern Time (Interval Beginning)', 'Local Timestamp Eastern Time (Interval Ending)', 'Local Date', 'Hour Number', 'A - West LMP', 'B - Genessee LMP', 'C - Central LMP', 'D - North LMP', 'E - Mohawk Valley LMP', 'F - Capital LMP', 'G - Hudson Valley LMP', 'H - Millwood LMP', 'I - Dunwoodie LMP', 'J - New York City LMP', 'K - Long Island LMP', 'A - West (Congestion)', 'B - Genessee (Congestion)', 'C - Central (Congestion)', 'D - North (Congestion)', 'E - Mohawk Valley (Congestion)', 'F - Capital (Congestion)', 'G - Hudson Valley (Congestion)', 'H - Millwood (Congestion)', 'I - Dunwoodie (Congestion)', 'J - New York City (Congestion)', 'K - Long Island (Congestion)', 'A - West (Loss)', 'B - Genessee (Loss)', 'C - Central (Loss)', 'D - North (Loss)', 'E - Mohawk Valley (Loss)', 'F - Capital (Loss)', 'G - Hudson Valley (Loss)', 'H - Millwood (Loss)', 'I - Dunwoodie (Loss)', 'J - New York City 

# ercot

In [1]:
import requests
import pandas as pd
from io import BytesIO
from zipfile import ZipFile
import os

# ------------------ 🔐 Replace with YOUR INFO -------------------
USERNAME = "alanwang2025@u.northwestern.edu"
PASSWORD = "Fork102$"  # <<< Replace with your actual password
SUBSCRIPTION_KEY = "7076e411aeeb461e8bb085df1690f0cd"  # Your ERCOT Primary Key
# ---------------------------------------------------------------

# Step 1: Authenticate to get an ID Token
print("🔑 Authenticating...")
token_url = "https://ercotb2c.b2clogin.com/ercotb2c.onmicrosoft.com/B2C_1_PUBAPI-ROPC-FLOW/oauth2/v2.0/token"

token_data = {
    "grant_type": "client_credentials",
    "scope": "openid fec253ea-0d06-4272-a5e6-b478baeecd70 offline_access",
    "client_id": "fec253ea-0d06-4272-a5e6-b478baeecd70",
    "username": "alanwang2025@u.northwestern.edu",
    "password": "Fork10$",
    "response_type": "id_token"
}

response = requests.post(token_url, data=token_data)
response.raise_for_status()
id_token = response.json()["id_token"]
print("✅ Token received.")

# Step 2: Access the NP4-183-CD report endpoint
headers = {
    "Authorization": f"Bearer {id_token}",
    "Ocp-Apim-Subscription-Key": SUBSCRIPTION_KEY
}

print("📦 Accessing ERCOT API for DAM Hourly LMPs...")
report_endpoint = "https://api.ercot.com/api/public-reports/np4-183-cd"
r = requests.get(report_endpoint, headers=headers)
r.raise_for_status()
print(response.json())
artifacts = r.json()["_embedded"]["artifacts"]

# Step 3: Download and extract ZIPs
output_dir = "ercot_quarterly_outputs"
os.makedirs(output_dir, exist_ok=True)

for artifact in artifacts:
    download_url = artifact["_links"]["endpoint"]["href"]
    print(f"⬇️ Downloading: {download_url}")
    zip_r = requests.get(download_url, headers=headers)
    if zip_r.status_code == 200:
        with ZipFile(BytesIO(zip_r.content)) as z:
            for file_info in z.infolist():
                if file_info.filename.endswith(".csv"):
                    with z.open(file_info) as f:
                        try:
                            df = pd.read_csv(f)
                            df["Delivery Date"] = pd.to_datetime(df["Delivery Date"])
                            df["Quarter"] = df["Delivery Date"].dt.to_period("Q").astype(str)
                            for quarter, group in df.groupby("Quarter"):
                                out_path = os.path.join(output_dir, f"ERCOT_LMP_{quarter}.csv")
                                if os.path.exists(out_path):
                                    group.to_csv(out_path, mode='a', header=False, index=False)
                                else:
                                    group.to_csv(out_path, index=False)
                        except Exception as e:
                            print(f"❌ Error processing {file_info.filename}: {e}")
    else:
        print(f"⚠️ Failed to download {download_url}")


🔑 Authenticating...


HTTPError: 400 Client Error: Bad Request for url: https://ercotb2c.b2clogin.com/ercotb2c.onmicrosoft.com/B2C_1_PUBAPI-ROPC-FLOW/oauth2/v2.0/token

In [7]:
import requests

token_url = "https://ercotb2c.b2clogin.com/ercotb2c.onmicrosoft.com/B2C_1_PUBAPI-ROPC-FLOW/oauth2/v2.0/token"

headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}

token_data = {
    "grant_type": "password",
    "scope": "openid fec253ea-0d06-4272-a5e6-b478baeecd70 offline_access",
    "client_id": "fec253ea-0d06-4272-a5e6-b478baeecd70",
    "username": "alanwang2025@u.northwestern.edu",
    "password": "Fork102$"
}

response = requests.post(token_url, data=token_data, headers=headers)
response.raise_for_status()
access_token = response.json()["access_token"]
print("✅ Token received.")

✅ Token received.


In [8]:
import base64
import json

token_parts = access_token.split(".")
payload = token_parts[1] + '=' * (-len(token_parts[1]) % 4)  # fix padding
decoded = json.loads(base64.urlsafe_b64decode(payload.encode()).decode())
print(json.dumps(decoded, indent=2))

{
  "aud": "fec253ea-0d06-4272-a5e6-b478baeecd70",
  "iss": "https://ercotb2c.b2clogin.com/6df17afa-1b36-499a-83f7-56779ad0b9a6/v2.0/",
  "exp": 1745532246,
  "nbf": 1745528646,
  "idp": "LocalAccount",
  "oid": "8de9998e-1bce-4aa0-91c4-9dd4d264f0cd",
  "sub": "8de9998e-1bce-4aa0-91c4-9dd4d264f0cd",
  "given_name": "Alan",
  "family_name": "Wang",
  "emails": [
    "alanwang2025@u.northwestern.edu"
  ],
  "tfp": "B2C_1_PUBAPI-ROPC-FLOW",
  "azp": "fec253ea-0d06-4272-a5e6-b478baeecd70",
  "ver": "1.0",
  "iat": 1745528646
}


In [9]:
access_token

'eyJhbGciOiJSUzI1NiIsImtpZCI6Ilg1ZVhrNHh5b2pORnVtMWtsMll0djhkbE5QNC1jNTdkTzZRR1RWQndhTmsiLCJ0eXAiOiJKV1QifQ.eyJhdWQiOiJmZWMyNTNlYS0wZDA2LTQyNzItYTVlNi1iNDc4YmFlZWNkNzAiLCJpc3MiOiJodHRwczovL2VyY290YjJjLmIyY2xvZ2luLmNvbS82ZGYxN2FmYS0xYjM2LTQ5OWEtODNmNy01Njc3OWFkMGI5YTYvdjIuMC8iLCJleHAiOjE3NDU1MzIyNDYsIm5iZiI6MTc0NTUyODY0NiwiaWRwIjoiTG9jYWxBY2NvdW50Iiwib2lkIjoiOGRlOTk5OGUtMWJjZS00YWEwLTkxYzQtOWRkNGQyNjRmMGNkIiwic3ViIjoiOGRlOTk5OGUtMWJjZS00YWEwLTkxYzQtOWRkNGQyNjRmMGNkIiwiZ2l2ZW5fbmFtZSI6IkFsYW4iLCJmYW1pbHlfbmFtZSI6IldhbmciLCJlbWFpbHMiOlsiYWxhbndhbmcyMDI1QHUubm9ydGh3ZXN0ZXJuLmVkdSJdLCJ0ZnAiOiJCMkNfMV9QVUJBUEktUk9QQy1GTE9XIiwiYXpwIjoiZmVjMjUzZWEtMGQwNi00MjcyLWE1ZTYtYjQ3OGJhZWVjZDcwIiwidmVyIjoiMS4wIiwiaWF0IjoxNzQ1NTI4NjQ2fQ.RRgGe-dPmxvUjJPQ8Xt2xHS2Zh_l3gXwm0R7JQ-sGpINh9tHpFUadfNHJcdQ1_A2pRQYW-H8gS51q737xtT5uRXoIpGZfovJLnIADRTDSzg-bG9YJPAX206d8S5VrKOiMPDFyiyMRnqHl8YoiVM-EywnwenJ-Npp2VpI2wkgDI1q98670ou3S5ZwYpeA_zzjgh66sSNQhI_-iMGIO_vBlsdTrPgqo_aF91EO5XJMbV8rABaB6BmKBE0cYubC3QOG994tJduhTuM4mWw_5

In [12]:
import requests
import pandas as pd
import os
import time
from io import StringIO
from zipfile import ZipFile
from datetime import datetime as dt

# Replace with your real token and subscription key
subscription_key = "7076e411aeeb461e8bb085df1690f0cd"


# Headers for API access
headers = {
    "Authorization": f"Bearer {access_token}",
    "Ocp-Apim-Subscription-Key": subscription_key
}

# Step 1: Fetch archive metadata
print("📦 Fetching ERCOT DAM LMP archive list...")
endpoint = "https://api.ercot.com/api/public-reports/archive/np4-183-cd"
response = requests.get(endpoint, headers=headers)
response.raise_for_status()
archives = response.json().get("archives", [])
print(f"✅ Found {len(archives)} archive entries.")

# ✅ Filter for only archives from 2022 onwards
archives = [
    archive for archive in archives
    if "postDatetime" in archive and
       dt.fromisoformat(archive["postDatetime"][:19]) >= dt(2022, 1, 1) and
       dt.fromisoformat(archive["postDatetime"][:19]) <= dt(2025, 12, 31)
]

print(f"✅ Found {len(archives)} archive entries from 2022 onward.")

# Output dir
output_dir = "ercot_dam_outputs"
os.makedirs(output_dir, exist_ok=True)

# Step 2: Download and parse
for i, archive in enumerate(archives):
    download_url = f"https://api.ercot.com/api/public-reports/archive/np4-183-cd?download={archive['docId']}"
    print(f"\n⬇️ Downloading ({i+1}): {archive['friendlyName']}")
    
    for _ in range(3):
        r = requests.get(download_url, headers=headers)
        if r.status_code == 429:
            print("⏳ Rate limit hit. Waiting...")
            time.sleep(2)
        else:
            break

    if r.status_code != 200:
        print(f"⚠️ Failed to download. Status: {r.status_code}")
        continue

    content_type = r.headers.get("Content-Type", "")
    raw = r.content

    try:
        if raw.startswith(b'PK'):  # ZIP magic number
            print("📦 Detected ZIP archive")
            with ZipFile(BytesIO(raw)) as z:
                for file_info in z.infolist():
                    if file_info.filename.endswith(".csv"):
                        with z.open(file_info) as f:
                            df = pd.read_csv(f, engine='python', on_bad_lines='skip')
        else:
            print("🧾 Detected raw CSV file")
            decoded = raw.decode("utf-8", errors="ignore")
            df = pd.read_csv(StringIO(decoded), engine='python', on_bad_lines='skip')

        # Normalize column names
        df.columns = df.columns.str.strip().str.lower().str.replace(" ", "")
        print("📑 Columns:", df.columns.tolist())

        # Accept 'deliverydate' or fallback options
        date_col = None
        for col in df.columns:
            if col in ["deliverydate", "delivery_date", "delvdate"]:
                date_col = col
                break

        if not date_col:
            print(f"❌ No recognized delivery date column in: {archive['friendlyName']}")
            continue

        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        df["quarter"] = df[date_col].dt.to_period("Q").astype(str)

        for quarter, group in df.groupby("quarter"):
            out_path = os.path.join(output_dir, f"ERCOT_LMP_{quarter}.csv")
            if os.path.exists(out_path):
                group.to_csv(out_path, mode='a', header=False, index=False)
            else:
                group.to_csv(out_path, index=False)
        
        print(f"✅ Saved: {archive['friendlyName']}")
    except Exception as e:
        print(f"❌ Error parsing {archive['friendlyName']}: {e}")

📦 Fetching ERCOT DAM LMP archive list...
✅ Found 1000 archive entries.
✅ Found 1000 archive entries from 2022 onward.

⬇️ Downloading (1): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns: ['deliverydate', 'hourending', 'busname', 'lmp', 'dstflag']
✅ Saved: DAMHRLMPNP4183_csv

⬇️ Downloading (2): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns: ['deliverydate', 'hourending', 'busname', 'lmp', 'dstflag']
✅ Saved: DAMHRLMPNP4183_csv

⬇️ Downloading (3): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns: ['deliverydate', 'hourending', 'busname', 'lmp', 'dstflag']
✅ Saved: DAMHRLMPNP4183_csv

⬇️ Downloading (4): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns: ['deliverydate', 'hourending', 'busname', 'lmp', 'dstflag']
✅ Saved: DAMHRLMPNP4183_csv

⬇️ Downloading (5): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns: ['deliverydate', 'hourending', 'busname', 'lmp', 'dstflag']
✅ Saved: DAMHRLMPNP4183_csv

⬇️ Downloading (6): DAMHRLMPNP4183_csv
📦 Detected ZIP archive
📑 Columns