In [8]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
from io import StringIO

In [14]:
# Load environment variables and get API key
load_dotenv()
api_key = os.getenv("GRIDSTATUS_API_KEY")
assert api_key is not None, "GRIDSTATUS_API_KEY not found in environment!"

In [16]:
# Base URL from GridStatus (without query params)
url = "https://api.gridstatus.io/v1/datasets/ercot_solar_actual_and_forecast_by_geo_region_hourly/query"

# Use full available history up to end of 2024
params = {
    "start_time": "2022-06-30",   # earliest available for this dataset
    "end_time":   "2024-12-31",   # stop at end of 2024
    "publish_time": "latest",
    "timezone": "market",
    "api_key": api_key,
    "return_format": "csv",
}

# Make the request
resp = requests.get(url, params=params)
resp.raise_for_status()

df = pd.read_csv(StringIO(resp.text))

# --- Handle datetimes properly (tz-aware) ---

# Parse as UTC to avoid the mixed-tz warning
df["interval_start_local"] = pd.to_datetime(df["interval_start_local"], utc=True)

# Apply cutoff with a proper Timestamp (same tz)
cutoff_utc = pd.Timestamp("2024-12-31 23:59:59", tz="UTC")
df = df[df["interval_start_local"] <= cutoff_utc].copy()

# If you want to see times in ERCOT local time (America/Chicago):
df["interval_start_local"] = df["interval_start_local"].dt.tz_convert("America/Chicago")

# --- Keep only the columns you care about ---

solar_cols = [
    "interval_start_local",
    "pvgrpp_northwest",      # main solar forecast
    "stppf_northwest",       # short-term solar forecast
    "cop_hsl_northwest",     # high solar limit
    "gen_northwest",         # actual solar generation
]

df_solar_nw = (
    df[solar_cols]
    .sort_values("interval_start_local")
    .reset_index(drop=True)
)

print(df_solar_nw.shape)
df_solar_nw.head()


(21958, 5)


Unnamed: 0,interval_start_local,pvgrpp_northwest,stppf_northwest,cop_hsl_northwest,gen_northwest
0,2022-06-30 00:00:00-05:00,0.0,0.0,0.0,0.0
1,2022-06-30 01:00:00-05:00,0.0,0.0,0.0,0.0
2,2022-06-30 02:00:00-05:00,0.0,0.0,0.0,0.0
3,2022-06-30 03:00:00-05:00,0.0,0.0,0.0,0.0
4,2022-06-30 04:00:00-05:00,0.0,0.0,0.0,0.0


In [18]:
# save csv to folder
df_solar_nw.to_csv(r"C:\Users\lemre\Documents\ERCOT_Peaker_Project\ercot_solar_northwest_2022_2024.csv", 
                   index=False)