In [1]:
import os
from dotenv import load_dotenv
import requests
import pandas as pd
from io import StringIO

In [2]:
# Load variables from a .env file (if present) into the process environment,
# then read the GridStatus API key from the environment.
load_dotenv()
api_key = os.getenv("GRIDSTATUS_API_KEY")
# Fail fast on a missing OR empty key: an empty value (e.g. a blank
# `GRIDSTATUS_API_KEY=` line) is not None, so an `is not None` check would let
# it slip through to the API request below.
assert api_key, "GRIDSTATUS_API_KEY not found in environment!"

In [3]:
# Download the ERCOT solar actual-and-forecast-by-geo-region hourly dataset from
# the GridStatus API as CSV, normalise its timestamps, and keep the per-region
# solar columns in a frame sorted by interval start.

# Base URL
url = "https://api.gridstatus.io/v1/datasets/ercot_solar_actual_and_forecast_by_geo_region_hourly/query"

# Query parameters
params = {
    "start_time": "2022-06-30",
    "end_time":   "2024-12-31",
    "publish_time": "latest",
    "timezone": "market",
    "api_key": api_key,
    "return_format": "csv",
}

# Make request. Without a timeout, requests waits indefinitely on a stalled
# connection and the kernel hangs; the tuple is (connect, read) in seconds.
resp = requests.get(url, params=params, timeout=(10, 300))
resp.raise_for_status()

df = pd.read_csv(StringIO(resp.text))

# timezone workaround to avoid error of timezones not matching

# 1. Parse timestamps as UTC (prevents mixed offset errors: the local column
#    carries more than one UTC offset across daylight-saving changes)
df["interval_start_local"] = pd.to_datetime(df["interval_start_local"], utc=True)

# 2. Apply cutoff at the end of 2024 on the ERCOT local clock. The cutoff is
#    defined in America/Chicago and converted to UTC for the comparison; writing
#    it directly as 23:59:59 UTC would be 17:59:59 local and drop the last
#    local hours of Dec 31.
cutoff_utc = pd.Timestamp("2024-12-31 23:59:59", tz="America/Chicago").tz_convert("UTC")
df = df[df["interval_start_local"] <= cutoff_utc].copy()

# 3. Convert UTC → ERCOT local time
df["interval_start_local"] = df["interval_start_local"].dt.tz_convert("America/Chicago")

# include systemwide + all regions

regions = ["system_wide", "centerwest", "northwest", "fareast", "southeast", "centereast"]

# Timestamp first, then the same four column prefixes for every region.
solar_cols = ["interval_start_local"]

for r in regions:
    solar_cols += [
        f"pvgrpp_{r}",
        f"stppf_{r}",
        f"cop_hsl_{r}",
        f"gen_{r}",
    ]

# Some systemwide datasets include an extra "hsl_system_wide"
if "hsl_system_wide" in df.columns:
    solar_cols.append("hsl_system_wide")

# Selecting with df[solar_cols] raises KeyError if any expected column is absent.
df_solar_all = (
    df[solar_cols]
    .sort_values("interval_start_local")
    .reset_index(drop=True)
)

print(df_solar_all.shape)
df_solar_all.head()


(21958, 26)


Unnamed: 0,interval_start_local,pvgrpp_system_wide,stppf_system_wide,cop_hsl_system_wide,gen_system_wide,pvgrpp_centerwest,stppf_centerwest,cop_hsl_centerwest,gen_centerwest,pvgrpp_northwest,...,gen_fareast,pvgrpp_southeast,stppf_southeast,cop_hsl_southeast,gen_southeast,pvgrpp_centereast,stppf_centereast,cop_hsl_centereast,gen_centereast,hsl_system_wide
0,2022-06-30 00:00:00-05:00,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,2022-06-30 01:00:00-05:00,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,2022-06-30 02:00:00-05:00,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,2022-06-30 03:00:00-05:00,0.0,0.0,0.0,0.11,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,2022-06-30 04:00:00-05:00,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [4]:
# Save the combined solar frame as CSV in the project folder.
# The folder defaults to the original location; set the ERCOT_PROJECT_DIR
# environment variable to write somewhere else (e.g. on another machine).
# `or` also falls back to the default when the variable is set but empty.
output_dir = os.getenv("ERCOT_PROJECT_DIR") or r"C:\Users\lemre\Documents\ERCOT_Peaker_Project"

# Create the folder if it does not exist yet, so the write below cannot fail
# on a missing parent directory.
os.makedirs(output_dir, exist_ok=True)

df_solar_all.to_csv(
    os.path.join(output_dir, "ercot_solar_allzones_2022_2024.csv"),
    index=False
)