In [4]:
# Fetch the API key for EIA energy data
import os
import pandas as pd
from dotenv import load_dotenv
import requests
# Pull variables from a local .env file into the process environment, then
# read the EIA key from it. A missing or empty key aborts the notebook here.
load_dotenv()
EIA_API_KEY = os.environ.get("EIA_API_KEY")
if EIA_API_KEY is None or EIA_API_KEY == "":
    raise ValueError("EIA_API_KEY is not set in the environment variables.")


In [5]:
import requests
import pandas as pd
import time

# Download annual facility-level generation records for Texas from the EIA v2
# API, one page of LIMIT rows at a time, and collect them into a DataFrame.
#
# NOTE(review): the API key is part of the URL, so it will appear in the
# message of any HTTPError raised by raise_for_status() below. Clear such
# outputs before sharing the notebook.
api_url = f"https://api.eia.gov/v2/electricity/facility-fuel/data/?api_key={EIA_API_KEY}"

LIMIT = 5000          # rows requested per page
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs the cell forever
offset = 0

all_data = []

# Change start and end years as needed
start_year = 2002
end_year = 2026

# Everything in the request except the paging window is the same for every page.
# NOTE(review): the sort is on "period" alone, which does not give a total
# order; confirm the API returns ties in a stable order across pages, otherwise
# offset paging could duplicate or skip rows.
base_payload = {
    "frequency": "annual",
    "data": [
        "generation",
        "gross-generation"
    ],
    "facets": {
        "state": ["TX"]
    },
    "start": start_year,
    "end": end_year,
    "sort": [
        {
            "column": "period",
            "direction": "desc"
        }
    ],
}

while True:
    payload = {**base_payload, "offset": offset, "length": LIMIT}

    response = requests.post(api_url, json=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data = response.json()

    records = data.get("response", {}).get("data", [])
    if not records:
        break

    all_data.extend(records)
    print(f"Fetched {len(records)} records at offset {offset}")

    # A short page means the server has nothing further to return.
    if len(records) < LIMIT:
        break  # no more pages

    offset += LIMIT
    time.sleep(5)  # pause to avoid rate limits

df = pd.DataFrame(all_data)
print(f"Total records fetched: {len(df)}")
print(df.head())


Fetched 5000 records at offset 0
Fetched 5000 records at offset 5000
Fetched 5000 records at offset 10000
Fetched 5000 records at offset 15000
Fetched 5000 records at offset 20000
Fetched 5000 records at offset 25000
Fetched 5000 records at offset 30000
Fetched 1928 records at offset 35000
Total records fetched: 36928
  period plantCode                  plantName fuel2002 fuelTypeDescription  \
0   2024     66902       Roadrunner Wind Farm      ALL               Total   
1   2024     59321   Los Vientos Windpower IV      WND                Wind   
2   2024     59321   Los Vientos Windpower IV      WND                Wind   
3   2024     59321   Los Vientos Windpower IV      ALL               Total   
4   2024     59320  Los Vientos Windpower III      WND                Wind   

  state stateDescription primeMover generation gross-generation  \
0    TX            Texas        ALL     416282           422277   
1    TX            Texas         WT     466943           467859   
2    TX   

In [28]:
# Summarise the fetched frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36928 entries, 0 to 36927
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   period                  36928 non-null  object
 1   plantCode               36928 non-null  object
 2   plantName               36928 non-null  object
 3   fuel2002                36928 non-null  object
 4   fuelTypeDescription     36928 non-null  object
 5   state                   36928 non-null  object
 6   stateDescription        36928 non-null  object
 7   primeMover              36928 non-null  object
 8   generation              36928 non-null  object
 9   gross-generation        36928 non-null  object
 10  generation-units        36928 non-null  object
 11  gross-generation-units  36928 non-null  object
dtypes: object(12)
memory usage: 3.4+ MB


In [27]:
# Write the unmodified API result to <project root>/data/raw, where the project
# root is taken to be two directory levels above the current working directory.
# The path is built with os.path so it works with any path separator; splitting
# on a literal backslash only worked on Windows.
raw_project_root = os.path.dirname(os.path.dirname(os.getcwd()))
raw_csv_path = os.path.join(raw_project_root, "data", "raw", "eia_annual_texas_raw.csv")
df.to_csv(raw_csv_path, index=False)


In [16]:
# Work on a copy so the fetched frame `df` is not modified by later steps.
df_copy = df.copy()


In [17]:
# Order rows so the latest period comes first, then preview the result.
df_copy = df_copy.sort_values('period', ascending=False)
df_copy.head()

Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,primeMover,generation,gross-generation,generation-units,gross-generation-units
0,2024,66902,Roadrunner Wind Farm,ALL,Total,TX,Texas,ALL,416282,422277.0,megawatthours,megawatthours
937,2024,62562,"High Lonesome Wind Power, LLC Hybrid",WND,Wind,TX,Texas,WT,1183711,1184601.84,megawatthours,megawatthours
957,2024,56350,Colorado Bend Energy Center,ALL,Total,TX,Texas,ALL,2431330,2505079.0,megawatthours,megawatthours
956,2024,56350,Colorado Bend Energy Center,NG,Natural Gas,TX,Texas,ALL,2431330,2505079.0,megawatthours,megawatthours
955,2024,56350,Colorado Bend Energy Center,NG,Natural Gas,TX,Texas,CA,952169,955070.0,megawatthours,megawatthours


In [18]:
# Replace the shuffled index left by the sort with a fresh 0..n-1 range,
# discarding the old labels, then preview the result.
df_copy = df_copy.reset_index(drop=True)
df_copy.head()

Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,primeMover,generation,gross-generation,generation-units,gross-generation-units
0,2024,66902,Roadrunner Wind Farm,ALL,Total,TX,Texas,ALL,416282,422277.0,megawatthours,megawatthours
1,2024,62562,"High Lonesome Wind Power, LLC Hybrid",WND,Wind,TX,Texas,WT,1183711,1184601.84,megawatthours,megawatthours
2,2024,56350,Colorado Bend Energy Center,ALL,Total,TX,Texas,ALL,2431330,2505079.0,megawatthours,megawatthours
3,2024,56350,Colorado Bend Energy Center,NG,Natural Gas,TX,Texas,ALL,2431330,2505079.0,megawatthours,megawatthours
4,2024,56350,Colorado Bend Energy Center,NG,Natural Gas,TX,Texas,CA,952169,955070.0,megawatthours,megawatthours


In [19]:
# Give the API's column labels clearer names (old label -> new label).
column_renames = {
    'period': 'year',
    'fuel2002': 'fuelType',
    'gross-generation': 'grossGeneration',
    'gross-generation-units': 'grossGenerationUnit',
}
df_copy = df_copy.rename(columns=column_renames)



In [20]:
# Keep only the rows whose fuelType is wind ('WND') or solar ('SUN').
# .copy() makes the result an independent frame, so later in-place edits to
# df_copy do not raise SettingWithCopyWarning.
df_copy = df_copy[df_copy['fuelType'].isin(['WND', 'SUN'])].copy()
df_copy.head()

Unnamed: 0,year,plantCode,plantName,fuelType,fuelTypeDescription,state,stateDescription,primeMover,generation,grossGeneration,generation-units,grossGenerationUnit
1,2024,62562,"High Lonesome Wind Power, LLC Hybrid",WND,Wind,TX,Texas,WT,1183711,1184601.84,megawatthours,megawatthours
8,2024,56291,Horse Hollow Wind Energy Center,WND,Wind,TX,Texas,ALL,2458071,2617502.0,megawatthours,megawatthours
9,2024,56291,Horse Hollow Wind Energy Center,WND,Wind,TX,Texas,WT,2458071,2617502.0,megawatthours,megawatthours
11,2024,56395,Mesquite Wind Power LLC,WND,Wind,TX,Texas,ALL,387748,388599.0,megawatthours,megawatthours
12,2024,56395,Mesquite Wind Power LLC,WND,Wind,TX,Texas,WT,387748,388599.0,megawatthours,megawatthours


In [21]:
# Drop the columns that are not needed downstream. errors='ignore' lets the
# cell be re-run after the columns are already gone.
# The result is rebound rather than dropped in place: df_copy may be a filtered
# slice at this point, and an in-place drop on a slice triggers
# SettingWithCopyWarning.
# NOTE(review): once 'primeMover' is removed, rows that differed only by prime
# mover (e.g. 'ALL' vs 'WT') become identical. Confirm whether they should be
# de-duplicated before any aggregation to avoid double counting.
columns_to_drop = [
    'fuelTypeDescription',
    'stateDescription',
    'primeMover',
    'generation',
    'generation-units']
df_copy = df_copy.drop(columns=columns_to_drop, errors='ignore')


In [22]:
# List the current column labels of df_copy.
df_copy.columns

Index(['year', 'plantCode', 'plantName', 'fuelType', 'state',
       'grossGeneration', 'grossGenerationUnit'],
      dtype='object')

In [26]:
# Sanity check: every row in df_copy must be wind or solar.
# The check runs on the unfiltered column; filtering by isin() first would hide
# any unexpected value and make the check vacuous.
assert df_copy['fuelType'].isin(['WND', 'SUN']).all(), "unexpected fuelType values in df_copy"
df_copy['fuelType'].unique()

array(['WND', 'SUN'], dtype=object)

In [None]:
# Renumber the rows 0..n-1, discarding the old index labels, then preview.
df_copy = df_copy.reset_index(drop=True)
df_copy.head()

Unnamed: 0,year,plantCode,plantName,fuelType,state,grossGeneration,grossGenerationUnit
0,2024,62562,"High Lonesome Wind Power, LLC Hybrid",WND,TX,1184601.84,megawatthours
1,2024,56291,Horse Hollow Wind Energy Center,WND,TX,2617502.0,megawatthours
2,2024,56291,Horse Hollow Wind Energy Center,WND,TX,2617502.0,megawatthours
3,2024,56395,Mesquite Wind Power LLC,WND,TX,388599.0,megawatthours
4,2024,56395,Mesquite Wind Power LLC,WND,TX,388599.0,megawatthours


In [34]:
# Write the cleaned wind/solar frame to <project root>/data/raw, where the
# project root is taken to be two directory levels above the current working
# directory. The path is built with os.path so it works with any path
# separator; splitting on a literal backslash only worked on Windows.
filtered_project_root = os.path.dirname(os.path.dirname(os.getcwd()))
filtered_csv_path = os.path.join(filtered_project_root, "data", "raw", "eia_annual_texas_filtered.csv")
df_copy.to_csv(filtered_csv_path, index=False)