In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
import requests

In [2]:
df_texas_ncei = pd.read_csv(os.path.join(os.getcwd().rsplit("\\", 2)[0], r"data\raw", r"texas_weather_data.csv"))
df_texas_ncei.head()

Unnamed: 0,date,station,AWND,PRCP,TMAX,TMIN
0,2015-01-01T00:00:00,GHCND:USC00412114,0.4,9.9,6.1,2.2
1,2015-01-01T00:00:00,GHCND:USC00413340,8.9,1.5,7.2,5.0
2,2015-01-01T00:00:00,GHCND:USC00414792,3.3,3.3,2.8,1.1
3,2015-01-01T00:00:00,GHCND:USC00418040,3.6,0.0,-5.0,-14.4
4,2015-01-01T00:00:00,GHCND:USC00418862,3.6,9.7,4.4,1.1


In [3]:
# Fetchib the API key for EIA energy data
load_dotenv()
EIA_API_KEY = os.getenv("EIA_API_KEY")
if not EIA_API_KEY:
    raise ValueError("EIA_API_KEY is not set in the environment variables.")


In [4]:
import json


api_url = "https://api.eia.gov/v2/electricity/facility-fuel/data/?api_key={}".format(EIA_API_KEY)
payload = {
    "frequency": "annual",
    "data": [
        "generation",
        "gross-generation"
    ],
    "facets": {
        "state": [
            "TX"
        ]
    },
    "start": 2002,
    "end": 2002,
    "sort": [
        {
            "column": "period",
            "direction": "desc"
        }
    ],
    "offset": 0,
    "length": 5000,
}

response = requests.post(api_url, json=payload)
data = response.json()
print(data)

{'response': {'total': '1197', 'dateFormat': 'YYYY', 'frequency': 'annual', 'data': [{'period': '2002', 'plantCode': '54940', 'plantName': 'Austin State Hospital', 'fuel2002': 'NG', 'fuelTypeDescription': 'Natural Gas', 'state': 'TX', 'stateDescription': 'Texas', 'primeMover': ' ', 'generation': '1598', 'gross-generation': '1630.61', 'generation-units': 'megawatthours', 'gross-generation-units': 'megawatthours'}, {'period': '2002', 'plantCode': '54940', 'plantName': 'Austin State Hospital', 'fuel2002': 'ALL', 'fuelTypeDescription': 'Total', 'state': 'TX', 'stateDescription': 'Texas', 'primeMover': 'ALL', 'generation': '1598', 'gross-generation': '1630.61', 'generation-units': 'megawatthours', 'gross-generation-units': 'megawatthours'}, {'period': '2002', 'plantCode': '54940', 'plantName': 'Austin State Hospital', 'fuel2002': 'NG', 'fuelTypeDescription': 'Natural Gas', 'state': 'TX', 'stateDescription': 'Texas', 'primeMover': 'ALL', 'generation': '1598', 'gross-generation': '1630.61', '

In [5]:
# convert the data to a DataFrame
df = pd.DataFrame(data['response']['data'])
df.head()

Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,primeMover,generation,gross-generation,generation-units,gross-generation-units
0,2002,54940,Austin State Hospital,NG,Natural Gas,TX,Texas,,1598.0,1630.61,megawatthours,megawatthours
1,2002,54940,Austin State Hospital,ALL,Total,TX,Texas,ALL,1598.0,1630.61,megawatthours,megawatthours
2,2002,54940,Austin State Hospital,NG,Natural Gas,TX,Texas,ALL,1598.0,1630.61,megawatthours,megawatthours
3,2002,54943,East Texas Gas Plant,NG,Natural Gas,TX,Texas,,23.52,24.0,megawatthours,megawatthours
4,2002,54943,East Texas Gas Plant,ALL,Total,TX,Texas,ALL,23.52,24.0,megawatthours,megawatthours


In [6]:
df['period'].unique()

array(['2002'], dtype=object)

In [7]:
import time

annual_data = []
for year in range(2015, 2026):
    payload['start'] = year
    payload['end'] = year
    response = requests.post(api_url, json=payload)
    data = response.json()
    if 'response' in data and 'data' in data['response']:
        annual_data.extend(data['response']['data'])
    print(f"Fetched data for year {year}: {len(data['response']['data']) if 'response' in data and 'data' in data['response'] else 0} records")
    time.sleep(5)

# Convert to DataFrame
annual_df = pd.DataFrame(annual_data)
annual_df.head()


Fetched data for year 2015: 1576 records
Fetched data for year 2016: 1610 records
Fetched data for year 2017: 1724 records
Fetched data for year 2018: 1783 records
Fetched data for year 2019: 1844 records
Fetched data for year 2020: 1929 records
Fetched data for year 2021: 2301 records
Fetched data for year 2022: 2536 records
Fetched data for year 2023: 2729 records
Fetched data for year 2024: 1426 records
Fetched data for year 2025: 0 records


Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,primeMover,generation,gross-generation,generation-units,gross-generation-units
0,2015,57504,TECO CHP-1,ALL,Total,TX,Texas,ALL,42506,62641,megawatthours,megawatthours
1,2015,57504,TECO CHP-1,NG,Natural Gas,TX,Texas,ALL,42506,62641,megawatthours,megawatthours
2,2015,57504,TECO CHP-1,NG,Natural Gas,TX,Texas,GT,42506,62641,megawatthours,megawatthours
3,2015,56557,EXC Wind 1 LLC,ALL,Total,TX,Texas,ALL,24567,25550,megawatthours,megawatthours
4,2015,56557,EXC Wind 1 LLC,WND,Wind,TX,Texas,ALL,24567,25550,megawatthours,megawatthours


In [8]:
annual_df_filtering = annual_df.copy()
state = "TX"

In [9]:
annual_df_filtering.head()

Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,primeMover,generation,gross-generation,generation-units,gross-generation-units
0,2015,57504,TECO CHP-1,ALL,Total,TX,Texas,ALL,42506,62641,megawatthours,megawatthours
1,2015,57504,TECO CHP-1,NG,Natural Gas,TX,Texas,ALL,42506,62641,megawatthours,megawatthours
2,2015,57504,TECO CHP-1,NG,Natural Gas,TX,Texas,GT,42506,62641,megawatthours,megawatthours
3,2015,56557,EXC Wind 1 LLC,ALL,Total,TX,Texas,ALL,24567,25550,megawatthours,megawatthours
4,2015,56557,EXC Wind 1 LLC,WND,Wind,TX,Texas,ALL,24567,25550,megawatthours,megawatthours


In [10]:
annual_df_filtering = annual_df_filtering[(annual_df_filtering['state'] == state) & (annual_df_filtering['fuel2002'].isin(['WND', 'SUN']))]

In [11]:
annual_df_filtering.drop(columns=['primeMover','generation','generation-units'], inplace=True)

In [12]:
annual_df_filtering.head()


Unnamed: 0,period,plantCode,plantName,fuel2002,fuelTypeDescription,state,stateDescription,gross-generation,gross-generation-units
4,2015,56557,EXC Wind 1 LLC,WND,Wind,TX,Texas,25550,megawatthours
5,2015,56557,EXC Wind 1 LLC,WND,Wind,TX,Texas,25550,megawatthours
7,2015,56558,EXC Wind 2 LLC,WND,Wind,TX,Texas,24248,megawatthours
8,2015,56558,EXC Wind 2 LLC,WND,Wind,TX,Texas,24248,megawatthours
10,2015,56559,EXC Wind 3 LLC,WND,Wind,TX,Texas,23420,megawatthours


In [13]:
print(annual_df_filtering.info())

<class 'pandas.core.frame.DataFrame'>
Index: 4958 entries, 4 to 19433
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   period                  4958 non-null   object
 1   plantCode               4958 non-null   object
 2   plantName               4958 non-null   object
 3   fuel2002                4958 non-null   object
 4   fuelTypeDescription     4958 non-null   object
 5   state                   4958 non-null   object
 6   stateDescription        4958 non-null   object
 7   gross-generation        4958 non-null   object
 8   gross-generation-units  4958 non-null   object
dtypes: object(9)
memory usage: 387.3+ KB
None


In [16]:
os.getcwd().rsplit("\\", 2)[0]
annual_df_filtering.to_csv(os.path.join(os.getcwd().rsplit("\\", 2)[0], r"data\raw", "energy_eia_raw_data.csv"), index=False)

In [24]:
# drop duplicate rows
annual_df_filtering_copy = annual_df_filtering.copy()
annual_df_filtering_copy.drop_duplicates(inplace=True)

In [25]:
annual_df_filtering_copy.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2479 entries, 4 to 19432
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   period                  2479 non-null   object
 1   plantCode               2479 non-null   object
 2   plantName               2479 non-null   object
 3   fuel2002                2479 non-null   object
 4   fuelTypeDescription     2479 non-null   object
 5   state                   2479 non-null   object
 6   stateDescription        2479 non-null   object
 7   gross-generation        2479 non-null   object
 8   gross-generation-units  2479 non-null   object
dtypes: object(9)
memory usage: 193.7+ KB


In [26]:
annual_df_filtering_copy.drop(columns=['stateDescription','fuelTypeDescription'], inplace=True)

In [27]:
annual_df_filtering_copy.rename(columns={'fuel2002': 'source','period':'year'}, inplace=True)

In [29]:

annual_df_filtering_copy.reset_index(drop=True, inplace=True)

In [31]:
annual_df_filtering_copy.head()

Unnamed: 0,year,plantCode,plantName,source,state,gross-generation,gross-generation-units
0,2015,56557,EXC Wind 1 LLC,WND,TX,25550,megawatthours
1,2015,56558,EXC Wind 2 LLC,WND,TX,24248,megawatthours
2,2015,56559,EXC Wind 3 LLC,WND,TX,23420,megawatthours
3,2015,56560,EXC Wind 4 LLC,WND,TX,230934,megawatthours
4,2015,56561,EXC Wind 5 LLC,WND,TX,25725,megawatthours


In [32]:
os.getcwd().rsplit("\\", 2)[0]
annual_df_filtering_copy.to_csv(os.path.join(os.getcwd().rsplit("\\", 2)[0], r"data\raw", "energy_eia_raw_data_filtered.csv"), index=False)