In [1]:
import requests
import zipfile
import io
import pandas as pd

In [5]:
def increment_date_by_month(date_str, increment):
    """Shift a ``YYYYMMDD`` date by a number of calendar months.

    Parameters:
        date_str: Date in ``%Y%m%d`` form; it is parsed by ``pd.to_datetime``
            with that explicit format.
        increment (int): Number of months to add (passed to ``pd.DateOffset``).

    Returns:
        str: The shifted date, formatted again as ``%Y%m%d``.
    """
    # Parse, shift by whole months, and render back to the compact string form.
    shifted = pd.to_datetime(date_str, format='%Y%m%d') + pd.DateOffset(months=increment)
    return shifted.strftime('%Y%m%d')

# Weather Forecast Data

In [6]:
# Download twelve monthly 'lfweather' ZIP archives starting at `year`, read every
# CSV inside each archive, keep only the forecast rows for three stations, and
# stack everything into a single frame `df`.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/lfweather/{url_date}lfweather_csv.zip'

    # GET request; without an explicit timeout a stalled connection would hang the notebook
    response = requests.get(url + zip_url, timeout=60)

    # Skip months that could not be fetched, but report exactly which one failed
    if response.status_code != 200:
        print(f'Failed to download ZIP files: {url + zip_url} (HTTP {response.status_code})')
        continue

    # Extract the archive straight from memory
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        file_list = zip_ref.namelist()
        dfs = []

        # Loop through the CSV members of the archive
        for file_name in file_list:
            if file_name.endswith('.csv'):
                with zip_ref.open(file_name) as csv_file:
                    # Separate name so the loop temporary is not confused with the final `df`
                    csv_df = pd.read_csv(csv_file)
                station_rows = csv_df['Station ID'].isin(['ISP', 'LGA', 'JFK'])
                forecast_rows = csv_df['Vintage'] == 'Forecast'
                dfs.append(csv_df[station_rows & forecast_rows])

    # An archive without any CSV would make pd.concat raise, so only merge when there is data
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        df_year.append(combined_df)

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not df_year:
    raise RuntimeError('No weather forecast data could be downloaded from ' + url)

df = pd.concat(df_year, ignore_index=True)
# Drop every column that contains at least one missing value
df.dropna(axis=1, inplace=True)

In [7]:
# Parse both date columns of the weather frame into datetimes (in place).
for date_col in ('Forecast Date', 'Vintage Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# Day following each forecast date; `index` is a boolean mask marking the rows
# whose vintage date equals that following day.
day_after_forecast = df['Forecast Date'] + pd.DateOffset(days=1)
index = df['Vintage Date'] == day_after_forecast
day_after_forecast

0      2020-01-02
1      2020-01-02
2      2020-01-02
3      2020-01-02
4      2020-01-02
          ...    
2191   2021-01-01
2192   2021-01-01
2193   2021-01-01
2194   2021-01-01
2195   2021-01-01
Name: Forecast Date, Length: 2196, dtype: datetime64[ns]

In [8]:
def compute_HDD(max_temp, min_temp, base_temp= 65):
    """
    Heating Degree Days for a single day.

    The day's average temperature is taken as the midpoint of its maximum and
    minimum; HDD is how far that average falls below the base temperature,
    and is zero when the average is at or above the base.

    Parameters:
        max_temp (float): The maximum temperature for the day in Fahrenheit.
        min_temp (float): The minimum temperature for the day in Fahrenheit.
        base_temp (float): The base temperature in Fahrenheit (default is 65°F).

    Returns:
        float: The Heating Degree Days for the day.
    """
    shortfall = base_temp - (max_temp + min_temp) / 2
    return shortfall if shortfall > 0 else 0

def compute_CDD(max_temp, min_temp, base_temp= 65):
    """
    Cooling Degree Days for a single day.

    The day's average temperature is taken as the midpoint of its maximum and
    minimum; CDD is how far that average rises above the base temperature,
    and is zero when the average is at or below the base.

    Parameters:
        max_temp (float): The maximum temperature for the day in Fahrenheit.
        min_temp (float): The minimum temperature for the day in Fahrenheit.
        base_temp (float): The base temperature in Fahrenheit (default is 65°F).

    Returns:
        float: The Cooling Degree Days for the day.
    """
    excess = (max_temp + min_temp) / 2 - base_temp
    return excess if excess > 0 else 0


In [9]:
# Add one degree-day column per metric, computed row by row from the
# 'Max Temp' and 'Min Temp' columns with the default base temperature.
for column, degree_day_fn in (('HDD', compute_HDD), ('CDD', compute_CDD)):
    df[column] = df.apply(
        # `fn` is bound as a default so each lambda keeps its own function
        lambda row, fn=degree_day_fn: fn(max_temp=row['Max Temp'], min_temp=row['Min Temp']),
        axis=1,
    )
df.head()

Unnamed: 0,Forecast Date,Vintage Date,Vintage,Station ID,Max Temp,Min Temp,Max Wet Bulb,Min Wet Bulb,HDD,CDD
0,2020-01-01,2020-01-01,Forecast,ISP,42,32,38,29,28.0,0.0
1,2020-01-01,2020-01-01,Forecast,JFK,42,34,38,30,27.0,0.0
2,2020-01-01,2020-01-01,Forecast,LGA,42,36,36,31,26.0,0.0
3,2020-01-01,2020-01-02,Forecast,ISP,44,29,39,27,28.5,0.0
4,2020-01-01,2020-01-02,Forecast,JFK,46,32,40,29,26.0,0.0


# Load Forecast

In [10]:
# Download twelve monthly 'isolf' ZIP archives starting at `year`, read every CSV
# inside each archive, and stack everything into a single frame `load_forecast`.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/isolf/{url_date}isolf_csv.zip'

    # GET request; without an explicit timeout a stalled connection would hang the notebook
    response = requests.get(url + zip_url, timeout=60)

    # Skip months that could not be fetched, but report exactly which one failed
    if response.status_code != 200:
        print(f'Failed to download ZIP files: {url + zip_url} (HTTP {response.status_code})')
        continue

    # Extract the archive straight from memory
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        file_list = zip_ref.namelist()
        dfs = []

        # Loop through the CSV members of the archive
        for file_name in file_list:
            if file_name.endswith('.csv'):
                with zip_ref.open(file_name) as csv_file:
                    # Appended directly rather than bound to `df`, so the weather
                    # frame `df` built by the earlier cells is not overwritten
                    dfs.append(pd.read_csv(csv_file))

    # An archive without any CSV would make pd.concat raise, so only merge when there is data
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        df_year.append(combined_df)

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not df_year:
    raise RuntimeError('No load forecast data could be downloaded from ' + url)

load_forecast = pd.concat(df_year, ignore_index=True)
# Drop every column that contains at least one missing value
load_forecast.dropna(axis=1, inplace=True)

# Parse 'Time Stamp', which is read as month/day/year hour:minute text
load_forecast['Time Stamp'] = pd.to_datetime(load_forecast['Time Stamp'], format="%m/%d/%Y %H:%M")

# Re-render 'Time Stamp' as 'YYYY/MM/DD HH:MM:SS' text (adds the seconds field).
# NOTE: after this line the column holds strings, so the index set below is a
# string index rather than a DatetimeIndex.
load_forecast['Time Stamp'] = load_forecast['Time Stamp'].dt.strftime("%Y/%m/%d %H:%M:%S")

# Set 'Time Stamp' as index
load_forecast.set_index('Time Stamp', inplace=True)

# Preview the first 50 values of the 'Longil' column
load_forecast['Longil'][:50]

43801


Time Stamp
2020/01/01 00:00:00    2012
2020/01/01 01:00:00    1896
2020/01/01 02:00:00    1798
2020/01/01 03:00:00    1737
2020/01/01 04:00:00    1721
2020/01/01 05:00:00    1748
2020/01/01 06:00:00    1795
2020/01/01 07:00:00    1786
2020/01/01 08:00:00    1777
2020/01/01 09:00:00    1767
2020/01/01 10:00:00    1781
2020/01/01 11:00:00    1814
2020/01/01 12:00:00    1874
2020/01/01 13:00:00    1918
2020/01/01 14:00:00    1974
2020/01/01 15:00:00    2062
2020/01/01 16:00:00    2279
2020/01/01 17:00:00    2580
2020/01/01 18:00:00    2600
2020/01/01 19:00:00    2559
2020/01/01 20:00:00    2492
2020/01/01 21:00:00    2372
2020/01/01 22:00:00    2193
2020/01/01 23:00:00    2007
2020/01/02 00:00:00    1892
2020/01/02 01:00:00    1822
2020/01/02 02:00:00    1777
2020/01/02 03:00:00    1768
2020/01/02 04:00:00    1807
2020/01/02 05:00:00    1912
2020/01/02 06:00:00    2108
2020/01/02 07:00:00    2209
2020/01/02 08:00:00    2186
2020/01/02 09:00:00    2131
2020/01/02 10:00:00    2071
2020/01/0

# Interface Flows

In [11]:
# Download twelve monthly 'ExternalLimitsFlows' ZIP archives starting at `year`,
# read every CSV inside each archive, and stack everything into a single frame
# `flows` indexed by timestamp.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/ExternalLimitsFlows/{url_date}ExternalLimitsFlows_csv.zip'

    # GET request; without an explicit timeout a stalled connection would hang the notebook
    response = requests.get(url + zip_url, timeout=60)

    # Skip months that could not be fetched, but report exactly which one failed
    if response.status_code != 200:
        print(f'Failed to download ZIP files: {url + zip_url} (HTTP {response.status_code})')
        continue

    # Extract the archive straight from memory
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        file_list = zip_ref.namelist()
        dfs = []

        # Loop through the CSV members of the archive
        for file_name in file_list:
            if file_name.endswith('.csv'):
                with zip_ref.open(file_name) as csv_file:
                    # Appended directly rather than bound to `df`, so the weather
                    # frame `df` built by the earlier cells is not overwritten
                    dfs.append(pd.read_csv(csv_file))

    # An archive without any CSV would make pd.concat raise, so only merge when there is data
    if dfs:
        combined_df = pd.concat(dfs, ignore_index=True)
        df_year.append(combined_df)

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not df_year:
    raise RuntimeError('No interface flow data could be downloaded from ' + url)

flows = pd.concat(df_year, ignore_index=True)
flows['Timestamp'] = pd.to_datetime(flows['Timestamp'])

# Index by time and drop the two identifier columns, leaving the remaining columns for aggregation
flows.set_index('Timestamp', inplace= True)
flows.drop(columns= ['Interface Name', 'Point ID'], inplace= True)

In [12]:
# Step 1: add up every row that falls into the same 5-minute bin
# (presumably one row per interface per timestamp — confirm against the raw CSV).
five_minute_bins = flows.resample('5min')
flows_5m = five_minute_bins.sum()

# Step 2: average those 5-minute totals within each hour.
hourly_bins = flows_5m.resample('60min')
flows_60m = hourly_bins.mean()
flows_60m

Unnamed: 0_level_0,Flow (MWH),Positive Limit (MWH),Negative Limit (MWH)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01 00:00:00,15628.028333,45813.000000,-79558.0
2020-01-01 01:00:00,15598.565000,46070.500000,-79558.0
2020-01-01 02:00:00,15420.762500,46158.000000,-79558.0
2020-01-01 03:00:00,14731.755833,46138.000000,-79558.0
2020-01-01 04:00:00,14189.192500,46128.000000,-79558.0
...,...,...,...
2020-12-31 19:00:00,19031.432500,45848.000000,-79368.0
2020-12-31 20:00:00,19378.762500,45843.000000,-79368.0
2020-12-31 21:00:00,18693.134167,45843.000000,-79368.0
2020-12-31 22:00:00,19711.463333,45751.333333,-79368.0


In [3]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://historical-forecast-api.open-meteo.com/v1/forecast"
params = {
    "latitude": 40.8168,
    "longitude": -73.0662,
    "start_date": "2020-01-01",
    "end_date": "2020-12-31",
    "hourly": ["temperature_2m", "weather_code", "relative_humidity_2m", "precipitation", "wind_speed_10m", "wind_direction_10m"]
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested:
# Variables(k) is the k-th entry of params["hourly"].
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(1).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(3).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(5).ValuesAsNumpy()

# Build the UTC timestamp column from the response's start, end and interval;
# inclusive="left" excludes the end instant itself.
hourly_data = {"date": pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
# weather_code was requested and extracted above but previously never stored;
# it is appended last so the positions of the existing columns do not change.
hourly_data["weather_code"] = hourly_weather_code

hourly_weather = pd.DataFrame(data = hourly_data)
print(hourly_weather)

Coordinates 40.83027267456055°N -73.06680297851562°E
Elevation 32.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
                          date  temperature_2m  relative_humidity_2m  \
0    2020-01-01 00:00:00+00:00          3.6475                  84.0   
1    2020-01-01 01:00:00+00:00          3.2975                  85.0   
2    2020-01-01 02:00:00+00:00          2.9975                  83.0   
3    2020-01-01 03:00:00+00:00          4.2475                  88.0   
4    2020-01-01 04:00:00+00:00          4.6475                  80.0   
...                        ...             ...                   ...   
8779 2020-12-31 19:00:00+00:00          6.6975                  78.0   
8780 2020-12-31 20:00:00+00:00          6.2475                  75.0   
8781 2020-12-31 21:00:00+00:00          6.0475                  77.0   
8782 2020-12-31 22:00:00+00:00          5.3475                  68.0   
8783 2020-12-31 23:00:00+00:00          4.6975                  61.0   

      prec