In [100]:
import requests
import zipfile
import io
import pandas as pd

In [101]:
def increment_date_by_month(date_str, increment):
    """Shift a ``YYYYMMDD`` date by a number of calendar months.

    Parameters:
        date_str: The date to shift, written as ``YYYYMMDD`` (for example
            ``'20200101'``). It is handed to ``pd.to_datetime`` with the
            format ``'%Y%m%d'``.
        increment (int): How many months to add.

    Returns:
        str: The shifted date, formatted as ``YYYYMMDD``.
    """
    # Parse with an explicit format so the value is never interpreted loosely.
    parsed = pd.to_datetime(date_str, format='%Y%m%d')

    # DateOffset performs calendar-month arithmetic (not a fixed number of days).
    shifted = parsed + pd.DateOffset(months=increment)

    return shifted.strftime('%Y%m%d')

# Weather Forecast Data

In [102]:
# Download the twelve monthly weather-forecast archives starting at `year`,
# keep only the forecast rows for the ISP, LGA and JFK stations, and stack
# everything into a single DataFrame `df`.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/lfweather/{url_date}lfweather_csv.zip'

    # GET request; the timeout stops one stalled connection from hanging the run.
    response = requests.get(url + zip_url, timeout=60)

    # Report exactly which archive failed, then carry on with the other months.
    if response.status_code != 200:
        print(f'Failed to download ZIP file {url + zip_url} (HTTP {response.status_code})')
        continue

    # The archive is unpacked from memory; nothing is written to disk.
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        dfs = []
        for file_name in zip_ref.namelist():
            if not file_name.endswith('.csv'):
                continue
            with zip_ref.open(file_name) as csv_file:
                # Distinct name so the per-file frame never aliases the final `df`.
                csv_df = pd.read_csv(csv_file)
            wanted_rows = (
                csv_df['Station ID'].isin(['ISP', 'LGA', 'JFK'])
                & (csv_df['Vintage'] == 'Forecast')
            )
            dfs.append(csv_df[wanted_rows])

    # pd.concat raises on an empty list, so skip archives that held no CSV.
    if not dfs:
        print(f'No CSV files found in {url + zip_url}')
        continue

    combined_df = pd.concat(dfs, ignore_index=True)
    df_year.append(combined_df)

if not df_year:
    raise ValueError('No weather forecast data could be downloaded; nothing to concatenate')

df = pd.concat(df_year, ignore_index=True)
# Drop every column that contains at least one missing value.
df = df.dropna(axis=1)

In [103]:
# Turn both date columns into real datetimes so they can be compared and shifted.
for date_col in ('Forecast Date', 'Vintage Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# The day after each forecast date; computed once and reused below.
day_after_forecast = df['Forecast Date'] + pd.DateOffset(days=1)

# Boolean mask: rows whose vintage date is exactly one day after the forecast date.
index = df['Vintage Date'] == day_after_forecast
day_after_forecast

0      2020-01-02
1      2020-01-02
2      2020-01-02
3      2020-01-02
4      2020-01-02
          ...    
2191   2021-01-01
2192   2021-01-01
2193   2021-01-01
2194   2021-01-01
2195   2021-01-01
Name: Forecast Date, Length: 2196, dtype: datetime64[ns]

In [104]:
def compute_HDD(max_temp, min_temp, base_temp= 65):
    """
    Compute the Heating Degree Days (HDD) for a single day.

    The daily average is taken as the midpoint of the maximum and minimum
    temperature. HDD is the amount by which that average falls below the
    base temperature, and is zero when the average is at or above the base.

    Parameters:
        max_temp (float): The maximum temperature for the day in Fahrenheit.
        min_temp (float): The minimum temperature for the day in Fahrenheit.
        base_temp (float): The base temperature in Fahrenheit (default is 65°F).
        
    Returns:
        float: The Heating Degree Days for the day (never negative).
    """
    avg_temp = (max_temp + min_temp) / 2
    # Floor at 0.0 (not 0) so the result is a float on every path, as documented.
    hdd = max(0.0, base_temp - avg_temp)
    return hdd

def compute_CDD(max_temp, min_temp, base_temp= 65):
    """
    Compute the Cooling Degree Days (CDD) for a single day.

    The daily average is taken as the midpoint of the maximum and minimum
    temperature. CDD is the amount by which that average exceeds the base
    temperature, and is zero when the average is at or below the base.

    Parameters:
        max_temp (float): The maximum temperature for the day in Fahrenheit.
        min_temp (float): The minimum temperature for the day in Fahrenheit.
        base_temp (float): The base temperature in Fahrenheit (default is 65°F).
        
    Returns:
        float: The Cooling Degree Days for the day (never negative).
    """
    avg_temp = (max_temp + min_temp) / 2
    # Floor at 0.0 (not 0) so the result is a float on every path, as documented.
    cdd = max(0.0, avg_temp - base_temp)
    return cdd


In [105]:
# Add one degree-day column per calculator, evaluated row by row from the
# daily 'Max Temp' / 'Min Temp' values (each calculator uses its default base).
degree_day_calculators = {'HDD': compute_HDD, 'CDD': compute_CDD}
for column_name, calculator in degree_day_calculators.items():
    df[column_name] = df.apply(
        lambda row, calc=calculator: calc(max_temp=row['Max Temp'], min_temp=row['Min Temp']),
        axis=1,
    )
df.head()

Unnamed: 0,Forecast Date,Vintage Date,Vintage,Station ID,Max Temp,Min Temp,Max Wet Bulb,Min Wet Bulb,HDD,CDD
0,2020-01-01,2020-01-01,Forecast,ISP,42,32,38,29,28.0,0.0
1,2020-01-01,2020-01-01,Forecast,JFK,42,34,38,30,27.0,0.0
2,2020-01-01,2020-01-01,Forecast,LGA,42,36,36,31,26.0,0.0
3,2020-01-01,2020-01-02,Forecast,ISP,44,29,39,27,28.5,0.0
4,2020-01-01,2020-01-02,Forecast,JFK,46,32,40,29,26.0,0.0


# Load Forecast

In [112]:
# Download the twelve monthly load-forecast archives starting at `year` and
# stack every CSV they contain into a single DataFrame `load_forecast`.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/isolf/{url_date}isolf_csv.zip'

    # GET request; the timeout stops one stalled connection from hanging the run.
    response = requests.get(url + zip_url, timeout=60)

    # Report exactly which archive failed, then carry on with the other months.
    if response.status_code != 200:
        print(f'Failed to download ZIP file {url + zip_url} (HTTP {response.status_code})')
        continue

    # The archive is unpacked from memory; nothing is written to disk.
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        dfs = []
        for file_name in zip_ref.namelist():
            if not file_name.endswith('.csv'):
                continue
            with zip_ref.open(file_name) as csv_file:
                # Not named `df`: that name holds the weather data built earlier
                # in the notebook and must not be overwritten here.
                csv_df = pd.read_csv(csv_file)
            dfs.append(csv_df)

    # pd.concat raises on an empty list, so skip archives that held no CSV.
    if not dfs:
        print(f'No CSV files found in {url + zip_url}')
        continue

    combined_df = pd.concat(dfs, ignore_index=True)
    df_year.append(combined_df)

if not df_year:
    raise ValueError('No load forecast data could be downloaded; nothing to concatenate')

load_forecast = pd.concat(df_year, ignore_index=True)
# Drop every column that contains at least one missing value.
load_forecast = load_forecast.dropna(axis=1)

# NOTE: 'Time Stamp' is kept as the raw text from the CSV (it is not parsed to
# datetime here), so the resulting index compares and sorts as strings.
# Parse it with pd.to_datetime(..., format="%m/%d/%Y %H:%M") first if a
# DatetimeIndex is needed.

# Set 'Time Stamp' as index
load_forecast = load_forecast.set_index('Time Stamp')
load_forecast['Longil'][:50]

Time Stamp
01/01/2020 00:00    2012
01/01/2020 01:00    1896
01/01/2020 02:00    1798
01/01/2020 03:00    1737
01/01/2020 04:00    1721
01/01/2020 05:00    1748
01/01/2020 06:00    1795
01/01/2020 07:00    1786
01/01/2020 08:00    1777
01/01/2020 09:00    1767
01/01/2020 10:00    1781
01/01/2020 11:00    1814
01/01/2020 12:00    1874
01/01/2020 13:00    1918
01/01/2020 14:00    1974
01/01/2020 15:00    2062
01/01/2020 16:00    2279
01/01/2020 17:00    2580
01/01/2020 18:00    2600
01/01/2020 19:00    2559
01/01/2020 20:00    2492
01/01/2020 21:00    2372
01/01/2020 22:00    2193
01/01/2020 23:00    2007
01/02/2020 00:00    1892
01/02/2020 01:00    1822
01/02/2020 02:00    1777
01/02/2020 03:00    1768
01/02/2020 04:00    1807
01/02/2020 05:00    1912
01/02/2020 06:00    2108
01/02/2020 07:00    2209
01/02/2020 08:00    2186
01/02/2020 09:00    2131
01/02/2020 10:00    2071
01/02/2020 11:00    2028
01/02/2020 12:00    2000
01/02/2020 13:00    2022
01/02/2020 14:00    2090
01/02/2020 15:

# Interface Flows

In [107]:
# Download the twelve monthly interface limits/flows archives starting at
# `year` and stack every CSV they contain into a single DataFrame `flows`,
# indexed by timestamp.
url = 'http://mis.nyiso.com/public/'
df_year = []
year = 20200101
for i in range(12):
    url_date = increment_date_by_month(year, i)
    zip_url = f'csv/ExternalLimitsFlows/{url_date}ExternalLimitsFlows_csv.zip'

    # GET request; the timeout stops one stalled connection from hanging the run.
    response = requests.get(url + zip_url, timeout=60)

    # Report exactly which archive failed, then carry on with the other months.
    if response.status_code != 200:
        print(f'Failed to download ZIP file {url + zip_url} (HTTP {response.status_code})')
        continue

    # The archive is unpacked from memory; nothing is written to disk.
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        dfs = []
        for file_name in zip_ref.namelist():
            if not file_name.endswith('.csv'):
                continue
            with zip_ref.open(file_name) as csv_file:
                # Not named `df`: that name holds the weather data built earlier
                # in the notebook and must not be overwritten here.
                csv_df = pd.read_csv(csv_file)
            dfs.append(csv_df)

    # pd.concat raises on an empty list, so skip archives that held no CSV.
    if not dfs:
        print(f'No CSV files found in {url + zip_url}')
        continue

    combined_df = pd.concat(dfs, ignore_index=True)
    df_year.append(combined_df)

if not df_year:
    raise ValueError('No interface flow data could be downloaded; nothing to concatenate')

flows = pd.concat(df_year, ignore_index=True)
flows['Timestamp'] = pd.to_datetime(flows['Timestamp'])

# Index by time and discard the interface identifier columns, leaving only the
# remaining columns for the time-based resampling that follows.
flows = (
    flows
    .set_index('Timestamp')
    .drop(columns=['Interface Name', 'Point ID'])
)

In [108]:
# Stage 1: add up all rows that fall into the same 5-minute bin.
five_minute_bins = flows.resample('5min')
flows_5m = five_minute_bins.sum()

# Stage 2: average those 5-minute totals within each 60-minute bin.
hourly_bins = flows_5m.resample('60min')
flows_60m = hourly_bins.mean()
flows_60m

Unnamed: 0_level_0,Flow (MWH),Positive Limit (MWH),Negative Limit (MWH)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01 00:00:00,15628.028333,45813.000000,-79558.0
2020-01-01 01:00:00,15598.565000,46070.500000,-79558.0
2020-01-01 02:00:00,15420.762500,46158.000000,-79558.0
2020-01-01 03:00:00,14731.755833,46138.000000,-79558.0
2020-01-01 04:00:00,14189.192500,46128.000000,-79558.0
...,...,...,...
2020-12-31 19:00:00,19031.432500,45848.000000,-79368.0
2020-12-31 20:00:00,19378.762500,45843.000000,-79368.0
2020-12-31 21:00:00,18693.134167,45843.000000,-79368.0
2020-12-31 22:00:00,19711.463333,45751.333333,-79368.0
