In [None]:
# -----********************-----

# Created Time: 2024/12/09

# Last updated: 2024/12/11

# Author: Tara Liu, Yiyi He

### Use Case

# This notebook explores the application of autoregressive models
# 1. 

# -----********************-----

In [29]:
# Import libraries
import os
# Stats
from statsmodels.tsa.api import ARDL
import statsmodels.api as sm
import numpy as np
from statsmodels.tsa.ardl import ardl_select_order

# Geo
from shapely.geometry import Point, Polygon
import geopandas as gpd
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.options.display.max_rows = 1000

# Plot
import matplotlib.pyplot as plt
import seaborn as sns
# Processing
from tqdm import tqdm

In [53]:
# Define functions
def find_lag(df, maxlag):

    print(f'testing maxlag: {maxlag}')
    time_start = pd.Timestamp.now()
    
    sel_res = ardl_select_order(
        df['pct_blackout'],
        exog=df[['t2m', 'wind_speed', 'tp', 'wind_speed_forest_cover']],
        maxlag=maxlag,
        ic='aic',
        maxorder=maxlag
        )
    
    time_selected = pd.Timestamp.now()
    print(f'time elapsed for selecting order: {time_selected-time_start}')
    
    return sel_res.model.ardl_order

In [19]:
# Load input dataframe
df_518 = pd.read_csv("/Users/yiyi/Desktop/df_final_geo_518.csv", index_col=0)

# Add interaction term: forest cover * wind speed
df_518['wind_speed_forest_cover'] = df_518['wind_speed'] * df_518['tree2000']
df_518.head(3)

  df_518 = pd.read_csv("/Users/yiyi/Desktop/df_final_geo_518.csv", index_col=0)


Unnamed: 0_level_0,index_x,datetime,Location name_x,ESMI_ID,District,State,Category,Connection Type_x,Lat,Lon,t2m,u10,v10,tp,date,time,station_id,index_y,pct_blackout,wind_speed,Connection Type_y,climate_zone_DN,climate_zone_name,climate_zone_code,tree2000,gain,lossyear,geometry,wind_forest_cover,climate_zone_code_2_cat,climate_zone_code_2_Am,climate_zone_code_2_Aw,climate_zone_code_2_BSh,climate_zone_code_2_Cw,wind_speed_forest_cover
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
0,2569489,2014-11-17 17:00:00,Vijay Nagar,,Hyderabad,Telangana,State Capital,Domestic,17.394295,78.451016,295.09723,-1.271197,-0.320305,0.0005645638,20141117,1700,495,7239294,0.0,1.31093,Domestic,3.0,"Tropical, savannah",Aw,0.0,0.0,2000.0,POINT (78.45101625 17.39429481),0.0,1,0,1,0,0,0.0
1,2569493,2014-11-29 17:00:00,Vijay Nagar,,Hyderabad,Telangana,State Capital,Domestic,17.394295,78.451016,293.0487,-1.548639,-1.584031,0.0,20141129,1700,495,7239582,0.0,2.215274,Domestic,3.0,"Tropical, savannah",Aw,0.0,0.0,2000.0,POINT (78.45101625 17.39429481),0.0,1,0,1,0,0,0.0
2,2569500,2014-11-30 22:00:00,Vijay Nagar,,Hyderabad,Telangana,State Capital,Domestic,17.394295,78.451016,289.83447,-1.289795,-1.528702,4.351137e-07,20141130,2200,495,7239611,0.0,2.000125,Domestic,3.0,"Tropical, savannah",Aw,0.0,0.0,2000.0,POINT (78.45101625 17.39429481),0.0,1,0,1,0,0,0.0
3,2569503,2014-11-13 07:00:00,Vijay Nagar,,Hyderabad,Telangana,State Capital,Domestic,17.394295,78.451016,297.1411,-3.871719,0.47276,0.0006666072,20141113,700,495,7239188,0.0,3.900476,Domestic,3.0,"Tropical, savannah",Aw,0.0,0.0,2000.0,POINT (78.45101625 17.39429481),0.0,1,0,1,0,0,0.0
4,2569506,2014-12-31 15:00:00,Vijay Nagar,,Hyderabad,Telangana,State Capital,Domestic,17.394295,78.451016,296.46838,-2.901352,0.71742,2.732359e-06,20141231,1500,495,7240348,0.0,2.988735,Domestic,3.0,"Tropical, savannah",Aw,0.0,0.0,2000.0,POINT (78.45101625 17.39429481),0.0,1,0,1,0,0,0.0


In [50]:
df_518[df_518.tree2000.isna()]

Unnamed: 0_level_0,index_x,datetime,Location name_x,ESMI_ID,District,State,Category,Connection Type_x,Lat,Lon,t2m,u10,v10,tp,date,time,station_id,index_y,pct_blackout,wind_speed,Connection Type_y,climate_zone_DN,climate_zone_name,climate_zone_code,tree2000,gain,lossyear,geometry,wind_forest_cover,climate_zone_code_2_cat,climate_zone_code_2_Am,climate_zone_code_2_Aw,climate_zone_code_2_BSh,climate_zone_code_2_Cw,wind_speed_forest_cover
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
104472,7851772,2022-11-13 20:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,291.02948,-0.105525,0.629868,1.118704e-06,20221113,2000,554,4876531,0.033333,0.638646,,,,,,,,,,-1,0,0,0,0,
104473,7851773,2022-12-16 07:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,296.72290,-0.628265,-0.822326,5.190225e-07,20221216,700,554,4877310,0.000000,1.034861,,,,,,,,,,-1,0,0,0,0,
104474,7851775,2022-10-15 07:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,302.28442,0.781073,-0.811333,1.939070e-04,20221015,700,554,4875822,0.000000,1.126204,,,,,,,,,,-1,0,0,0,0,
104475,7851777,2022-12-28 07:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,292.75867,-0.808284,-0.723110,1.494995e-03,20221228,700,554,4877598,0.000000,1.084533,,,,,,,,,,-1,0,0,0,0,
104476,7851782,2022-10-08 22:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,296.88013,0.937381,0.054934,1.055375e-02,20221008,2200,554,4875669,0.000000,0.938989,,,,,,,,,,-1,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3432731,40018615,2020-01-01 00:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,284.25266,-0.736793,-1.007421,8.523463e-07,20200101,0,554,4874415,0.000000,1.248103,,,,,,,,,,-1,0,0,0,0,
3432732,40019192,2020-01-01 01:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,284.06710,-0.937087,-0.832255,0.000000e+00,20200101,100,554,4874416,0.000000,1.253308,,,,,,,,,,-1,0,0,0,0,
3432733,40019517,2020-01-01 05:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,294.77924,-0.860615,-0.798433,0.000000e+00,20200101,500,554,4874420,0.000000,1.173948,,,,,,,,,,-1,0,0,0,0,
3432734,40020790,2020-01-01 02:00:00,Borbheta,886.0,Jorhat,Assam,,,26.735158,94.203708,286.21185,-0.684691,-0.256471,0.000000e+00,20200101,200,554,4874417,0.000000,0.731149,,,,,,,,,,-1,0,0,0,0,


In [54]:
# Testing on a small selection of stations
station_id = 232
station_df = df_518[df_518['station_id'] == station_id]
station_df['datetime'] = pd.to_datetime(station_df['datetime'])

find_lag(station_df, 5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  station_df['datetime'] = pd.to_datetime(station_df['datetime'])
  self._init_dates(dates, freq)


testing maxlag: 5
time elapsed for selecting order: 0 days 00:01:58.289826


  self._init_dates(dates, freq)


(4, 0, 1, 0)

In [44]:
# Testing on a small selection of stations
station_id = 125
station_df = df_518[df_518['station_id'] == station_id]
station_df['datetime'] = pd.to_datetime(station_df['datetime'])

find_lag(station_df, 5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  station_df['datetime'] = pd.to_datetime(station_df['datetime'])
  self._init_dates(dates, freq)


testing maxlag: 5
time elapsed for selecting order: 0 days 00:00:50.657680


  self._init_dates(dates, freq)


(1, 0, 2, 0)

In [45]:
# Testing on a small selection of stations
station_id = 161
station_df = df_518[df_518['station_id'] == station_id]
station_df['datetime'] = pd.to_datetime(station_df['datetime'])

find_lag(station_df, 5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  station_df['datetime'] = pd.to_datetime(station_df['datetime'])
  self._init_dates(dates, freq)


testing maxlag: 5
time elapsed for selecting order: 0 days 00:00:55.545937


  self._init_dates(dates, freq)


(1, 0, 0)