In [1]:
import statsmodels.api as sm
import warnings
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.pyplot import acorr
%matplotlib inline
 
warnings.filterwarnings("ignore")
import pandas as pd
pd.plotting.register_matplotlib_converters()
import numpy as np
import seaborn as sns

import plotly.express as px
from sklearn.ensemble import IsolationForest
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, TimeSeriesSplit, train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import lightgbm as lgb

# Grab seaborn's current palette first, then apply the matplotlib style sheet
# (same order as before, so the captured palette is unaffected by the style).
color_pal = sns.color_palette()
plt.style.use('fivethirtyeight')

# Notebook-wide figure defaults: (10, 5) figure size and no axes grid.
mpl.rcParams.update({
    'figure.figsize': (10, 5),
    'axes.grid': False,
})

ModuleNotFoundError: No module named 'lightgbm'

In [2]:
pip install lightgbm

Defaulting to user installation because normal site-packages is not writeable
Collecting lightgbm
  Downloading lightgbm-3.3.2-py3-none-win_amd64.whl (1.0 MB)
     ---------------------------------------- 1.0/1.0 MB 350.7 kB/s eta 0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
edata = pd.read_csv("energydata.csv")

In [None]:
# Preview the first rows to sanity-check what was loaded.
edata.head()

In [None]:
# Convert the `date` column to datetime values so it can serve as a time index.
# `infer_datetime_format` is intentionally not passed: pandas deprecated it in 2.0,
# where it no longer has any effect.
edata['date'] = pd.to_datetime(edata['date'])

In [None]:
# Aggregate the readings to an hourly basis (mean per hour), then turn the
# timestamp back into a regular `date` column.
edata = (
    edata
    .set_index('date')
    .resample('H')
    .mean()
    .reset_index()
)

In [None]:
# Index the frame by timestamp. Rebinding the name (rather than `inplace=True`)
# avoids mutating the existing frame object in place.
edata = edata.set_index('date')

In [None]:
def create_features(edata):
    """
    Create calendar features from the frame's datetime index.

    Parameters
    ----------
    edata : pandas.DataFrame
        Frame whose index is a ``DatetimeIndex``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``hour``, ``dayofweek``, ``month``,
        ``year``, ``dayofmonth`` and ``weekofyear`` (ISO week, int64) added.
        The input frame is not modified, so the function can be called safely
        on slices of another frame and re-run without side effects.
    """
    idx = edata.index
    # `assign` works on a copy, so the caller's frame is left untouched.
    return edata.assign(
        hour=idx.hour,
        dayofweek=idx.dayofweek,
        month=idx.month,
        year=idx.year,
        dayofmonth=idx.day,
        # isocalendar() yields a nullable UInt32 week; store it as plain int64.
        weekofyear=idx.isocalendar().week.astype(np.int64).to_numpy(),
    )

# Add the calendar columns derived from the datetime index to the working frame.
edata=create_features(edata)

In [None]:
def add_lags(edata, target='appl', lags_days=(30, 60, 90, 120)):
    """
    Add lagged copies of the target column, looked up by timestamp.

    For the k-th entry ``d`` of ``lags_days`` a column ``lag{k}`` is created
    holding the target value observed exactly ``d`` days before each row's
    timestamp. Rows with no observation at that earlier timestamp get NaN.

    Parameters
    ----------
    edata : pandas.DataFrame
        Frame indexed by timestamp and containing the ``target`` column.
    target : str, default 'appl'
        Name of the column to lag.
    lags_days : sequence of int, default (30, 60, 90, 120)
        Lag offsets in days; the defaults produce ``lag1`` .. ``lag4``.
        Pass a longer sequence (e.g. including 150) for additional lags.

    Returns
    -------
    pandas.DataFrame
        A copy of ``edata`` with the lag columns appended; the input frame is
        not modified.
    """
    lagged = edata.copy()
    # timestamp -> target value lookup table
    target_map = lagged[target].to_dict()
    for position, days in enumerate(lags_days, start=1):
        shifted_index = lagged.index - pd.Timedelta(days=days)
        lagged[f'lag{position}'] = shifted_index.map(target_map)
    return lagged

In [None]:
# Append the lagged-target columns (lag1..lag4) to the working frame.
edata=add_lags(edata)

In [None]:
# `train` and `test` are only created further down by slicing `edata`, so calling
# create_features on them at this point fails with NameError on a fresh kernel.
# `edata` already carries the calendar columns and its slices inherit them, so
# this cell just verifies they are present.
expected_calendar_cols = {'hour', 'dayofweek', 'month', 'year', 'dayofmonth', 'weekofyear'}
assert expected_calendar_cols <= set(edata.columns), "run create_features(edata) first"

In [None]:
# List the available columns before choosing the model inputs.
edata.columns

In [None]:
# Names of the model input columns, assembled group by group (order preserved).
numbered_cols = [name for i in range(1, 10) for name in (f'temp{i}', f'rh_{i}')]
misc_cols = ['temp_out', 'press', 'rh_out', 'windspeed', 'visibility',
             'dewpoint', 'rv1', 'rv2']
time_cols = ['hour', 'dayofweek', 'month', 'year', 'dayofmonth', 'weekofyear']
lag_cols = [f'lag{i}' for i in range(1, 5)]

X = ['lgt'] + numbered_cols + misc_cols + time_cols + lag_cols
# Target column, kept as a one-element list so that frame[y] stays a DataFrame.
y = ['appl']

In [None]:
# Chronological hold-out: rows before the cut-off date train the model,
# rows on or after it are kept for testing.
split_date = '2016-04-16'
train = edata[edata.index < split_date]
test = edata[edata.index >= split_date]

In [None]:
# Report (rows, columns) of the train and test windows, one per line.
print(train.shape, test.shape, sep='\n')

In [None]:
# Hold out 30% of the training window for validation.
# The inputs are taken straight from `train` because `X_train` / `y_train` are only
# defined in a later cell. The seed is written as the integer 0, which is the value
# the previous boolean `False` presumably resolved to (bool is an int subclass).
# NOTE(review): train_test_split shuffles rows by default, which mixes past and future
# observations of a time series; consider `shuffle=False` if this split is used for
# model selection.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    train[X], train[y], test_size=0.3, random_state=0
)

In [None]:
# Number of cross-validation folds.
n_folds = 5
# Time-ordered splitter: every fold validates on rows that come after its training
# rows, consistent with the chronological train/test split used in this notebook.
# A plain KFold is deliberately not assigned afterwards, since it would overwrite
# this splitter and let folds train on rows that follow their validation rows.
folds = TimeSeriesSplit(n_splits=n_folds)

In [None]:
# Feature and target frames for the training and test windows.
X_train, y_train = train[X], train[y]
X_test, y_test = test[X], test[y]

In [None]:
# LightGBM regressor created with its default constructor arguments.
reg= lgb.LGBMRegressor()

In [None]:
# Display the estimator's current parameter values.
reg.get_params()

In [None]:
# Empty dict — presumably a placeholder for hyper-parameter overrides; TODO confirm intended use.
params={}

In [None]:
# Hyper-parameters belong to the estimator, not to fit(): apply them to `reg`.
model_params = dict(
    objective='regression',
    boosting_type='gbdt',
    importance_type='split',
    max_depth=3,
    n_estimators=500,
    learning_rate=0.1,
)
reg.set_params(**model_params)

# Arguments that fit() itself accepts. Early stopping (200 rounds) and per-iteration
# logging are passed as callbacks; these are available in LightGBM 3.3.x and replace the
# `early_stopping_rounds=` / `verbose=` fit keywords, which later releases removed.
# NOTE(review): early stopping is monitored on the test window, so the reported test
# score will be optimistic; consider monitoring a separate validation slice instead.
fit_params = dict(
    eval_set=[(X_test, y_test)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=200),
        lgb.log_evaluation(period=1),
    ],
)
reg.fit(X_train, y_train, **fit_params)