In [1]:

import numpy as np 
import pandas as pd 
import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

In [2]:

def read_consumption_csv(filepath):
    """Read one consumption CSV and return a frame indexed by ``id``.

    The file's own header row is discarded and the columns are named
    ``id``, ``date``, ``time`` and ``consumption``.  ``date`` and ``time``
    are joined with a space and parsed with the format
    ``'%d %b %Y %H:%M:%S'`` into a single ``datetime`` column.

    The timestamp is built with ``pd.to_datetime`` after reading instead of
    ``read_csv(parse_dates={...}, date_parser=...)``: both of those options
    are deprecated in pandas 2.x, so this form keeps working across versions.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` and ``consumption`` (in that order), index ``id``.
    """
    raw = pd.read_csv(
        filepath,
        names=['id', 'date', 'time', 'consumption'],
        header=0,  # replace the header row in the file with `names`
        index_col='id',
        dtype={'date': str, 'time': str},  # keep both as text so they can be joined
    )
    stamps = pd.to_datetime(raw['date'] + ' ' + raw['time'], format='%d %b %Y %H:%M:%S')
    # Same layout the combined-column parse produced: datetime first, then consumption.
    return raw.assign(datetime=stamps)[['datetime', 'consumption']]


# Load every file under the Kaggle input directory into a list of frames.
res = []
for dirname, _subdirs, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        filepath = os.path.join(dirname, filename)
        print(filepath)
        res.append(read_consumption_csv(filepath))

In [3]:
# Combine the per-file frames into one table ordered by timestamp.
# pd.concat raises a bare "No objects to concatenate" when `res` is empty, which
# does not say what went wrong; raise the same exception type with the likely cause.
if not res:
    raise ValueError(
        "No objects to concatenate: no input files were loaded into `res`. "
        "Make sure the dataset is available under /kaggle/input and re-run the loading cell."
    )
df = pd.concat(res, ignore_index=True).sort_values('datetime')

ValueError: No objects to concatenate

In [None]:
# Report the earliest and latest timestamps and the span between them.
first_ts = df['datetime'].min()
last_ts = df['datetime'].max()
print(f"min date: {first_ts}, max date: {last_ts}")
print(f"range: {last_ts - first_ts}")

In [None]:
# Rolling one-day-window sum keyed on the datetime column (one value per row,
# not one per calendar day); display the first five rows.
(
    df
    .sort_values('datetime')
    .rolling('1D', on='datetime')
    .sum()
    .head()
)

In [None]:
import matplotlib.pyplot as plt
# Rolling one-hour sums, plotted only at rows whose own reading exceeds 0.5.
# The window is spelled '1h': the upper-case 'H' alias is deprecated in pandas 2.2+.
(
    df
    .rolling('1h', on='datetime')
    .sum()
    .loc[df['consumption'] > 0.5]
    .plot(
        x='datetime',
        y='consumption',
        title="Hourly energy consumption",
        figsize=(20, 6),
    )
)

In [None]:
# Monthly total consumption and daily average consumption for a month

# Rolling one-day-window sum at every reading; kept for the plots and summary
# in the cells that follow.
daily_cons = df.rolling('1D', on='datetime').sum()

# The monthly figures must come from per-calendar-day totals. Summing the rolling
# values above would count each reading once for every window it falls into.
# Days without readings appear with a total of 0 and are included in the
# average and the count.
daily_totals = df.resample('1D', on='datetime')['consumption'].sum()
(
    daily_totals
    .groupby(daily_totals.index.month)
    .agg(sum='sum', average='mean', count='count')
    .rename_axis('month')
)

In [None]:


# Line chart of the consumption column of daily_cons against its datetime column.
daily_cons.plot.line(
    x='datetime',
    y='consumption',
    title="Daily energy consumption",
    figsize=(10, 6),
)
plt.show()

In [None]:
# Display pandas' descriptive summary of daily_cons.
daily_cons.describe()

In [None]:
#Separating day and night consumption time

# Day covers readings from 06:00 (inclusive) to 18:00 (exclusive); every other
# reading is night. Only readings with consumption above 0.5 are kept in either.
# Boolean masks replace the earlier set-difference of index labels, because
# passing a set to .loc raises TypeError on pandas >= 2.0.
reading_hour = df['datetime'].dt.hour
is_daytime = (reading_hour >= 6) & (reading_hour < 18)
is_above_threshold = df['consumption'] > 0.5

day_consumption = df.loc[is_daytime & is_above_threshold]
night_consumption = df.loc[~is_daytime & is_above_threshold].sort_values('datetime')

In [None]:
# Rolling one-hour sums for the day and night subsets, drawn on shared axes.
# The window is spelled '1h': the upper-case 'H' alias is deprecated in pandas 2.2+.
fig, ax = plt.subplots(1, 1, figsize=(20, 6))
day_hourly_cons = day_consumption.rolling('1h', on='datetime').sum()
night_hourly_cons = night_consumption.rolling('1h', on='datetime').sum()
ax.plot(day_hourly_cons['datetime'], day_hourly_cons['consumption'], 'r-', label='Day Consumption')
ax.plot(night_hourly_cons['datetime'], night_hourly_cons['consumption'], 'b-', label='Night Consumption')
ax.legend()
ax.set_title("Hourly Energy Consumption")
ax.set_xlabel("Date")
ax.set_ylabel("Energy Consumption")
plt.show()

In [None]:
# Rolling one-day-window sums for the day and night subsets.
# The window uses the upper-case '1D' alias, matching the other one-day windows
# in this notebook and avoiding the lower-case spelling that newer pandas deprecates.
daily_day_cons = day_consumption.rolling('1D', on='datetime').sum()
daily_night_cons = night_consumption.rolling('1D', on='datetime').sum()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 7))

# One line per subset: day in red, night in blue.
for frame, fmt, label in (
    (daily_day_cons, 'r-', 'Day Consumption'),
    (daily_night_cons, 'b-', 'Night Consumption'),
):
    ax.plot(frame['datetime'], frame['consumption'], fmt, label=label)

ax.legend()
ax.set(
    title="Daily Energy Consumption",
    xlabel="Date",
    ylabel="Energy Consumption",
)
plt.show()

# **Time Series Forecasting**

In [None]:
# Aggregate to one row per calendar day: index by timestamp, order
# chronologically, then sum everything that falls into each one-day bin.
df_copy = (
    df
    .set_index('datetime')
    .sort_index()
    .resample('1D')
    .sum()
)

**Additive Decomposition**

In [None]:
#TimeSeries Additive Decomposition

from statsmodels.tsa.seasonal import seasonal_decompose

# Set the default figure size first so the decomposition figure is drawn at 15x6.
plt.rcParams["figure.figsize"] = (15, 6)

# Additive seasonal decomposition of the daily series, then plot the result.
result = seasonal_decompose(df_copy, model='additive')
result.plot()
plt.show()

**Multiplicative Decomposition**

# AutoRegression Forecasting

# **Moving Average Forecasting**

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Moving-average model: ARIMA with order (0, 0, 1), fitted on the daily series.
model = ARIMA(endog=df_copy, order=(0, 0, 1))
model_fit = model.fit()

# Predictions for 2022-03-01 through 2022-03-31.
next_one_m = model_fit.predict(start='2022-03-01', end='2022-03-31')


In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 7))

# Daily series in red, the model's predictions in blue.
for series, fmt, label in (
    (df_copy['consumption'], 'r-', 'Daily total Consumption'),
    (next_one_m, 'b-', 'Daily predicted total Consumption'),
):
    ax.plot(series, fmt, label=label)

ax.legend()
ax.set(
    title="Daily Energy Predicted Consumption using MA",
    xlabel="Date",
    ylabel="Energy Consumption",
)
plt.show()

# **Autoregressive Integrated Moving Average (ARIMA)**

In [None]:
# ARIMA with order (10, 1, 1), fitted on the daily series.
model = ARIMA(endog=df_copy, order=(10, 1, 1))
model_fit = model.fit()

# Predictions for 2022-03-01 through 2022-03-31.
next_one_m = model_fit.predict(start='2022-03-01', end='2022-03-31')

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 7))

# Daily series in red, the model's predictions in blue.
for series, fmt, label in (
    (df_copy['consumption'], 'r-', 'Daily total Consumption'),
    (next_one_m, 'b-', 'Daily predicted total Consumption'),
):
    ax.plot(series, fmt, label=label)

ax.legend()
ax.set(
    title="Daily Energy Predicted Consumption using ARIMA",
    xlabel="Date",
    ylabel="Energy Consumption",
)
plt.show()