In [14]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found in it.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [15]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
import scipy.stats
from scipy.stats import pearsonr

In [16]:
#import gc
#gc.collect()

In [17]:
# Load the semicolon-separated readings. Columns 0 and 1 are merged into one parsed
# 'datetime' column, which becomes the index.
# na_values: missing readings are written as '?' in this file; letting the parser turn them
#            into NaN keeps the measurement columns numeric instead of strings.
# dayfirst:  the dataset's dates are day/month/year, so state it explicitly rather than
#            relying on format inference (an ambiguous date like 01/02 must not be read
#            month-first).
df = pd.read_csv('../input/household-power-consumption/household_power_consumption.txt', sep = ';', header=0,
                 low_memory=False, infer_datetime_format=True, na_values=['?'], dayfirst=True,
                 parse_dates={'datetime':[0,1]}, index_col=['datetime'])

In [18]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [19]:
# Mark the '?' placeholders as real missing values (NaN) and make every column numeric.
# np.nan is used instead of the string 'nan', so the conversion no longer depends on
# astype() parsing the text 'nan'. Chaining also replaces the in-place mutation.
df = df.replace('?', np.nan).astype('float32')

In [20]:
# (rows, columns) of the numeric frame.
df.shape

In [21]:
# Count the missing values in each column.
df.isnull().sum()

In [23]:
# Mean imputation: every NaN is replaced by the mean of its own column.
column_means = df.mean()
df = df.fillna(column_means)

In [24]:
# Count the missing values per column again, now that the frame has been mean-filled.
df.isnull().sum()

In [25]:
# Downsample to one row per calendar day by summing every column within that day.
# NOTE(review): the sum is applied to all columns alike, including Voltage — confirm that a
# daily sum (rather than a mean) is what is wanted for that column.
df_res = df.resample('D').sum()
print(df_res.head())

In [29]:
# Line plot of the daily Global_active_power series against its datetime index.
plt.figure(figsize=(10,6))
plt.plot(df_res.index, df_res.Global_active_power, '--', marker='*')
plt.grid()
plt.xlabel('datetime')
plt.ylabel('Global active power')
plt.title('Plot of Global_active_power over time')
# Render explicitly, like the other plotting cells, so the cell shows only the figure and
# not the Text object returned by plt.title.
plt.show()

In [34]:
# Correlation between the daily Voltage and Global_active_power series
# (Series.corr computes the Pearson coefficient unless another method is requested).
df_res['Voltage'].corr(df_res['Global_active_power'])

In [35]:
# Training window: the first 1077 daily rows.
# .copy() makes df_train an independent frame rather than a slice of df_res, so modifying it
# later cannot trigger SettingWithCopyWarning or touch df_res.
df_train = df_res.iloc[:1077, :].copy()
df_train.shape

In [36]:
# Test window: every daily row from position 1077 onwards.
# .copy() makes df_test an independent frame rather than a slice of df_res, so modifying it
# later cannot trigger SettingWithCopyWarning or touch df_res.
df_test = df_res.iloc[1077:, :].copy()
df_test.shape

In [37]:
from fbprophet import Prophet

In [38]:
# Turn the datetime index into an ordinary column.
# The result is rebound instead of using inplace=True, so a frame that was sliced out of
# df_res is never mutated in place.
df_train = df_train.reset_index()
df_train.head()

In [39]:
# Keep only the first two columns (selected by position).
# NOTE(review): presumably these are 'datetime' and 'Global_active_power', since the next
# cell renames exactly those two — verify the column order.
df_train = df_train.iloc[:,:2]             

In [40]:
# Rename to 'ds' (timestamp) and 'y' (target), the column names the Prophet model is fitted on.
df_train = df_train.rename(columns={"datetime": "ds", "Global_active_power": "y"})
df_train.head()

In [41]:
# Turn the datetime index into an ordinary column.
# The result is rebound instead of using inplace=True, so a frame that was sliced out of
# df_res is never mutated in place.
df_test = df_test.reset_index()
df_test.head()

In [42]:
# Keep only the first two columns (selected by position).
# NOTE(review): presumably these are 'datetime' and 'Global_active_power', since the next
# cell renames exactly those two — verify the column order.
df_test = df_test.iloc[:,:2]

In [44]:
# Rename to 'ds' (timestamp) and 'y' (target) so the test frame matches the training frame.
df_test = df_test.rename(columns={"datetime": "ds", "Global_active_power": "y"})
df_test.head()

In [45]:
# Fit a Prophet model with default settings on the training frame (columns 'ds' and 'y').
model = Prophet()
model.fit(df_train)

In [46]:
# Predict for the rows of df_test and preview the resulting forecast frame.
forecast = model.predict(df_test) 
forecast.head()

In [47]:
# Show the prediction, its bounds and the trend columns for the first forecast rows.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower', 'trend_upper']].head()
# yhat is the prediction, while yhat_lower and yhat_upper are its lower and upper bounds

In [48]:
# Draw the forecast with the model's built-in plot helper.
model.plot(forecast)
plt.show()

In [49]:
from sklearn import metrics
def Metric(y_true,y_pred):
    """Return the mean absolute percentage error (MAPE) as a percentage.

    Both inputs are converted to NumPy arrays; the result is
    mean(|(y_true - y_pred) / y_true|) * 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error)) * 100

In [51]:
# MAPE between the actual df_test['y'] values and the predicted forecast['yhat'] values.
MAPE = Metric(df_test['y'],forecast['yhat'])
# MAPE stands for Mean *Absolute* Percentage Error; the message previously said "Average".
print(f'the Mean Absolute Percentage Error ( MAPE) is: {round(MAPE,2)}')

In [53]:
# Root-mean-squared error between the actual df_test['y'] and the predicted forecast['yhat'].
mse = metrics.mean_squared_error(df_test['y'], forecast['yhat'])
RMSE = np.sqrt(mse)
round(RMSE, 2)

In [54]:
# Break the forecast down into its components and plot each of them.
model.plot_components(forecast)

In [55]:
# Move the datetime index into a regular column so it can be renamed to 'ds' afterwards.
# The guard keeps the cell idempotent: an unconditional reset_index would add a spurious
# 'index' column every time the cell is re-run.
if df_res.index.name == 'datetime':
    df_res = df_res.reset_index()
df_res.head()

In [57]:
# Short column names for the multivariate model: 'ds' and 'y' for the timestamp and target,
# abbreviations for the remaining measurement columns used as regressors.
prophet_column_names = {
    'datetime': 'ds',
    'Global_active_power': 'y',
    'Global_reactive_power': 'GRP',
    'Voltage': 'V',
    'Global_intensity': 'GI',
    'Sub_metering_1': 'SM1',
    'Sub_metering_2': 'SM2',
    'Sub_metering_3': 'SM3',
}
df_res2 = df_res.rename(columns=prophet_column_names)
df_res2.head()

In [58]:
# Chronological split at row 1077: everything before it trains, everything from it on tests.
split_at = 1077
train2, test2 = df_res2.iloc[:split_at, :], df_res2.iloc[split_at:, :]

In [61]:
# Second model: Prophet with daily seasonality switched on and every remaining measurement
# column registered as an extra regressor.
model = Prophet(daily_seasonality=True)
for regressor_name in ('GRP', 'V', 'GI', 'SM1', 'SM2', 'SM3'):
    model.add_regressor(regressor_name)

In [62]:
# Fit the regressor-augmented model on train2, then predict for the rows of test2.
# NOTE(review): test2 carries the observed values of the regressor columns, so these
# predictions rely on them — confirm that is intended.
model = model.fit(train2)
forecast_2 = model.predict(test2)
forecast_2.head()

In [63]:
# Draw the regressor model's forecast with the built-in plot helper.
model.plot(forecast_2)
plt.show()

In [65]:
# MAPE between the actual test2['y'] values and the predicted forecast_2['yhat'] values.
# Metric is the MAPE helper already defined earlier in the notebook; it is reused here
# rather than defined a second time, so there is a single copy to maintain.
MAPE = Metric(test2['y'],forecast_2['yhat'])
round(MAPE,2)

In [66]:
# Root-mean-squared error between the actual test2['y'] and the predicted forecast_2['yhat'].
mse_2 = metrics.mean_squared_error(test2['y'], forecast_2['yhat'])
RMSE = np.sqrt(mse_2)
round(RMSE, 2)

In [67]:
# Break the regressor model's forecast down into its components and plot each of them.
model.plot_components(forecast_2)