In [None]:
import os
import warnings
from pathlib import Path
from zipfile import ZipFile

import numpy as np
import pandas as pd
import requests
import seaborn as sns
from fbprophet import Prophet
from fbprophet.plot import plot_weekly, plot_yearly
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error as mse

In [None]:
# Notebook-wide presentation settings.
sns.set()  # apply seaborn's default theme to every matplotlib figure
# Silence all warnings for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Render floats with two decimals in DataFrame/Series output.
pd.set_option('display.float_format', "{:.2f}".format)

In [None]:
# Location of the zipped household power consumption data on the UCI archive.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00235/household_power_consumption.zip'
# Downloading the zip file. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() stops the notebook here on an HTTP
# error instead of letting an error page be saved as the "zip" further down.
file = requests.get(url, allow_redirects=True, timeout=60)
file.raise_for_status()

In [None]:
# Target location for the downloaded archive: <home>/Downloads/house_power_comp.zip
downloads_path = str(Path.home()/'Downloads')
file_name = 'house_power_comp.zip'
# Prefix the platform's own path separator (not a hard-coded backslash) so that
# `downloads_path + file_name` is a valid path on Windows, macOS and Linux alike.
file_name = os.sep + file_name

In [None]:
# Persist the downloaded bytes as a zip archive in the user's Downloads folder
zip_target = downloads_path + file_name
with open(zip_target, mode='wb') as archive_out:
    archive_out.write(file.content)

In [None]:
# Folder that receives the extracted files. The same Path object is used both to
# create the folder and as the extraction target, so the two can no longer
# disagree (previously 'new folder2' was created but 'new folder' was used).
extract_dir = Path.home()/'Downloads'/'new folder'
extract_dir.mkdir(parents=True, exist_ok=True)

# Unzipping the compressed file into 'new folder' in the user's Downloads folder
with ZipFile(downloads_path + file_name, 'r') as zipped_file:
    print('contained in the zipped file:', zipped_file.namelist())
    zipped_file.extractall(extract_dir)

In [None]:
# Reading the data into a pandas dataframe.
# The path is built with pathlib so it is valid on every OS (no hard-coded backslash).
data_file = Path.home()/'Downloads'/'new folder'/zipped_file.namelist()[0]
# 'Date' and 'Time' are combined into a single parsed 'DateTime' column.
# NOTE(review): the UCI description lists Date as dd/mm/yyyy, hence dayfirst=True;
# without it, ambiguous dates such as 1/2/2007 may be read month-first. Confirm
# against the raw file.
df = pd.read_csv(data_file, sep=';',
                 parse_dates={"DateTime": ['Date', 'Time']},
                 dayfirst=True)
df.head()

In [None]:
# Work on an independent copy so the freshly loaded `df` stays untouched
df_copy = df.copy(deep=True)
df_copy

In [None]:
# Show the dtype of every column (object-typed columns are converted to float in a later cell)
df_copy.dtypes

In [None]:
# Summary statistics for all columns, non-numeric ones included (include='all')
df_copy.describe(include='all')

In [None]:
# df_copy.select_dtypes()

In [None]:
# Number of NaN entries per column. Any '?' placeholders are still plain strings
# at this point (they are replaced with NaN in a later cell), so they are not counted here.
df_copy.isna().sum()

In [None]:
# Inspect the row labelled 6839.
# NOTE(review): presumably picked because it holds '?' placeholders - confirm.
df_copy.loc[6839]

In [None]:
# Turn every '?' placeholder into a real NaN so pandas treats it as missing
df_copy.replace(to_replace='?', value=np.nan, inplace=True)

# Missing-value count per column after the replacement
df_copy.isna().sum()

In [None]:
# Cast every column after the first one to float in a single astype call
measurement_cols = df_copy.columns[1:]
df_copy = df_copy.astype({name: 'float' for name in measurement_cols})

df_copy.dtypes

In [None]:
# Replacing the missing values with the mean of the corresponding column values.
# The filled frame is assigned back to `df_copy` rather than calling
# `df_copy[col].fillna(..., inplace=True)`: that form mutates a column selection,
# which pandas does not guarantee will write through to the parent frame
# (under Copy-on-Write it silently does nothing).
value_cols = df_copy.columns[1:]  # every column except the first
# fillna with a Series fills each column with the value stored under its name;
# columns absent from the Series are left untouched.
df_copy = df_copy.fillna(df_copy[value_cols].mean())

df_copy.isna().sum()

In [None]:
# Replacing the missing values with the value that precedes the missing value.
# Note: once the mean-imputation cell above has run, no NaNs remain in these
# columns, so this cell changes nothing.
# Each column is assigned back explicitly (no inplace fill on a column selection),
# and `.ffill()` replaces the deprecated `fillna(method='ffill')`.
for col in df_copy.columns[1:]:
    df_copy[col] = df_copy[col].ffill()

df_copy.isna().sum()

In [None]:
# Promote the DateTime column to the frame's index (needed for resampling by date)
df_copy = df_copy.set_index('DateTime')

In [None]:
# Downsampling: collapse the records into one row per calendar day (daily mean)
daily_groups = df_copy.resample(rule='D')
df_day = daily_groups.mean()

df_day

In [None]:
# Number of NaN entries per column in the daily frame
df_day.isna().sum()

In [None]:
# Fill any NaN left in the daily frame with that column's mean.
# The result is assigned back to `df_day` instead of using
# `df_day[col].fillna(..., inplace=True)`, which mutates a column selection and
# is not guaranteed to update `df_day` itself.
df_day = df_day.fillna(df_day.mean())

<h2>Question 11</h2>

In [None]:
# Line plot of the daily Global_active_power series, labelled through the Axes object
ax = df_day['Global_active_power'].plot(figsize=(16, 6))
ax.set_title('Global Active Power Over Time')
ax.set_xlabel('Period')
ax.set_ylabel('Global Active Power Value')
plt.show()

<h2>Question 12</h2>

In [None]:
# Pearson correlation between Global active power and Global reactive power
active_power = df_day['Global_active_power']
reactive_power = df_day['Global_reactive_power']

round(active_power.corr(reactive_power), 2)

<h2>Question 13</h2>

In [None]:
# Correlation between Voltage and Global intensity (Series.corr with its default method)
voltage = df_day['Voltage']
intensity = df_day['Global_intensity']

round(voltage.corr(intensity), 2)

<h2>Question 14</h2>

In [None]:
# Retrieving the required data for the model: the date and the target series,
# renamed to the `ds` / `y` columns the model is fitted on below
model_data = (
    df_day.reset_index()[['DateTime', 'Global_active_power']]
    .rename(columns={'DateTime': 'ds', 'Global_active_power': 'y'})
)
model_data

In [None]:
# Training data: every row except the final 365
train_set = model_data.iloc[:-365]
train_set

In [None]:
# Hold-out data: the final 365 rows, re-indexed from 0
test_set = model_data.iloc[-365:].reset_index(drop=True)
test_set

In [None]:
# First model: Prophet constructed with no arguments (library defaults)
prophet = Prophet()

In [None]:
# Fit the first model on the training rows only (model_data without its last 365 rows)
prophet.fit(train_set)

In [None]:
# Forecast the hold-out dates with the fitted `prophet` model. The original
# called an undefined name `model`, which raises NameError on a fresh kernel.
y_pred = prophet.predict(test_set)
# Show the date, the point forecast and the trend component
y_pred[['ds', 'yhat', 'trend']]

In [None]:
# MAPE of the first model on the hold-out: mean(|actual - forecast| / actual) * 100
actual = test_set['y'].values
forecast = y_pred['yhat'].values[-365:]
relative_error = np.abs(actual - forecast) / actual
mape = round(np.mean(relative_error) * 100, 2)
print(f'Mean Absolute Percentage Error is {mape}')

<h2>Question 15</h2>

In [None]:
# RMSE of the first model on the hold-out: square root of the mean squared error
squared_error = mse(test_set['y'].values, y_pred['yhat'].values)
rmse = round(np.sqrt(squared_error), 2)
print(f'Root Mean Squared Error is {rmse}')

<h2>Question 16</h2>

In [None]:
# Draw fbprophet's yearly plot for the first (single-series) model
plot_yearly(prophet)

<h2>Question 17</h2>

In [None]:
# Build the frame for the multivariate model.
# `DateTime` is the index of df_day, not a column, so it must be turned back
# into a column with reset_index() before it can be renamed to `ds`. The
# original renamed a non-existent 'Datetime' column, leaving `data` without the
# `ds` column that the model is fitted on.
# Target -> `y`; the six remaining measurements -> add1..add6 (extra regressors).
data = df_day.reset_index().rename(columns={
    'DateTime': 'ds',
    'Global_active_power': 'y',
    'Global_reactive_power': 'add1',
    'Voltage': 'add2',
    'Global_intensity': 'add3',
    'Sub_metering_1': 'add4',
    'Sub_metering_2': 'add5',
    'Sub_metering_3': 'add6',
})

data

In [None]:
# Training rows for the second model: everything except the final 365 rows
train_set_2 = data.iloc[:-365]
train_set_2

In [None]:
# Hold-out rows for the second model: the final 365 rows
test_set_2 = data.iloc[-365:]
test_set_2

In [None]:
# Second model: default Prophet plus six extra regressors.
# add1..add6 are the renamed columns of `data`: Global_reactive_power, Voltage,
# Global_intensity, Sub_metering_1, Sub_metering_2 and Sub_metering_3.
second_prophet = Prophet()
second_prophet.add_regressor('add1')
second_prophet.add_regressor('add2')
second_prophet.add_regressor('add3')
second_prophet.add_regressor('add4')
second_prophet.add_regressor('add5')
second_prophet.add_regressor('add6')

In [None]:
# Fit the second model on the training rows
second_prophet.fit(train_set_2)
# Predict the hold-out rows; test_set_2 still carries the add1..add6 columns
y_pred_2 = second_prophet.predict(test_set_2)

y_pred_2

In [None]:
# MAPE of the second model on the hold-out: mean(|actual - forecast| / actual) * 100
actual_2 = test_set_2['y'].values
forecast_2 = y_pred_2['yhat'].values
relative_error_2 = np.abs(actual_2 - forecast_2) / actual_2
mape_2 = round(np.mean(relative_error_2) * 100, 2)
print(f'Mean Absolute Percentage Error is {mape_2}')

<h2>Question 18</h2>

In [None]:
# RMSE of the second model on the hold-out: square root of the mean squared error
squared_error_2 = mse(test_set_2['y'].values, y_pred_2['yhat'].values)
rmse_2 = round(np.sqrt(squared_error_2), 2)
print(f'Root Mean Squared Error is {rmse_2}')

<h2>Question 19</h2>

In [None]:
# Draw fbprophet's weekly plot for the second (extra-regressor) model
plot_weekly(second_prophet)