# Imports

In [None]:
import math
import os
from datetime import datetime
from os.path import exists
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import preprocessing
from wwo_hist import retrieve_hist_data

# Utility functions

**Picture size**

In [None]:
rb_scaler = preprocessing.RobustScaler()

In [None]:
plt.rcParams["figure.figsize"] = [10, 7]

**Functions**

In [None]:
def power_factor(active_power, voltage, current):
    """Return the ratio of active power to ``voltage * current``.

    ``active_power`` is multiplied by 1000 before the division, which
    presumably converts kilowatts to watts so that it is comparable with
    ``voltage * current`` (assumed volts * amperes) -- TODO confirm the units.

    The arguments may be plain numbers or element-wise containers such as
    NumPy arrays / pandas Series.

    Returns:
        ``active_power * 1000 / (voltage * current)``. For plain numbers with
        a zero denominator the ratio is undefined and ``nan`` is returned.
    """
    apparent_power = voltage * current
    try:
        return active_power * 1000 / apparent_power
    except ZeroDivisionError:
        # Plain Python numbers raise on a zero denominator; report the
        # undefined ratio as NaN instead of aborting a whole column computation.
        return math.nan

In [None]:
def active_energy(global_active_power, sub_metering_1, sub_metering_2, sub_metering_3):
    """Return what is left of the global active power after the sub-meterings.

    ``global_active_power`` is first rescaled by ``1000 / 60``; the three
    sub-metering readings are then subtracted from it one after another.
    """
    remaining = global_active_power * 1000 / 60
    for metered in (sub_metering_1, sub_metering_2, sub_metering_3):
        remaining = remaining - metered
    return remaining

In [None]:
def infer_season(datetime_object):
    """Return the meteorological (northern-hemisphere) season of a date.

    Args:
        datetime_object: Any value whose string form starts with an ISO date
            (``YYYY-MM-DD``), e.g. a ``datetime``, a pandas ``Timestamp`` or
            a string such as ``"2008-12-16 17:00:00"``.

    Returns:
        ``"winter"`` (Dec-Feb), ``"spring"`` (Mar-May), ``"summer"`` (Jun-Aug)
        or ``"autumn"`` (Sep-Nov).

    Raises:
        ValueError: If the extracted month is not in 1..12.
    """
    date_part = str(datetime_object).split()[0]
    month = int(date_part.split("-")[1])

    # December belongs to winter; it used to be lumped in with autumn, which
    # left winter with only two months.
    if month in (12, 1, 2):
        return "winter"
    if 3 <= month <= 5:
        return "spring"
    if 6 <= month <= 8:
        return "summer"
    if 9 <= month <= 11:
        return "autumn"
    raise ValueError("Cannot infer a season from month {}".format(month))

In [None]:
def robust_scale(df):
    """Scale every column of ``df`` with the module-level ``rb_scaler``.

    The scaler is re-fitted on ``df`` each time this function is called.

    Args:
        df: DataFrame to scale (expected to hold only numeric columns).

    Returns:
        A new DataFrame with the same columns and index as ``df`` holding the
        scaled values. ``df`` itself is not modified.

    NOTE(review): the scaled values are no longer in physical units, so
    unit-based features (e.g. a power factor) should be derived from unscaled
    data.
    """
    scaled_values = rb_scaler.fit_transform(df.to_numpy())
    # Return the scaled frame: the previous version built it and then returned
    # the unscaled input by mistake.
    return pd.DataFrame(data = scaled_values, columns = df.columns.tolist(), index = df.index)

In [None]:
def sub_plot_resampling(dataset, name, period, title, rotation = None, kind = None, color = "blue"):
    """Plot the resampled sum, mean and std of one column as separate subplots.

    Args:
        dataset: DataFrame that supports ``resample`` (datetime-like index).
        name: Column to aggregate.
        period: Resampling rule passed to ``resample`` (e.g. ``"D"``).
        title: Figure title.
        rotation: Optional x tick label rotation; skipped when falsy.
        kind: Optional pandas plot kind; pandas' default is used when falsy.
        color: Colour applied to the plots.
    """
    stats = dataset[name].resample(period).agg(['sum', 'mean', 'std'])
    plot_options = {"subplots": True, "title": title, "color": color}
    if kind:
        # Only forward ``kind`` when one was requested so pandas keeps its default.
        plot_options["kind"] = kind
    stats.plot(**plot_options)
    if rotation:
        plt.xticks(rotation=rotation)
    plt.show()

In [None]:
def round_time_hour(dataset, index):
    """Return a copy of ``dataset`` indexed by ``index`` with an hour-floored ``time`` column.

    Args:
        dataset: DataFrame with one row per entry of ``index``.
        index: Datetime-like values (e.g. a ``DatetimeIndex``) with the same
            length as ``dataset``.

    Returns:
        A new DataFrame whose index holds ``index`` (named ``"timestamps"``)
        and whose ``"time"`` column holds each timestamp truncated to the
        start of its hour. ``dataset`` itself is not modified.
    """
    timestamps = pd.DatetimeIndex(index)
    # Work on a copy: the previous implementation added its helper columns to
    # the caller's frame as a side effect.
    rounded = dataset.copy()
    # Vectorised replacement for the per-element replace(minute=0, ...) loop.
    rounded["time"] = timestamps.floor("h")
    rounded.index = timestamps.rename("timestamps")
    return rounded

# Load Data

In [None]:
# Read the raw measurements: the "Date" and "Time" columns are combined into a
# single parsed "timestamps" column that becomes the index, and both "nan" and
# "?" are read as missing values.
dataset = pd.read_csv(
    "household_electric_consumption.csv",
    sep = ",",
    na_values = ["nan", "?"],
    parse_dates = {'timestamps' : ['Date', 'Time']},
    infer_datetime_format = True,
    index_col = "timestamps",
    low_memory = False,
)

In [None]:
# Discard the "Unnamed: 0" column that came along with the CSV.
dataset = dataset.drop(columns = ["Unnamed: 0"])

# Data Discovery

In [None]:
dataset.head()

In [None]:
dataset.info()

In [None]:
dataset.describe()

As shown below, three out of seven features have huge ranges (large number of unique values). Therefore, the data will be standardized in preparation for NN training.

In [None]:
for name in dataset.columns.tolist():
    column = dataset[name]
    print("Range of {} column: {} --> {}".format(name, column.min(), column.max()))
    # nunique() ignores missing values. A set built from tolist() would count
    # every NaN as its own distinct value and inflate the number.
    print("Number of unique values in {} column: {}".format(name, column.nunique()))
    print("\n")

# Cleaning

**Replace all Null values with mean of each column**

In [None]:
dataset = dataset.fillna(dataset.mean())

**Sanity Check**

In [None]:
dataset.isnull().sum()

**Reduce the influence of outliers: Robust Scaling**

In [None]:
# Robust scaling of the full dataset, on a positional index (timestamps dropped)
robust_full = robust_scale(dataset.reset_index(drop = True))

In [None]:
# Put the original timestamps back as the index (plus an hour-floored "time"
# column), then label every row with the season of its "time" value.
dataset = round_time_hour(robust_full, dataset.index)
dataset["season"] = dataset["time"].map(infer_season)

# Feature Engineering

### Power Factor

In [None]:
# Vectorised: power_factor is applied to whole columns at once instead of
# looping over every row in Python. The helper names now hold pandas Series
# rather than lists; rows with a zero denominator yield inf/NaN.
voltage = dataset["Voltage"]
global_active_power = dataset["Global_active_power"]
global_current_intensity = dataset["Global_intensity"]
dataset["power_factor"] = power_factor(global_active_power, voltage, global_current_intensity)

### Active Energy

In [None]:
# Vectorised: active_energy is applied to whole columns at once. The global
# active power is read straight from the frame so this cell does not rely on
# a variable created by another cell.
sub_metering_1 = dataset["Sub_metering_1"]
sub_metering_2 = dataset["Sub_metering_2"]
sub_metering_3 = dataset["Sub_metering_3"]
dataset["active_energy"] = active_energy(dataset["Global_active_power"], sub_metering_1, sub_metering_2, sub_metering_3)

### Weather Data

In [None]:
location = "sceaux"
file_name = location+".csv"

if exists(file_name):
    # Re-use the CSV written by an earlier download.
    hist_weather_data = pd.read_csv(file_name)
else:
    # The API key is read from the environment so it is not stored in the notebook.
    api_key = os.environ.get("WWO_API_KEY")
    if not api_key:
        raise RuntimeError("Set the WWO_API_KEY environment variable to download the weather data.")
    # Query by location name (not by file name) so that the exported CSV is
    # written as "<location>.csv", the file checked for above.
    retrieved = retrieve_hist_data(api_key, [location], '16-12-2008', '26-11-2012', 1,
                                   location_label = False, export_csv = True, store_df = True)
    # NOTE(review): with store_df=True the library appears to return one frame
    # per requested location -- confirm against the wwo_hist documentation.
    hist_weather_data = retrieved[0] if isinstance(retrieved, list) else retrieved

In [None]:
hist_weather_data = hist_weather_data.rename(columns={"date_time": "time"})

In [None]:
# pd.to_datetime handles strings as well as values that are already datetimes
# (strptime raises on the latter, e.g. when this cell is run a second time).
hist_weather_data["time"] = pd.to_datetime(hist_weather_data["time"], format = "%Y-%m-%d %H:%M:%S")

In [None]:
hist_weather_data = hist_weather_data[["time", "sunrise", "sunset", "tempC", "FeelsLikeC"]]

The weather data that can be retrieved only goes back to July 1st 2008, whereas the earliest record in the dataset dates from 2006. The dataset's timestamps are therefore shifted forward by two years so that they overlap with the available weather data.

In [None]:
# Move every "time" value forward by two calendar years.
dataset["time"] = dataset["time"] + pd.DateOffset(years = 2)

In [None]:
weather_data_merged = hist_weather_data.merge(dataset, on='time', how='right').set_index("time")

In [None]:
weather_data_merged.columns.tolist()

In [None]:
weather_data_merged = weather_data_merged[["Global_active_power", 
                                            "Global_reactive_power", 
                                            "Voltage", 
                                            "Global_intensity", 
                                            "power_factor", 
                                            "active_energy",
                                            "Sub_metering_1",
                                            "Sub_metering_2",
                                            "Sub_metering_3", 
                                            "season",
                                            "tempC",
                                            "FeelsLikeC",
                                            "sunrise",
                                            "sunset"]]

In [None]:
dataset = weather_data_merged

In [None]:
dataset.head()

# Data Visualization: Overall Exploration

### Global Active Power

In [None]:
# (feature, colour) pairs in plotting order; each feature is shown at daily,
# monthly and quarterly resolution.
feature_colors = [
    ("Global_active_power", "blue"),
    ("Global_reactive_power", "red"),
    ("Voltage", "green"),
    ("Global_intensity", "purple"),
    ("Sub_metering_1", "brown"),
    ("Sub_metering_2", "orange"),
    ("Sub_metering_3", "cyan"),
    ("power_factor", "black"),
    ("active_energy", "pink"),
]

for feature, color in feature_colors:
    sub_plot_resampling(dataset, feature, "D", "{} daily resampling".format(feature), color = color)
    sub_plot_resampling(dataset, feature, "M", "{} monthly resampling".format(feature), 80, "bar", color)
    sub_plot_resampling(dataset, feature, "Q", "{} quarterly resampling".format(feature), 60, "bar", color)

### Visual Comparison

**Mean**

In [None]:
# Text columns (e.g. "season") cannot be averaged and make the aggregation
# fail on recent pandas, so only numeric columns are resampled.
numeric_dataset = dataset.select_dtypes("number")

#Daily
cols = [0, 1, 2, 3, 5, 6]
values = numeric_dataset.resample('D').mean().values
# plot each column
plt.figure(figsize=(17, 15))
for position, group in enumerate(cols, start=1):
    plt.subplot(len(cols), 1, position)
    plt.plot(values[:, group])
    # Titles come from the same frame as ``values`` so they always line up.
    plt.title(numeric_dataset.columns[group], y=0.75, loc='right')
plt.show()

# (column, colour) pairs shared by the weekly and monthly overlays.
series_colors = [
    ("Global_active_power", "black"),
    ("Global_reactive_power", "firebrick"),
    ("Voltage", "sienna"),
    ("Global_intensity", "seagreen"),
    ("Sub_metering_1", "darkcyan"),
    ("Sub_metering_2", "royalblue"),
    ("Sub_metering_3", "darkorchid"),
    ("power_factor", "crimson"),
    ("active_energy", "pink"),
]

#Weekly
for name, color in series_colors:
    dataset[name].resample('W').mean().plot(color=color, legend=True)
plt.show()

#Monthly
for name, color in series_colors:
    dataset[name].resample('M').mean().plot(kind='hist', color=color, legend=True)
plt.show()

**Sum**

In [None]:
# Text columns (e.g. "season") must not be summed (recent pandas would try to
# aggregate them as well), so only numeric columns are resampled.
numeric_dataset = dataset.select_dtypes("number")

#Daily
cols = [0, 1, 2, 3, 5, 6]
values = numeric_dataset.resample('D').sum().values
# plot each column
plt.figure(figsize=(17, 15))
for position, group in enumerate(cols, start=1):
    plt.subplot(len(cols), 1, position)
    plt.plot(values[:, group])
    # Titles come from the same frame as ``values`` so they always line up.
    plt.title(numeric_dataset.columns[group], y=0.75, loc='right')
plt.show()

# (column, colour) pairs shared by the weekly and monthly overlays.
series_colors = [
    ("Global_active_power", "black"),
    ("Global_reactive_power", "firebrick"),
    ("Voltage", "sienna"),
    ("Global_intensity", "seagreen"),
    ("Sub_metering_1", "darkcyan"),
    ("Sub_metering_2", "royalblue"),
    ("Sub_metering_3", "darkorchid"),
    ("power_factor", "crimson"),
    ("active_energy", "pink"),
]

#Weekly
for name, color in series_colors:
    dataset[name].resample('W').sum().plot(color=color, legend=True)
plt.show()

#Monthly
for name, color in series_colors:
    dataset[name].resample('M').sum().plot(kind='hist', color=color, legend=True)
plt.show()

**Std**

In [None]:
# Text columns (e.g. "season") have no standard deviation and make the
# aggregation fail on recent pandas, so only numeric columns are resampled.
numeric_dataset = dataset.select_dtypes("number")

#Daily
cols = [0, 1, 2, 3, 5, 6]
values = numeric_dataset.resample('D').std().values
# plot each column
plt.figure(figsize=(17, 15))
for position, group in enumerate(cols, start=1):
    plt.subplot(len(cols), 1, position)
    plt.plot(values[:, group])
    # Titles come from the same frame as ``values`` so they always line up.
    plt.title(numeric_dataset.columns[group], y=0.75, loc='right')
plt.show()

# (column, colour) pairs shared by the weekly and monthly overlays.
series_colors = [
    ("Global_active_power", "black"),
    ("Global_reactive_power", "firebrick"),
    ("Voltage", "sienna"),
    ("Global_intensity", "seagreen"),
    ("Sub_metering_1", "darkcyan"),
    ("Sub_metering_2", "royalblue"),
    ("Sub_metering_3", "darkorchid"),
    ("power_factor", "crimson"),
    ("active_energy", "pink"),
]

#Weekly
for name, color in series_colors:
    dataset[name].resample('W').std().plot(color=color, legend=True)
plt.show()

#Monthly
for name, color in series_colors:
    dataset[name].resample('M').std().plot(kind='hist', color=color, legend=True)
plt.show()

**Correlation Map**

In [None]:
# Correlate numeric columns only; text columns such as "season" cannot be correlated.
dataset.select_dtypes("number").corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
dataset.select_dtypes("number").resample("D").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
dataset.select_dtypes("number").resample("M").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
dataset.select_dtypes("number").resample("Q").mean().corr().style.background_gradient(cmap = "PRGn")

# Data Visualization: Seasonal Exploration

### Winter

In [None]:
winter_dataset = dataset.loc[dataset['season']=="winter"]

In [None]:
# (feature, colour) pairs in plotting order; each feature is shown at daily,
# weekly and quarterly resolution.
feature_colors = [
    ("Global_active_power", "blue"),
    ("Global_reactive_power", "red"),
    ("Voltage", "green"),
    ("Global_intensity", "purple"),
    ("Sub_metering_1", "brown"),
    ("Sub_metering_2", "orange"),
    ("Sub_metering_3", "cyan"),
]

for feature, color in feature_colors:
    sub_plot_resampling(winter_dataset, feature, "D", "Winter {} daily resampling".format(feature), color = color)
    # The period is "W", so the title says weekly (it used to say monthly).
    sub_plot_resampling(winter_dataset, feature, "W", "Winter {} weekly resampling".format(feature), 80, "bar", color)
    sub_plot_resampling(winter_dataset, feature, "Q", "Winter {} quarterly resampling".format(feature), 60, "bar", color)

**Correlation Map**

In [None]:
# Correlate numeric columns only; text columns such as "season" cannot be correlated.
winter_dataset.select_dtypes("number").corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
winter_dataset.select_dtypes("number").resample("D").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
winter_dataset.select_dtypes("number").resample("M").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
winter_dataset.select_dtypes("number").resample("Q").mean().corr().style.background_gradient(cmap = "PRGn")

### Spring

In [None]:
spring_dataset = dataset.loc[dataset['season']=="spring"]

In [None]:
# (feature, colour) pairs in plotting order; each feature is shown at daily,
# weekly and quarterly resolution.
feature_colors = [
    ("Global_active_power", "blue"),
    ("Global_reactive_power", "red"),
    ("Voltage", "green"),
    ("Global_intensity", "purple"),
    ("Sub_metering_1", "brown"),
    ("Sub_metering_2", "orange"),
    ("Sub_metering_3", "cyan"),
]

for feature, color in feature_colors:
    sub_plot_resampling(spring_dataset, feature, "D", "Spring {} daily resampling".format(feature), color = color)
    # The period is "W", so the title says weekly (it used to say monthly).
    sub_plot_resampling(spring_dataset, feature, "W", "Spring {} weekly resampling".format(feature), 80, "bar", color)
    sub_plot_resampling(spring_dataset, feature, "Q", "Spring {} quarterly resampling".format(feature), 60, "bar", color)

**Correlation Map**

In [None]:
# Correlate numeric columns only; text columns such as "season" cannot be correlated.
spring_dataset.select_dtypes("number").corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
spring_dataset.select_dtypes("number").resample("D").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
spring_dataset.select_dtypes("number").resample("M").mean().corr().style.background_gradient(cmap = "PRGn")

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
spring_dataset.select_dtypes("number").resample("Q").mean().corr().style.background_gradient(cmap = "PRGn")

### Summer

In [None]:
summer_dataset = dataset.loc[dataset['season']=="summer"]

In [None]:
# (feature, colour) pairs in plotting order; each feature is shown at daily,
# weekly and quarterly resolution.
feature_colors = [
    ("Global_active_power", "blue"),
    ("Global_reactive_power", "red"),
    ("Voltage", "green"),
    ("Global_intensity", "purple"),
    ("Sub_metering_1", "brown"),
    ("Sub_metering_2", "orange"),
    ("Sub_metering_3", "cyan"),
]

for feature, color in feature_colors:
    sub_plot_resampling(summer_dataset, feature, "D", "Summer {} daily resampling".format(feature), color = color)
    # The period is "W", so the title says weekly (it used to say monthly).
    sub_plot_resampling(summer_dataset, feature, "W", "Summer {} weekly resampling".format(feature), 80, "bar", color)
    sub_plot_resampling(summer_dataset, feature, "Q", "Summer {} quarterly resampling".format(feature), 60, "bar", color)

**Correlation Map**

No resampling

In [None]:
# Correlate numeric columns only; text columns such as "season" cannot be correlated.
summer_dataset.select_dtypes("number").corr().style.background_gradient(cmap = "PRGn")

Daily resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
summer_dataset.select_dtypes("number").resample("D").mean().corr().style.background_gradient(cmap = "PRGn")

Monthly resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
summer_dataset.select_dtypes("number").resample("M").mean().corr().style.background_gradient(cmap = "PRGn")

Quarterly resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
summer_dataset.select_dtypes("number").resample("Q").mean().corr().style.background_gradient(cmap = "PRGn")

### Autumn

In [None]:
autumn_dataset = dataset.loc[dataset['season']=="autumn"]

In [None]:
# (feature, colour) pairs in plotting order; each feature is shown at daily,
# weekly and quarterly resolution.
feature_colors = [
    ("Global_active_power", "blue"),
    ("Global_reactive_power", "red"),
    ("Voltage", "green"),
    ("Global_intensity", "purple"),
    ("Sub_metering_1", "brown"),
    ("Sub_metering_2", "orange"),
    ("Sub_metering_3", "cyan"),
]

for feature, color in feature_colors:
    sub_plot_resampling(autumn_dataset, feature, "D", "Autumn {} daily resampling".format(feature), color = color)
    # The period is "W", so the title says weekly (it used to say monthly).
    sub_plot_resampling(autumn_dataset, feature, "W", "Autumn {} weekly resampling".format(feature), 80, "bar", color)
    sub_plot_resampling(autumn_dataset, feature, "Q", "Autumn {} quarterly resampling".format(feature), 60, "bar", color)

**Correlation Map**

No resampling

In [None]:
# Correlate numeric columns only; text columns such as "season" cannot be correlated.
autumn_dataset.select_dtypes("number").corr().style.background_gradient(cmap = "PRGn")

Daily resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
autumn_dataset.select_dtypes("number").resample("D").mean().corr().style.background_gradient(cmap = "PRGn")

Monthly resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
autumn_dataset.select_dtypes("number").resample("M").mean().corr().style.background_gradient(cmap = "PRGn")

Quarterly resampling

In [None]:
# Numeric columns only: text columns such as "season" cannot be averaged or correlated.
autumn_dataset.select_dtypes("number").resample("Q").mean().corr().style.background_gradient(cmap = "PRGn")

# FB Prophet