In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from ucimlrepo import fetch_ucirepo

# Fetch the dataset registered under id=235 in the UCI ML repository.
individual_household_electric_power_consumption = fetch_ucirepo(id=235)

# Data (as pandas dataframes).
X = individual_household_electric_power_consumption.data.features
y = individual_household_electric_power_consumption.data.targets


In [3]:
# Show the (rows, columns) dimensions of the feature frame.
X.shape

(2075259, 9)

In [4]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,16/12/2006,17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
1,16/12/2006,17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2,16/12/2006,17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
3,16/12/2006,17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
4,16/12/2006,17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [5]:
# Keep only the timestamp columns plus the power, voltage and intensity readings.
X = X.loc[
    :,
    ["Date", "Time", "Global_active_power", "Voltage", "Global_intensity"],
]
X.shape

(2075259, 5)

In [6]:
# Shorten the measurement column names to single letters (P, V, I).
X = X.rename(
    columns={
        "Global_active_power": "P",
        "Voltage": "V",
        "Global_intensity": "I",
    }
)
X.head()

Unnamed: 0,Date,Time,P,V,I
0,16/12/2006,17:24:00,4.216,234.84,18.4
1,16/12/2006,17:25:00,5.36,233.63,23.0
2,16/12/2006,17:26:00,5.374,233.29,23.0
3,16/12/2006,17:27:00,5.388,233.74,23.0
4,16/12/2006,17:28:00,3.666,235.68,15.8


In [7]:
# Convert the three measurement columns to numbers; with errors="coerce",
# entries that cannot be parsed become NaN instead of raising.
for column in ('P', 'V', 'I'):
    X[column] = pd.to_numeric(X[column], errors="coerce")

In [8]:
# Count the missing values in each column.
X.isna().sum()

Date        0
Time        0
P       25979
V       25979
I       25979
dtype: int64

In [9]:
# Shift convention used for the lag columns:
#   0: present sample
#   positive values: samples in the past (1, 2, ...)
#   negative values: samples in the future (-1, -2, ...)
X['P'].shift(periods = [2, 1, 0, -1]).head()

Unnamed: 0,P_2,P_1,P_0,P_-1
0,,,4.216,5.36
1,,4.216,5.36,5.374
2,4.216,5.36,5.374,5.388
3,5.36,5.374,5.388,3.666
4,5.374,5.388,3.666,3.52


In [10]:
# Same lagged view, keeping only the rows where every shifted column has a value.
X['P'].shift(periods = [2, 1, 0, -1]).dropna().head()

Unnamed: 0,P_2,P_1,P_0,P_-1
2,4.216,5.36,5.374,5.388
3,5.36,5.374,5.388,3.666
4,5.374,5.388,3.666,3.52
5,5.388,3.666,3.52,3.702
6,3.666,3.52,3.702,3.7


In [11]:
# Lag table for P: two past samples, the present one and the next one (shift -1).
X_P = X['P'].shift(periods=[2, 1, 0, -1]).dropna()

# Lag tables for V and I: two past samples and the present one only.
X_V, X_I = (
    X[name].shift(periods=[2, 1, 0]).dropna()
    for name in ('V', 'I')
)

In [12]:
# Combine the P, V and I lag tables column-wise on their shared row index.
# Inner joins keep only the rows present in all three tables, so a row that
# was dropped from X_V or X_I for missing values cannot reappear filled with
# NaN (which the default left join would allow).
X_prime = X_P.join(X_V, how="inner").join(X_I, how="inner")
X_prime.head()

Unnamed: 0,P_2,P_1,P_0,P_-1,V_2,V_1,V_0,I_2,I_1,I_0
2,4.216,5.36,5.374,5.388,234.84,233.63,233.29,18.4,23.0,23.0
3,5.36,5.374,5.388,3.666,233.63,233.29,233.74,23.0,23.0,23.0
4,5.374,5.388,3.666,3.52,233.29,233.74,235.68,23.0,23.0,15.8
5,5.388,3.666,3.52,3.702,233.74,235.68,235.02,23.0,15.8,15.0
6,3.666,3.52,3.702,3.7,235.68,235.02,235.09,15.8,15.0,15.8


In [13]:
# Dimensions of the joined lag table.
X_prime.shape

(2049066, 10)

In [14]:
# The future-shifted power column "P_-1" is the prediction target; every other
# lag column is an input feature.
X = X_prime.drop(columns=["P_-1"])
y = X_prime.loc[:, ["P_-1"]]
(X.shape, y.shape)

((2049066, 9), (2049066, 1))

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 100000 randomly selected rows as the test set; the fixed seed makes
# the split repeatable.
X_train, X_test = train_test_split(
    X,
    test_size=100000,
    random_state=42,
)
(X_train.shape, X_test.shape)

((1949066, 9), (100000, 9))

In [16]:
# Split the target with the same test_size and seed as the feature split so
# that the rows of y_train / y_test line up with those of X_train / X_test.
y_train, y_test = train_test_split(y, test_size=100000, random_state=42)

# Flatten the single-column frames into 1-D arrays. Converting explicitly with
# to_numpy() avoids depending on how np.reshape treats a DataFrame argument.
y_train = y_train.to_numpy().ravel()
y_test = y_test.to_numpy().ravel()
y_train.shape, y_test.shape

((1949066,), (100000,))

In [17]:
# Network layout:
#   input layer: 9 neurons
#   first hidden layer: 15 neurons
#   second hidden layer: 5 neurons
#   output layer: 1 neuron
from sklearn.neural_network import MLPRegressor

# random_state fixes the random weight initialisation and batch shuffling so
# that re-running the notebook reproduces the same fitted model (same seed
# value as the train/test split).
# NOTE(review): the features are passed to the network unscaled; scikit-learn
# recommends scaling inputs for MLP models — consider adding a scaler.
model = MLPRegressor(hidden_layer_sizes=(15, 5), max_iter=1500, random_state=42)

In [18]:
# Train the network on the training split.
model.fit(X_train, y_train)

In [19]:
# Predict the target for the held-out test rows.
y_pred = model.predict(X_test)

In [20]:
# Normalised RMSE on the test split, expressed as a percentage of the value
# range of the full target column `y`.
# Both vectors are flattened first: subtracting an (n,) array from an (n, 1)
# one would otherwise broadcast to an (n, n) matrix (or fail for a DataFrame).
residuals = np.ravel(y_pred) - np.ravel(y_test)
rmse = np.sqrt(np.mean(np.square(residuals)))
rango = y.values.max() - y.values.min()
nrmse = (rmse / rango) * 100
print("Test NRMSE:", round(nrmse, 4), '%')

Test NRMSE: 2.3906 %


In [21]:
import pickle

# Persist the trained model to disk. The context manager closes the file even
# if pickling raises, so the handle is not leaked.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)