# Neural Porphet

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import os
from sklearn import preprocessing
from neuralprophet import NeuralProphet, set_random_seed

Importing plotly failed. Interactive plots will not work.
Importing plotly failed. Interactive plots will not work.


In [4]:
import pkg_resources

libraries = ['warnings', 'pandas', 'os', 'sklearn', 'neuralprophet']


# Print version for each library
for lib in libraries:
    try:
        version = pkg_resources.get_distribution(lib).version
        print(f"{lib}: {version}")
    except pkg_resources.DistributionNotFound:
        print(f"{lib} is not installed.")


pandas: 2.2.3
os is not installed.
sklearn is not installed.
neuralprophet: 0.8.0


In [2]:
folder = os.getcwd()
train = pd.read_csv(os.path.join(os.path.dirname(folder), "data/training.csv"))
test = pd.read_csv(os.path.join(os.path.dirname(folder), "data/testing.csv"))

### Data Preparation

In [3]:
train["date"] = pd.to_datetime(train["date"], format = "%Y-%m-%d")
train = train.rename(columns={train.columns[0]: 'ds'})
train = train.rename(columns={'wind_prod': 'y'})

test["date"] = pd.to_datetime(test["date"], format = "%Y-%m-%d")
test = test.rename(columns={test.columns[0]: 'ds'})
test = test.rename(columns={'wind_prod': 'y'})

# Add to the testing test the last 30 observation of the training test for n_lags = 30
test = pd.concat([train.tail(30), test], ignore_index=True)

# Normalize the dataframes
scaler_train = preprocessing.MinMaxScaler()
train_norm_tmp = scaler_train.fit_transform(train.iloc[:, 1:])
train_norm_tmp = pd.DataFrame(train_norm_tmp)
train_norm_tmp.columns=list(train.iloc[:, 1:].columns)
train_norm = pd.concat([train[["ds"]], train_norm_tmp], axis=1)

test_norm_tmp = scaler_train.fit_transform(test.iloc[:, 1:])
test_norm_tmp = pd.DataFrame(test_norm_tmp)
test_norm_tmp.columns=list(test.iloc[:, 1:].columns)
test_norm = pd.concat([test[["ds"]], test_norm_tmp], axis=1)

### Model Estimation

In [4]:
set_random_seed(2207)

p0 = NeuralProphet(daily_seasonality=False, n_lags=30)

for regressor in train_norm.iloc[:, 2:].columns:
    p0.add_future_regressor(name = regressor)

p0_fit = p0.fit(train_norm, freq="D")

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.963% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 64
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 80


Finding best initial lr:   0%|          | 0/236 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

### Predictions

In [5]:
pred = p0.predict(test_norm)
pred_new_shape = pd.concat([pred.iloc[30:,2], pred.iloc[30:,8:]], axis = 1)
pred_new_shape = scaler_train.inverse_transform(pred_new_shape)
final_pred = pd.concat([pd.DataFrame(pred.iloc[30:, 0].reset_index(drop=True)), pd.DataFrame(pred_new_shape)[0]], axis=1, ignore_index=True)
final_pred = pd.concat([final_pred, test.iloc[31:, 1].reset_index(drop=True)], axis=1, ignore_index=True)
final_pred.columns = ["date", "predicted", "observed"]

if not os.path.isdir(os.path.join(folder, "predictions")):
    os.makedirs(os.path.join(folder, "predictions"))
final_pred.to_csv(os.path.join(folder, "predictions/neurlprophet_predictions.csv"), index = False)

INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.533% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.df_utils._infer_frequency) - Major frequency D corresponds to 99.535% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - D
INFO - (NP.data.processing._handle_missing_data) - Dropped 1 rows at the end with NaNs in future regressors.


Predicting: 43it [00:00, ?it/s]

INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
