# In [10]:
import paths
import numpy as np
import pandas as pd
import yaml
from pathlib import Path
import matplotlib.pyplot as plt
from models.DGP import DistributedGP as DGP
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from real_applications.manufacturing.pre_processing import data_processing_methods as dpm

"""
NSG data
"""
# Location of the post-processed NSG data workbook
file = paths.get_nsg_path('processed/NSG_data.xlsx')

# Training df
# Passing a list of sheet names makes read_excel return a dict of DataFrames
# keyed by sheet name, so the workbook is opened once instead of once per
# sheet.
sheets = pd.read_excel(file, sheet_name=['X_training_stand',
                                         'y_training',
                                         'y_raw_training',
                                         'time'])
X_df = sheets['X_training_stand']
y_df = sheets['y_training']
y_raw_df = sheets['y_raw_training']
t_df = sheets['time']
del sheets          # the individual frames above are all that is needed

# Pre-process training data
X, y0, N0, D, max_lag, time_lags = dpm.align_arrays(X_df, y_df, t_df)

# Process raw targets
# With shift=0 this just removes the first max_lag points from the raw
# target array, keeping it the same length as the aligned data.
y_raw = dpm.adjust_time_lag(y_raw_df['raw_furnace_faults'].values,
                            shift=0,
                            to_remove=max_lag)

# Extract corresponding time stamps. Note this essentially just
# removes the first max_lag points from the date_time array.
date_time = dpm.adjust_time_lag(y_df['Time stamp'].values,
                                shift=0,
                                to_remove=max_lag)

"""
READ config
"""

# Parse the YAML configuration; the file handle is only needed while the
# document is being loaded, so everything else happens after it is closed.
config_path = paths.get_config_path('config0.yml')
with open(config_path) as config_file:
    config = yaml.safe_load(config_file)

# Number of GPs is fixed here rather than read from the config file.
N_gps = 8
# Value stored under the 'test_per' key of the config.
val_split = config['test_per']

# Train and test data
N, D = np.shape(X)
training_per = 0.84          # 84% avoids noise burst at the end of data
start_train, end_train, end_test = 0, int(N*training_per), N

# Row ranges used below: the training window and the full aligned series.
train_rows = slice(start_train, end_train)
all_rows = slice(start_train, end_test)

X_train, y_train = X[train_rows], y0[train_rows]
# NOTE(review): the "test" arrays cover the whole series, training rows
# included. This looks intentional, since date_time / y_raw / y_rect below
# use the same range - confirm.
X_test, y_test = X[all_rows], y0[all_rows]
N_train = len(y_train)

# Companion series restricted to the same range as the test arrays.
date_time, y_raw, y_rect = date_time[all_rows], y_raw[all_rows], y0[all_rows]

"""
DPGP regression
"""
# Save memory
# Unbinds the two training DataFrames and the pre-processing module alias;
# none of them is referenced again in this section.
del X_df, y_df, dpm

# Length scales
# One initial length scale per input feature. An anisotropic RBF kernel needs
# exactly as many length scales as X has columns, so the list is sized from
# the training data instead of being hard-coded to 10 entries.
n_features = np.shape(X_train)[1]
ls = [800] * n_features

# Kernels
# Squared-exponential term (unit constant factor) plus a white-noise term.
se = 1**2 * RBF(length_scale=ls, length_scale_bounds=(0.1, 1e5))
wn = WhiteKernel(noise_level=0.61**2, noise_level_bounds=(1e-5, 1))

kernel = se + wn

# Build the distributed GP from N_gps GPs sharing this kernel and train it.
dgp = DGP(X_train, y_train, N_GPs=N_gps, kernel=kernel)
dgp.train()

"""
SAVE TRAINED MODEL
"""
import pickle

# The file name encodes the number of GPs (e.g. DGP_8.pkl for N_gps = 8) and
# is written relative to the current working directory.
model_path = f'DGP_{N_gps}.pkl'
with open(model_path, mode='wb') as model_file:
    pickle.dump(dgp, model_file)

