In [51]:
# python typical
import numpy as np
import matplotlib.pyplot as plt
import copy
from typing import List, Dict, Optional
from enum import Enum, IntEnum, auto
from dataclasses import dataclass

# sklearn
from sklearn.model_selection import (
    train_test_split, KFold, cross_val_score
)

# # TensorFlow / Keras (disabled; only the commented-out model code in P3_Env.run needs these)
# import tensorflow.keras as keras
# from tensorflow.keras.layers import Dense
# from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# python debugger
from icecream import ic # Debugger

In [52]:

# one layer model:



In [53]:
class P3_Env:
    """Experiment harness for fitting a 1-D target function with small MLPs.

    On construction it samples ``MAX_DATA_SIZE`` evenly spaced points of
    ``f_data_function`` over ``x_range``, splits them into train/test sets and
    allocates storage for the results. ``run`` then sweeps every
    (data size, hidden nodes) combination; the model-fitting steps inside the
    sweep are currently commented out, so the sweep only logs each
    configuration.
    """

    # Train/test partitions produced by train_test_split in __init__.
    _train_data_x: np.ndarray
    _train_data_y: np.ndarray
    _test_data_x: np.ndarray
    _test_data_y: np.ndarray

    @staticmethod
    def print(content: str):
        """Print ``content`` prefixed with the ``[ P3_Env ] >`` tag."""
        print("[ P3_Env ] > {}".format(content))

    def __init__(
        self, 
        f_data_function, 
        x_range: List[float],
        # default common configuration
        data_pts_i: List[int]       = [100],#[10,40,80,200],
        hidden_nodes_j: List[int]   = [100],#[2,10,40,100],
        N_eval_per_model: int       = 5, # repeat the process 5 times by shuffling the data generated randomly
        MAX_DATA_SIZE: int          = 500,
        TRAIN_SIZE: float           = 0.8, # 80 % for training by default
    )->None:
        """Generate the dataset and allocate result storage.

        Args:
            f_data_function: Callable applied to the whole array of x samples;
                its return value is used as the y data.
            x_range: ``[start, stop]`` of the sampling interval. ``start`` is
                included, ``stop`` is excluded.
            data_pts_i: Training-subset sizes to sweep over in ``run``.
            hidden_nodes_j: Hidden-layer widths to sweep over in ``run``.
            N_eval_per_model: Size of the last axis of the error matrices.
            MAX_DATA_SIZE: Total number of samples generated over ``x_range``.
            TRAIN_SIZE: Forwarded to ``train_test_split`` as ``train_size``.

        Raises:
            ValueError: If ``MAX_DATA_SIZE`` is not positive.
        """
        if MAX_DATA_SIZE <= 0:
            raise ValueError(
                "MAX_DATA_SIZE must be positive, got {}".format(MAX_DATA_SIZE)
            )

        # Copy the sweep lists so that neither the shared default lists nor
        # the caller's own lists are aliased by this instance.
        self.data_pts_i = list(data_pts_i)
        self.hidden_nodes_j = list(hidden_nodes_j)
        self.N_eval_per_model = N_eval_per_model
        self.MAX_DATA_SIZE = MAX_DATA_SIZE
        self.TRAIN_SIZE = TRAIN_SIZE

        # generate storage: one slot per (data size, hidden nodes, repetition)
        result_shape = (
            len(self.data_pts_i), len(self.hidden_nodes_j), N_eval_per_model
        )
        self.M_ij_train_errs = np.zeros(result_shape)
        self.M_ij_valid_errs = np.zeros(result_shape)
        self.mlps = {}

        # generate data
        # linspace with endpoint=False yields exactly MAX_DATA_SIZE points on
        # the same grid; arange with a computed float step can be off by one
        # because of rounding.
        data_x = np.linspace(
            x_range[0], x_range[1], num=MAX_DATA_SIZE, endpoint=False
        )
        data_y = f_data_function(data_x)
        
        self._train_data_x, self._test_data_x, self._train_data_y, self._test_data_y = \
            train_test_split(data_x, data_y, train_size=TRAIN_SIZE, shuffle=True)

        # report
        self.print("Data Size: [ Train: {train} | Test: {test} ]"\
            .format(train=np.shape(self._train_data_x), test=np.shape(self._test_data_x)))


    def run(
        self,
        validation_split: float = 0.1, # Default: 10-fold x-validation
        N_epoch: int            = 100, # Early stopping
        plot: bool              = True
    ):
        """Sweep all (data size, hidden nodes) combinations.

        For every entry of ``data_pts_i`` a random subset of the training data
        is drawn, then one line naming the configuration is printed for every
        entry of ``hidden_nodes_j``. Model construction, training, plotting and
        result storage are commented out below.

        Args:
            validation_split: Not used by the active code.
            N_epoch: Not used by the active code.
            plot: Not used by the active code.
        """
        train_x = np.asarray(self._train_data_x)
        train_y = np.asarray(self._train_data_y)

        # generate models:
        # for n in range(N_eval_per_model):
        for i, n_data in enumerate(self.data_pts_i):
            # data selection, shuffle and down sample: a single permutation is
            # applied to both arrays so the (x, y) pairs stay aligned. Works on
            # an empty training set as well and keeps X/Y as arrays.
            picked = np.random.permutation(len(train_x))[0:n_data]
            X, Y = train_x[picked], train_y[picked]

            # time to train!
            for j, n_nodes in enumerate(self.hidden_nodes_j):
                # itr is fixed at 0: the repetition loop above is disabled.
                mode = "{itr}_dn={i}_hn={j}".format(itr=0, i=n_data, j=n_nodes)
                self.print(mode)

                # NOTE(review): the disabled code below passes the model
                # instance as build_fn; KerasRegressor presumably expects a
                # callable that builds the model - confirm before re-enabling.

                # define base model:
                # mlp = keras.models.Sequential([
                #     Dense(n_nodes, activation='sigmoid', input_shape=(1,)),
                #     Dense(1, kernel_initializer='normal')
                # ])
                # mlp.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
                # print(mlp.summary())
            
                # evaluate model
                # estimator = KerasRegressor(build_fn=mlp, epochs=N_epoch, batch_size=5, verbose=0)

                # 10-fold
                # kfold = KFold(n_splits=10)
                # results = cross_val_score(estimator, X, Y, cv=kfold)
                # self.print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))

                # # train
                # h = mlp.fit(X, Y, \
                #     epochs=N_epoch, validation_split=0.25, batch_size=32, verbose=0)

                # gen report
                # if plot:
                #     fig1 = plt.gcf()
                #     ax1 = plt.subplot(111)
                #     plt.plot(h.history['accuracy'])
                #     plt.plot(h.history['val_accuracy'], 'r')
                #     plt.legend(['train acc', 'val acc'])
                #     fig1.savefig("fig/p3/train_progress_{}.png".format(
                #         mode
                #     ), bbox_inches = 'tight')

                # self.print('train acc: %.2f %%'%(h.history['accuracy'][-1]*100))
                # self.print('valid acc: %.2f %%'%(h.history['val_accuracy'][-1]*100))

                # # store error data
                # self.M_ij_train_errs[i][j][0] = h.history['accuracy'][-1]
                # self.M_ij_valid_errs[i][j][0] = h.history['val_accuracy'][-1]

                # self.mlps[mode] = {
                #     "model": mlp,
                #     "train_result": h
                # }


In [54]:
def _damped_sine(x):
    """Target curve for the experiment: x * sin(6*pi*x) * exp(-x^2)."""
    oscillation = np.sin(6 * np.pi * x)
    envelope = np.exp(-(x ** 2))
    return x * oscillation * envelope


# Build the environment over the half-open interval [-1, 1) with all other
# settings left at their defaults.
TEST = P3_Env(f_data_function=_damped_sine, x_range=[-1, 1])

[ P3_Env ] > Data Size: [ Train: (400,) | Test: (100,) ]


In [55]:
# Run the sweep with run()'s default arguments; one line is printed per
# (data size, hidden nodes) combination.
TEST.run()

[ P3_Env ] > 0_dn=100_hn=100
[ P3_Env ] > Results: nan (nan) MSE
