Prepare Dataset

In [1]:
from sklearn.preprocessing import StandardScaler
from zoo.chronos.data.repo_dataset import get_public_dataset

In [2]:
name = 'network_traffic'
path = '~/.chronos/dataset/'

stand = StandardScaler()
tsdata_train,tsdata_val,\
    tsdata_test = get_public_dataset(name,path,
                                        download=False,
                                        with_split=True,
                                        val_ratio=0.1,
                                        test_ratio=0.1)
for tsdata in [tsdata_train, tsdata_val, tsdata_test]:
    tsdata.gen_dt_feature(one_hot_features=['HOUR', 'WEEK'])\
            .scale(stand, fit=tsdata is tsdata_train)


Creator 3rd_party_model

In [3]:
import torch
from torch import nn

In [4]:

class GRUNet(nn.Module):
    def __init__(self, input_size, num_layers, hidden_size, dropout, output_size):
        super(GRUNet, self).__init__()
        self.num_layers=num_layers
        self.hidden_size=hidden_size
        self.gru = nn.GRU(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.randn(self.num_layers, x.size(0), self.hidden_size)
        x, _ = self.gru(x, h0)
        out = self.fc(x[:, -1, :])
        out = out.view(out.shape[0], 1, out.shape[-1])
        return out

In [5]:
def model_creator(config):
    return GRUNet(input_size=config['input_feature_num'],
                  num_layers=2,
                  hidden_size=config['hidden_size'],
                  dropout=config['dropout'],
                  output_size=config['output_feature_num'])

Init orca context

In [6]:
from zoo.orca import init_orca_context

In [7]:
init_orca_context(cores=2,memory='10g')

Initializing orca context
Current pyspark location is : /home/liangs/spark/python/lib/pyspark.zip/pyspark/__init__.py
Start to getOrCreate SparkContext
pyspark_submit_args is:  --driver-class-path /home/liangs/analytics-zoo/zoo/target/analytics-zoo-bigdl_0.13.0-spark_2.4.6-0.12.0-SNAPSHOT-jar-with-dependencies.jar pyspark-shell 
Successfully got a SparkContext


Hyparameter variable

In [8]:
from zoo.orca.automl import hp

In [9]:
search_space={
    'hidden_size': hp.grid_search([32,64]),
    'dropout': hp.uniform(0.1, 0.2)
}


Init Autoestmiator

In [10]:
from zoo.chronos.autots.autotsestimator import AutoTSEstimator

In [11]:
autotsest = AutoTSEstimator(model=model_creator,
                            search_space=search_space,
                            past_seq_len=15,
                            future_seq_len=1,
                            metric='mse',
                            loss=torch.nn.MSELoss(),
                            logs_dir='/home/liangs/.test_permission/',
                            cpus_per_trial=2)

2021-09-26 21:24:35,204	INFO services.py:1174 -- View the Ray dashboard at [1m[32mhttp://10.239.44.67:8265[39m[22m


{'node_ip_address': '10.239.44.67', 'raylet_ip_address': '10.239.44.67', 'redis_address': '10.239.44.67:6379', 'object_store_address': '/tmp/ray/session_2021-09-26_21-24-34_627059_18563/sockets/plasma_store', 'raylet_socket_name': '/tmp/ray/session_2021-09-26_21-24-34_627059_18563/sockets/raylet', 'webui_url': '10.239.44.67:8265', 'session_dir': '/tmp/ray/session_2021-09-26_21-24-34_627059_18563', 'metrics_export_port': 61844, 'node_id': '99029305b930017d460c89af1bf14ea70a0868d5841dfb4a31b2d57b'}


Fit

In [12]:
ts_pipeline = autotsest.fit(data=tsdata_train,
                            epochs=5,
                            batch_size=hp.choice([32,64]),
                            validation_data=tsdata_val,
                            n_sampling=1)

2021-09-26 21:24:56,316	INFO tune.py:450 -- Total run time: 18.20 seconds (18.12 seconds for the tuning loop).


model evaluate

In [13]:
rmse, smape = ts_pipeline.evaluate(tsdata_test,multioutput='raw_values', metrics=['rmse','smape'])
print(f'AvgRate rmse is: {rmse[0][0]}, smape is: {smape[0][0]:.4f}')
print(f'total rmse is: {rmse[0][1]}, smape is: {smape[0][1]:.4f}')

AvgRate rmse is: 68.57215358507068, smape is: 7.7690
total rmse is: 62663071804.620605, smape is: 7.9271


In [14]:
from zoo.orca.common import stop_orca_context
stop_orca_context()

Stopping orca context


plot

In [16]:
import matplotlib.pyplot as plt

In [17]:
def plot_predict_actual_values(date, y_pred, y_test, ylabel):
    """
    plot the predicted values and actual values (for the test data)
    """
    fig, axs = plt.subplots(figsize=(12,5))

    axs.plot(date, y_pred, color='red', label='predicted values')
    axs.plot(date, y_test, color='blue', label='actual values')
    axs.set_title('the predicted values and actual values (for the test data)')

    plt.xlabel('test datetime')
    plt.ylabel(ylabel)
    plt.legend(loc='upper left')
    plt.show()

In [22]:
yhat = ts_pipeline.predict(tsdata_test)
x_test, y_test = tsdata_test.roll(lookback=15,horizon=1).to_numpy()

In [23]:
unscale_y_test = tsdata_test.unscale_numpy(y_test)

In [24]:
plot_predict_actual_values(tsdata_test.df.StartTime,y_pred=yhat,y_test=unscale_y_test,ylabel='total')

TypeError: plot_predict_actual_values() missing 1 required positional argument: 'ylabel'