In [1]:
# ---------------------------------------------------------------------------
# Windowing
# ---------------------------------------------------------------------------
# Number of consecutive raw rows that are averaged into one sample row.
WINDOW_STRIDE = 12

# Hours spanned by one averaged sample row.
# NOTE(review): the divisor 12 presumably means "12 raw rows per hour" — confirm.
SAMPLE_HOURS = WINDOW_STRIDE / 12.0

# Count of future samples whose mean is used for the prediction (24 hours' worth).
PREDICTION_WINDOW = int(24 / SAMPLE_HOURS)

# Count of samples in one windowed input sequence (7 * 24 hours' worth).
SEQUENCE_LENGTH = int(7 * 24 / SAMPLE_HOURS)

# ---------------------------------------------------------------------------
# Columns
# ---------------------------------------------------------------------------
# Features fed to the model, in column order.
INPUT_COLUMNS = [
    'epoch', 'day_of_year', 'hour',
    'temp',
    'windspd', 'winddir', 'wind_x_dir', 'wind_y_dir',
    'no', 'no2', 'nox', 'o3',
]
# Columns the model predicts, in column order.
OUTPUT_COLUMNS = ['no', 'no2', 'nox', 'o3']

NUM_INPUTS = len(INPUT_COLUMNS)
NUM_OUTPUTS = len(OUTPUT_COLUMNS)

# Column name -> position lookups for the two lists above.
INPUT_MAP = dict(zip(INPUT_COLUMNS, range(NUM_INPUTS)))
OUTPUT_MAP = dict(zip(OUTPUT_COLUMNS, range(NUM_OUTPUTS)))

# ---------------------------------------------------------------------------
# Optional engineered features
# ---------------------------------------------------------------------------
# Use the FFT of each sequence as additional features.
FFT_FEATURES = False

# Fit each sequence to y = mx + b and add the coefficient / intercept as features.
REGRESSION_FEATURES = True

# Add a spread statistic for each feature in the sequence.
# NOTE(review): the earlier comment said "variance" while the flag name says
# "std" — confirm which one the feature-building code actually computes.
STD_FEATURES = True

In [3]:
import numpy as np

# Read the stored arrays (sequences, lat/long features, per-sequence features)
# and the matching labels from the working directory.
data_sequences = np.load('000_sequences.npy')
data_latlong = np.load('000_latlong_features.npy')
data_sequence_features = np.load('000_sequence_features.npy')
labels = np.load('000_labels.npy')

# Merge axes 1 and 2 of the sequence array so each sample becomes one flat row.
data_sequences = np.reshape(
    data_sequences,
    (data_sequences.shape[0], data_sequences.shape[1] * data_sequences.shape[2]),
)

# Join the flattened sequences and the per-sequence features column-wise.
# data_latlong is loaded above but is not included in this matrix.
data = np.concatenate([data_sequences, data_sequence_features], axis=1)


(107900, 168, 12)
(107900, 2)
(107900, 27)
(107900, 4)
(107900, 2043)


In [5]:
# Split features and labels into a training part and a held-out part.
# 20% of the rows are held out; the fixed random_state keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Training
#
# Fits one RandomForestRegressor per random seed (the loop variable is called
# `epoch`, but each iteration trains a fresh, independent forest) and keeps
# the one with the highest R^2 on (X_test, y_test). Every time a new best is
# found, the model and the scaler are written to disk.
#
# NOTE(review): the best seed is chosen using the same X_test/y_test that the
# score is reported on, so the final "best" R^2 is an optimistic estimate.

import pickle

from sklearn.ensemble import RandomForestRegressor

# NOTE(review): `scaler` is not defined in any cell visible in this notebook;
# it presumably comes from a preprocessing step — confirm it is in scope.
# It is serialised once, up front, because the loop never modifies it. This
# also means a missing scaler fails here, before the first (long) fit and
# before any existing pickle file is overwritten.
scaler_bytes = pickle.dumps(scaler)

best_r2 = None

for epoch in range(0, 100):
    regr = RandomForestRegressor(random_state=epoch, n_estimators=100, n_jobs=-1, verbose=0)
    regr.fit(X_train, y_train)
    r2 = regr.score(X_test, y_test)

    save = False

    if best_r2 is None:
        print("epoch(%d) - R^2: %f" % (epoch+1, r2))
        best_r2 = r2
        save = True
    elif r2 > best_r2:
        print("epoch(%d) - R^2 improved: %f (best: %f)" % (epoch+1, r2, best_r2))
        best_r2 = r2
        save = True
    else:
        print("epoch(%d) - R^2 did not improve: %f (best: %f)" % (epoch+1, r2, best_r2))

    if save:
        # Serialise before opening the target: opening with 'wb' truncates the
        # file, so a pickling failure must not happen after that point.
        model_bytes = pickle.dumps(regr)

        # Context managers guarantee the handles are flushed and closed.
        with open('random-forest.best.pickle', 'wb') as model_file:
            model_file.write(model_bytes)
        with open('scaler.best.pickle', 'wb') as scaler_file:
            scaler_file.write(scaler_bytes)