In [1]:
import pandas as pd
import bz2

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeRegressor

import sys
sys.path.append("..")
from meta_stream import MetaStream

In [2]:
# Read the first `df_rows` rows of the bz2-compressed 2008 CSV, keep the five
# columns used by the experiments below, and drop rows with any missing value.
df_rows = 50000
# Open through a context manager so the bz2 handle is closed as soon as pandas
# has finished reading, instead of staying open for the life of the kernel.
with bz2.open("../../data/DataExpo2009/2008.csv.bz2", 'r') as csv_stream:
    df = pd.read_csv(csv_stream, nrows=df_rows)
df = (
    df[['DepTime', 'CRSDepTime', 'CRSArrTime', 'ArrDelay', 'Distance']]
    .dropna(axis=0, how='any')
)

In [3]:
# Regression algorithms passed to MetaStream as `models`.
# The two ensemble regressors are given a fixed random_state of 42.
models = [
    RandomForestRegressor(random_state=42),
    LinearRegression(),
    GradientBoostingRegressor(random_state=42),
]

Parameter settings for MetaStream (considering a meta-level window size of 200)

In [4]:
# Base-level window settings, in the order they are later passed to MetaStream.
base_data_window, base_delay_window, base_sel_window_size = 672, 0, 24
# Meta-level window size used by the next three experiments.
meta_data_window = 200

Use Random Forest Classifier as meta learner

In [5]:
# Random-forest meta-learner. It is seeded with the same value as the base
# models in `models`, so re-running the cell gives a repeatable forest;
# without a seed the forest is rebuilt differently on every run.
# NOTE(review): the output recorded below this cell came from an unseeded run,
# so the "recommended" figures may shift slightly after re-execution.
meta_learner = RandomForestClassifier(random_state=42)
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.083+-12.389
Meta-level score recommended 0.476
Mean score default 2.742+-12.458
Meta-level score default 0.401
Mean score ensemble 3.259+-4.192


Use KNN Classifier as meta learner

In [6]:
# k-nearest-neighbours meta-learner, constructed with scikit-learn's defaults.
meta_learner = KNeighborsClassifier()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 2.708+-12.168
Meta-level score recommended 0.430
Mean score default 2.742+-12.458
Meta-level score default 0.401
Mean score ensemble 3.259+-4.192


Use Gaussian Naive Bayes classifier as meta learner

In [7]:
# Gaussian naive Bayes meta-learner, constructed with scikit-learn's defaults.
meta_learner = GaussianNB()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 4.006+-8.999
Meta-level score recommended 0.781
Mean score default 2.742+-12.458
Meta-level score default 0.401
Mean score ensemble 3.259+-4.192


Parameter settings for MetaStream (considering a meta-level window size of 300)

In [8]:
# Base-level window settings, in the order they are later passed to MetaStream.
base_data_window, base_delay_window, base_sel_window_size = 672, 0, 24
# Meta-level window size used by the next three experiments.
meta_data_window = 300

Use Random Forest Classifier as meta learner

In [9]:
# Random-forest meta-learner. It is seeded with the same value as the base
# models in `models`, so re-running the cell gives a repeatable forest;
# without a seed the forest is rebuilt differently on every run.
# NOTE(review): the output recorded below this cell came from an unseeded run,
# so the "recommended" figures may shift slightly after re-execution.
meta_learner = RandomForestClassifier(random_state=42)
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.128+-12.755
Meta-level score recommended 0.457
Mean score default 2.764+-12.768
Meta-level score default 0.397
Mean score ensemble 3.369+-4.274


Use KNN Classifier as meta learner

In [10]:
# k-nearest-neighbours meta-learner, constructed with scikit-learn's defaults.
meta_learner = KNeighborsClassifier()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 2.753+-12.385
Meta-level score recommended 0.445
Mean score default 2.764+-12.768
Meta-level score default 0.397
Mean score ensemble 3.369+-4.274


Use Gaussian Naive Bayes classifier as meta learner

In [11]:
# Gaussian naive Bayes meta-learner, constructed with scikit-learn's defaults.
meta_learner = GaussianNB()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Both training stages take the same frame and target column.
train_args = dict(data=df, target='ArrDelay')
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.647+-4.923
Meta-level score recommended 0.748
Mean score default 2.764+-12.768
Meta-level score default 0.397
Mean score ensemble 3.369+-4.274
