In [1]:
import pandas as pd
import bz2

from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeRegressor

import sys
sys.path.append("..")
from meta_stream import MetaStream

In [2]:
# Read the first `df_rows` records from the compressed CSV, keep only the
# columns this notebook works with, and drop every row that has a missing value
# in any of them.
df_rows = 50000
# The context manager closes the bz2 handle as soon as pandas has finished
# reading; the original left the file object open for the kernel's lifetime.
with bz2.open("../../data/DataExpo2009/2008.csv.bz2", 'r') as file:
    df = pd.read_csv(file, nrows=df_rows)
df = df[['DepTime', 'CRSDepTime', 'CRSArrTime', 'ArrDelay', 'Distance']]
df = df.dropna(axis=0, how='any')

In [3]:
# NOTE: regression algorithms handed to MetaStream as `models`.
# The two ensemble regressors are seeded (random_state=42); LinearRegression
# is constructed with its defaults.
models = [
    RandomForestRegressor(random_state=42),
    LinearRegression(),
    GradientBoostingRegressor(random_state=42),
]

Parameter settings for MetaStream (considering a meta-level window size of 200)

In [4]:
# Window settings passed positionally to MetaStream in the cells that follow.
# Base-level values first, then the meta-level window size for this section.
base_data_window, base_delay_window, base_sel_window_size = 1000, 2, 25
meta_data_window = 200

Use a Random Forest classifier as the meta-learner

In [5]:
# Random forest meta-learner. It is seeded with the same random_state (42) as
# the seeded base regressors in `models`, so re-running this cell on a fresh
# kernel gives repeatable results.
# NOTE(review): any previously recorded output came from an unseeded forest and
# may differ slightly from what this cell prints now.
meta_learner = RandomForestClassifier(random_state=42)
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    # presumably these flags enable the "default" and "ensemble" baselines that
    # print_results reports — confirm against MetaStream
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
metas.base_train(data=df, target='ArrDelay')
metas.meta_train(data=df, target='ArrDelay')
metas.print_results()

Mean score recommended 3.180+-10.657
Meta-level score recommended 0.538
Mean score default 3.192+-11.789
Meta-level score default 0.478
Mean score ensemble 3.322+-3.821


Use a k-nearest-neighbours (KNN) classifier as the meta-learner

In [6]:
# KNN meta-learner, constructed with scikit-learn's default arguments.
meta_learner = KNeighborsClassifier()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
train_args = {'data': df, 'target': 'ArrDelay'}
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.358+-7.074
Meta-level score recommended 0.619
Mean score default 3.192+-11.789
Meta-level score default 0.478
Mean score ensemble 3.322+-3.821


Use a Gaussian Naive Bayes classifier as the meta-learner

In [7]:
# Gaussian Naive Bayes meta-learner, constructed with its default arguments.
meta_learner = GaussianNB()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
train_args = {'data': df, 'target': 'ArrDelay'}
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.429+-4.357
Meta-level score recommended 0.729
Mean score default 3.192+-11.789
Meta-level score default 0.478
Mean score ensemble 3.322+-3.821


Parameter settings for MetaStream (considering a meta-level window size of 300)

In [8]:
# Window settings passed positionally to MetaStream in the cells that follow.
# The base-level values are re-stated; the meta-level window is set to 300.
base_data_window, base_delay_window, base_sel_window_size = 1000, 2, 25
meta_data_window = 300

Use a Random Forest classifier as the meta-learner

In [9]:
# Random forest meta-learner. It is seeded with the same random_state (42) as
# the seeded base regressors in `models`, so re-running this cell on a fresh
# kernel gives repeatable results.
# NOTE(review): any previously recorded output came from an unseeded forest and
# may differ slightly from what this cell prints now.
meta_learner = RandomForestClassifier(random_state=42)
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    # presumably these flags enable the "default" and "ensemble" baselines that
    # print_results reports — confirm against MetaStream
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
metas.base_train(data=df, target='ArrDelay')
metas.meta_train(data=df, target='ArrDelay')
metas.print_results()

Mean score recommended 3.268+-10.683
Meta-level score recommended 0.538
Mean score default 3.248+-12.113
Meta-level score default 0.468
Mean score ensemble 3.446+-3.887


Use a k-nearest-neighbours (KNN) classifier as the meta-learner

In [10]:
# KNN meta-learner, constructed with scikit-learn's default arguments.
meta_learner = KNeighborsClassifier()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
train_args = {'data': df, 'target': 'ArrDelay'}
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.346+-6.835
Meta-level score recommended 0.619
Mean score default 3.248+-12.113
Meta-level score default 0.468
Mean score ensemble 3.446+-3.887


Use a Gaussian Naive Bayes classifier as the meta-learner

In [11]:
# Gaussian Naive Bayes meta-learner, constructed with its default arguments.
meta_learner = GaussianNB()
metas = MetaStream(
    meta_learner,
    models,
    base_data_window,
    base_delay_window,
    base_sel_window_size,
    meta_data_window,
    strategy='combination',
    default=True,
    ensemble=True,
    pairs=False,
)
# Base-level and meta-level training both receive the same frame and target.
train_args = {'data': df, 'target': 'ArrDelay'}
metas.base_train(**train_args)
metas.meta_train(**train_args)
metas.print_results()

Mean score recommended 3.594+-4.451
Meta-level score recommended 0.744
Mean score default 3.248+-12.113
Meta-level score default 0.468
Mean score ensemble 3.446+-3.887
