<a href="https://colab.research.google.com/github/cseveriano/spatio-temporal-forecasting/blob/master/notebooks/emvfts/20200204_Evolving_FTS_Benchmark_1_Concept_Drift.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Concept Drift Experiment
A comparison between evolving models when applied to different Concept Drift datasets

### Environment Setup

In [0]:
!pip3 install -U git+https://github.com/PYFTS/pyFTS
!pip3 install -U git+https://github.com/cseveriano/spatio-temporal-forecasting
!pip3 install -U git+https://github.com/cseveriano/evolving_clustering
!pip3 install -U git+https://github.com/Felix-neko/matlab_mldivide

Collecting git+https://github.com/PYFTS/pyFTS
  Cloning https://github.com/PYFTS/pyFTS to /tmp/pip-req-build-y6aumjm5
  Running command git clone -q https://github.com/PYFTS/pyFTS /tmp/pip-req-build-y6aumjm5
Building wheels for collected packages: pyFTS
  Building wheel for pyFTS (setup.py) ... [?25l[?25hdone
  Created wheel for pyFTS: filename=pyFTS-1.6-cp36-none-any.whl size=207415 sha256=758ac65985ffb34dc95799c936267f7bb5fc621639862acd96aba42be4481f0f
  Stored in directory: /tmp/pip-ephem-wheel-cache-2graj9_l/wheels/e7/32/a9/230470113df5a73242a5a6d05671cb646db97abf14bbce2644
Successfully built pyFTS
Installing collected packages: pyFTS
Successfully installed pyFTS-1.6
Collecting git+https://github.com/cseveriano/spatio-temporal-forecasting
  Cloning https://github.com/cseveriano/spatio-temporal-forecasting to /tmp/pip-req-build-8g_1_m5d
  Running command git clone -q https://github.com/cseveriano/spatio-temporal-forecasting /tmp/pip-req-build-8g_1_m5d
Building wheels for collected

### Common imports

In [0]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib as plt
import matplotlib.pyplot as plt
import seaborn as sns


from pyFTS.common import Util

%pylab inline

Populating the interactive namespace from numpy and matplotlib


### Synthethic concept drift datasets

In [0]:
from pyFTS.data import artificial

def generate_concept_drift_datasets():
  mu_local = 5
  sigma_local = 0.25
  mu_drift = 3
  sigma_drift = 0.5
  deflen = 200
  totlen = deflen * 10
  order = 5

  signals = {}

  def mavg(l,order=2):
    ret = [] #l[:order]
    for k in np.arange(order, len(l)):
      ret.append( np.nanmean(l[k-order:k])  )
      
    return ret

  signal = artificial.SignalEmulator().stationary_gaussian(mu_local,sigma_local,length=deflen,it=10).run()

  signals['Stationary signal'] = mavg(signal, order)

  signal = artificial.SignalEmulator().stationary_gaussian(mu_local,sigma_local,length=deflen,it=10).blip().blip().run()

  signals['Stationary signal with blip'] = mavg(signal, order)

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen//2,it=10)\
  .stationary_gaussian(mu_local,sigma_drift,length=deflen//2,it=10, additive=False)\
  .run()

  signals['Sudden Variance'] = mavg(signal, order)

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen//2,it=10)\
  .stationary_gaussian(mu_drift,sigma_local,length=deflen//2,it=10, additive=False)\
  .run()

  signals['Sudden Mean'] = mavg(signal, order)

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen//2,it=10)\
  .stationary_gaussian(mu_drift,sigma_drift,length=deflen//2,it=10, additive=False)\
  .run()


  signals['Sudden Mean & Variance'] = mavg(signal, order)

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen,it=10)\
  .incremental_gaussian(0.1,0,length=totlen//2,start=totlen//2)\
  .run()

  signals['Incremental Mean'] = mavg(signal, order)

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen,it=10)\
  .incremental_gaussian(0.,0.1,length=totlen//2,start=totlen//2)\
  .run()

  signals['Incremental Variance'] = signal

  signal = artificial.SignalEmulator()\
  .stationary_gaussian(mu_local,sigma_local,length=deflen,it=10)\
  .incremental_gaussian(0.02,0.01,length=totlen//2,start=totlen//2)\
  .run()

  signals['Incremental Mean & Variance'] = mavg(signal, order)

  return signals

In [0]:
def plot_datasets(signals):
  fig, ax = plt.subplots(nrows=4, ncols=2, figsize=[15,10])

  nrows = 4
  ncols = 2

  row = 0
  col = 0

  for key in signals.keys():
    
    if col >= ncols:
      col = 0
      row += 1
    
    ax[row][col].plot(signals[key])
    ax[row][col].set_title(key)
    
    col += 1
    
  plt.tight_layout()

### Benchmarks

In [0]:
from spatiotemporal.models.clusteredmvfts.fts import evolvingclusterfts
from pyFTS.models.multivariate import granular
from spatiotemporal.util import benchmarks
from spatiotemporal.models.benchmarks.fbem import FBeM
from pyFTS.benchmarks import Measures
from spatiotemporal.data import loader

_order = 2

trials = 10
results_df = pd.DataFrame(columns=["Trial","Dataset","Model","RMSE","SMAPE"])

for i in np.arange(trials):

  print('Trial: ', i)
  signals = generate_concept_drift_datasets()

  rows = []

  for row, key in enumerate(signals.keys()):
    print('Processing dataset: ', key)
    s = signals[key]
    mins = min(s)
    maxs = max(s)
    signals[key] = [(s-mins)/(maxs-mins) for s in signals[key]]

    df = loader.series_to_supervised(signals[key], n_in=_order, n_out=1)
    data_input = df.iloc[:,:_order].values
    data_output = df.iloc[:,-1].values

    l = len(df.index)
    limit = l//2
    train = data_input[:limit]
    test = data_input[limit:]

    ## EvolvingFTS Forecast
    evolving_model = evolvingclusterfts.EvolvingClusterFTS(defuzzy='weighted', membership_threshold=0.6, variance_limit=0.001)
    evolving_model.fit(train, order=_order)
    y_hat_df = pd.DataFrame(evolving_model.predict(test))
    forecasts = y_hat_df.iloc[:, -1].values
    _rmse = Measures.rmse(data_output[limit+_order:], forecasts[:-1])
    _smape = Measures.smape(data_output[limit+_order:], forecasts[:-1])
    data = [i, key, "EvolvingFTS", _rmse, _smape]
    rows.append(data)

    ## FBeM Forecast
    fbem_model = FBeM.FBeM()
    fbem_model.n = _order
    fbem_model.fit(train, order=_order)
    forecasts = fbem_model.predict(test)
    _rmse = Measures.rmse(data_output[limit+_order:], forecasts[:-1])
    _smape = Measures.smape(data_output[limit+_order:], forecasts[:-1])
    data = [i, key, "FBeM", _rmse, _smape]
    rows.append(data)

  plt.tight_layout()
  results_df = results_df.append(pd.DataFrame(rows, columns=["Trial","Dataset","Model","RMSE","SMAPE"]))

Trial:  0
Processing dataset:  Stationary signal
Processing dataset:  Stationary signal with blip
Processing dataset:  Sudden Variance
Processing dataset:  Sudden Mean
Processing dataset:  Sudden Mean & Variance
Processing dataset:  Incremental Mean
Processing dataset:  Incremental Variance
Processing dataset:  Incremental Mean & Variance
Trial:  1
Processing dataset:  Stationary signal
Processing dataset:  Stationary signal with blip
Processing dataset:  Sudden Variance
Processing dataset:  Sudden Mean
Processing dataset:  Sudden Mean & Variance
Processing dataset:  Incremental Mean
Processing dataset:  Incremental Variance
Processing dataset:  Incremental Mean & Variance
Trial:  2
Processing dataset:  Stationary signal
Processing dataset:  Stationary signal with blip
Processing dataset:  Sudden Variance
Processing dataset:  Sudden Mean
Processing dataset:  Sudden Mean & Variance
Processing dataset:  Incremental Mean
Processing dataset:  Incremental Variance
Processing dataset:  Incre

<Figure size 432x288 with 0 Axes>

In [0]:
results_df

Unnamed: 0,Trial,Dataset,Model,RMSE,SMAPE
0,0,Stationary signal,EvolvingFTS,0.140644,13.405322
1,0,Stationary signal,FBeM,0.155251,14.385957
2,0,Stationary signal with blip,EvolvingFTS,0.070744,15.446903
3,0,Stationary signal with blip,FBeM,0.125909,17.919088
4,0,Sudden Variance,EvolvingFTS,0.139984,12.041859
...,...,...,...,...,...
11,9,Incremental Mean,FBeM,0.003259,0.911578
12,9,Incremental Variance,EvolvingFTS,0.145819,10.286349
13,9,Incremental Variance,FBeM,0.116048,8.171003
14,9,Incremental Mean & Variance,EvolvingFTS,0.068356,10.482617


In [0]:
from google.colab import files
results_df.to_csv('concept-drift-results.csv') 
files.download('concept-drift-results.csv')

In [0]:
results_df

Unnamed: 0,Trial,Dataset,Model,RMSE,SMAPE
0,0,Stationary signal,EvolvingFTS,0.140644,13.405322
1,0,Stationary signal,FBeM,0.155251,14.385957
2,0,Stationary signal with blip,EvolvingFTS,0.070744,15.446903
3,0,Stationary signal with blip,FBeM,0.125909,17.919088
4,0,Sudden Variance,EvolvingFTS,0.139984,12.041859
...,...,...,...,...,...
11,9,Incremental Mean,FBeM,0.003259,0.911578
12,9,Incremental Variance,EvolvingFTS,0.145819,10.286349
13,9,Incremental Variance,FBeM,0.116048,8.171003
14,9,Incremental Mean & Variance,EvolvingFTS,0.068356,10.482617


In [0]:
results_df.groupby(['Dataset', 'Model'], as_index=False).std()

In [0]:
pd.options.display.float_format = '{:.4f}'.format

In [0]:

results_df.groupby(['Dataset', 'Model'], as_index=False).agg({'RMSE':['mean','std'], 'SMAPE':['mean','std']})

Unnamed: 0_level_0,Dataset,Model,RMSE,RMSE,SMAPE,SMAPE
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std
0,Incremental Mean,EvolvingFTS,0.003,0.0004,1.5872,0.3779
1,Incremental Mean,FBeM,0.0032,0.0,0.9063,0.0255
2,Incremental Mean & Variance,EvolvingFTS,0.0796,0.0056,11.9092,1.4134
3,Incremental Mean & Variance,FBeM,0.0973,0.0097,9.8297,0.3561
4,Incremental Variance,EvolvingFTS,0.1602,0.0138,12.4671,1.2935
5,Incremental Variance,FBeM,0.1302,0.0119,9.8383,1.1677
6,Stationary signal,EvolvingFTS,0.1319,0.01,11.3948,1.4669
7,Stationary signal,FBeM,0.1446,0.0114,12.2478,1.4639
8,Stationary signal with blip,EvolvingFTS,0.0545,0.0329,12.5758,1.373
9,Stationary signal with blip,FBeM,0.0806,0.0532,14.2734,2.1075
