In [41]:
import os
import sys

import pandas as pd

# Root of the project checkout. The original machine-specific location stays
# the default; set STF_PROJECT_ROOT to run the notebook from another checkout.
PROJECT_ROOT = os.environ.get(
    "STF_PROJECT_ROOT", "/Users/cseveriano/spatio-temporal-forecasting/"
)

# pyFTS is imported from the copy kept inside the project tree, so that folder
# has to be on sys.path before the import below can succeed.
sys.path.append(os.path.join(PROJECT_ROOT, "src", "ext-libraries", "pyFTS"))

from pyFTS.common import Util as cUtil

# The data/ and models/ paths used by the following cells are relative to the
# project root, hence the working-directory change.
os.chdir(PROJECT_ROOT)

## Análise dos dados
- Abertura dos arquivos gerados anteriormente
- Separação de bases de treinamento e testes

In [32]:
# Load the processed FLN series (semicolon-separated; the file name indicates
# 15-minute sampling).
fln_df = pd.read_csv('data/processed/SONDA/FLN-15min.csv', sep=";")

# One year for training and the following year for testing.
# The upper bounds are written as "before the next day" rather than
# "<= last day": if `date` carries a time-of-day part, a value such as
# '2014-10-31 00:15:00' is NOT <= '2014-10-31', so nearly the whole last day
# would be silently dropped. For date-only values both forms pick the same rows.
fln_train = fln_df[(fln_df.date >= '2013-11-01') & (fln_df.date < '2014-11-01')]
fln_test = fln_df[(fln_df.date >= '2014-11-01') & (fln_df.date < '2015-11-01')]

## Transformações nos dados

In [38]:
from pyFTS.common import Transformations
# Differencing transformation, later offered as the alternative to "no
# transformation" when building the partitioner grid. The argument 1 is
# presumably the differencing lag — TODO confirm against pyFTS.
diff = Transformations.Differential(1)

## Particionamento do Universo de Discurso

Particionamento único para teste da ferramenta

In [22]:
import importlib
import random as rnd
from pyFTS.common import FuzzySet,Membership, Transformations

# NOTE(review): `Grid` appears twice in this import list; harmless, but of the
# imported partitioner modules only Grid is used in this cell.
from pyFTS.partitioners import Grid, CMeans, Grid, FCM, Huarng, Util, Entropy  

# Single grid partitioning of the training column glo_avg, requested with 10
# partitions; its `.sets` feed the single High Order FTS training cell.
fln_fuzzysets = Grid.GridPartitioner(fln_train.glo_avg,10)

Múltiplos particionamentos com os métodos Grid e Entropy (por enquanto apenas o Grid está habilitado na lista `methods`). <br>
Cada modelo de conjuntos nebulosos é salvo na pasta models/fuzzysets

In [46]:
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing


def train_individual_partitioner(method, partitions, transformation, train_data):
    """Build one universe-of-discourse partitioner and pickle it.

    Args:
        method: Partitioner class/callable, invoked as
            ``method(train_data, partitions, transformation=transformation)``.
            The last component of its ``__module__`` is used in the key.
        partitions: Number of partitions forwarded to ``method``.
        transformation: Transformation forwarded to ``method``; any value
            other than None adds the "_diff" suffix to the key.
        train_data: Training series forwarded to ``method``.

    Returns:
        The key string. The partitioner itself is written to
        "models/fuzzysets/<key>.pkl" through ``cUtil.persist_obj``.
    """
    partitioner_name = str(method.__module__).rsplit('.', 1)[-1]
    suffix = "" if transformation is None else "_diff"
    _key = "".join(["sonda_fs_", partitioner_name, str(partitions), suffix])

    # Printed before the (possibly slow) fit so progress is visible.
    print(_key)

    fitted = method(train_data, partitions, transformation=transformation)
    target_path = "models/fuzzysets/{}.pkl".format(_key)
    cUtil.persist_obj(fitted, target_path)

    return _key


# Every partitioner is built twice: on the raw series (None) and with the
# differencing transformation (diff).
transformations = [None, diff]
methods = [Grid.GridPartitioner]
data = fln_train.glo_avg.tolist()
num_cores = multiprocessing.cpu_count()

# One job per (method, number of partitions, transformation) combination,
# numbered in generation order.
pool = dict(enumerate(
    {'m': method, 'pt': max_part, 't': t}
    for method in methods
    for max_part in range(10, 100, 10)
    for t in transformations
))
count = len(pool)

# Each call gets its own copies of the arguments. The partitioner is pickled
# inside train_individual_partitioner; only its key is returned here.
results = Parallel(n_jobs=num_cores)(
    delayed(train_individual_partitioner)(
        deepcopy(job['m']), job['pt'], deepcopy(job['t']), deepcopy(data)
    )
    for job in pool.values()
)

sonda_fs_Grid10
sonda_fs_Grid10_diff
sonda_fs_Grid20
sonda_fs_Grid20_diff
sonda_fs_Grid30
sonda_fs_Grid30_diff
sonda_fs_Grid40
sonda_fs_Grid40_diff
sonda_fs_Grid50
sonda_fs_Grid50_diff
sonda_fs_Grid60
sonda_fs_Grid60_diff
sonda_fs_Grid70
sonda_fs_Grid70_diff
sonda_fs_Grid80
sonda_fs_Grid80_diff
sonda_fs_Grid90
sonda_fs_Grid90_diff


## Treinamento do modelo

Treinamento único com FTS HighOrder

In [29]:
from pyFTS import common
from pyFTS import fts
from pyFTS import hofts

# NOTE(review): this rebinds the name `hofts` from the imported module to the
# model instance. The point-statistics cell relies on `hofts` being this
# model, while the multi-model training cell imports the `hofts` module again
# under the same name — mind the execution order or rename one of them.
hofts = hofts.HighOrderFTS("")
# Train on the training series using the single Grid partitioning. The third
# argument (6) is the value train_model passes as `lags`, i.e. the order.
hofts.train(fln_train.glo_avg.tolist(),fln_fuzzysets.sets,6)
print(hofts)
# Forecast on the test series is currently disabled:
#forecast_result = hofts.forecast(fln_test.glo_avg.tolist())


High Order FTS:
(0, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (1, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (2, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (3, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (4, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (5, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>) -> A0,A1
(0, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (1, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (2, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (3, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (4, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (5, <pyFTS.common.FuzzySet.FuzzySet object at 0x1118616d8>) -> A0,A1
(0, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (1, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (2, <pyFTS.common.FuzzySet.FuzzySet object at 0x111861668>), (3, <pyFTS.common.FuzzySet.FuzzySet object at 0x11186

Treinamento com múltiplos modelos - High Order.
Os modelos são gerados a partir da combinação entre diferentes modelos de particionamento e lags de High Order 

In [None]:
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing
import glob
from pyFTS import hofts


def train_model(train_data, partitioner, lags):
    """Train one High Order FTS model on a stored partitioner and pickle it.

    Args:
        train_data: Training series passed to ``model.train``.
        partitioner: Fitted partitioner. Its ``sets``, ``partitions`` and
            ``transformation`` attributes are read, and the last component of
            its ``__module__`` is used in the key.
        lags: Order of the model, passed as third argument to ``model.train``.

    Returns:
        The key string. The trained model is written to
        "models/fts/highorder/<key>.pkl" through ``cUtil.persist_obj``.
    """
    transformation = partitioner.transformation
    pttr = str(partitioner.__module__).split('.')[-1]
    t = "_diff" if transformation is not None else ""
    _key = "sonda_hofts_" + str(lags) + "_" + pttr + str(partitioner.partitions) + t
    print(_key)

    model = hofts.HighOrderFTS(_key)
    # Register the transformation only when the partitioner really has one.
    # Passing None through would leave a None entry among the model's
    # transformations. NOTE(review): newer pyFTS releases may ignore None on
    # their own — confirm for the copy used by this project.
    if transformation is not None:
        model.appendTransformation(transformation)
    model.train(train_data, partitioner.sets, lags)

    cUtil.persist_obj(model, "models/fts/highorder/" + _key + ".pkl")

    return _key

train_data = fln_train.glo_avg.tolist()
lags = [10, 15, 20]

# Reload every partitioner stored under models/fuzzysets/.
partitioners = [cUtil.load_obj(path) for path in glob.glob("models/fuzzysets/*.pkl")]

num_cores = multiprocessing.cpu_count()

# One job per (order, partitioner) pair, numbered in generation order.
pool = dict(enumerate(
    {'lg': lg, 'pt': pt}
    for lg in lags
    for pt in partitioners
))
count = len(pool)

# Each call gets its own copies of the arguments; train_model pickles the
# trained model and returns only its key.
results = Parallel(n_jobs=num_cores)(
    delayed(train_model)(deepcopy(train_data), deepcopy(job['pt']), deepcopy(job['lg']))
    for job in pool.values()
)

# Show the key of every model that was trained and stored.
for trained_key in results:
    print(trained_key)

sonda_hofts_10_Grid10
sonda_hofts_10_Grid10_diff
sonda_hofts_10_Grid20
sonda_hofts_10_Grid20_diff


## Predição da Série

In [33]:
from pyFTS.benchmarks import Measures

# Point-forecast statistics of the single model on the test series.
# NOTE(review): `hofts` must be the trained model bound in the single training
# cell. If the multi-model training cell ran in between, `hofts` is the pyFTS
# module again (that cell re-imports the name) and this call receives a module
# instead of a model.
_rmse, _smape, _u = Measures.get_point_statistics(fln_test.glo_avg.tolist(), hofts)


TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U32') dtype('<U32') dtype('<U32')

In [34]:
# Print each statistic of the single model, followed by a blank line.
for label, value in (("RMSE: ", _rmse), ("SMAPE: ", _smape), ("U: ", _u)):
    print(label, value, "\n")

RMSE:  71.62 

SMAPE:  nan 

U:  nan 



Predição dos modelos gerados nas diferentes combinações de particionamento e ordem de FTS

In [61]:
from pyFTS.benchmarks import Measures
from copy import deepcopy
from joblib import Parallel, delayed
import multiprocessing
import glob

def test_model(key, model_file, test_data):
    """Load one pickled model and compute its point statistics on test data.

    Args:
        key: Identifier of the model, copied unchanged into the result.
        model_file: Path handed to ``cUtil.load_obj``.
        test_data: Series handed to ``Measures.get_point_statistics``.

    Returns:
        A dict with the entries 'key' and 'rmse'. The other two statistics
        returned by ``get_point_statistics`` are not reported.
    """
    fitted = cUtil.load_obj(model_file)
    rmse, _unused_smape, _unused_u = Measures.get_point_statistics(test_data, fitted)
    return dict(key=key, rmse=rmse)

models = {}
test_data = fln_test.glo_avg.tolist()

# Map every stored model file to its key: the file name without directory and
# extension. os.path keeps this correct for names that contain dots and for
# platform-specific separators, unlike splitting on '.' and '/'.
for file in glob.glob("models/fts/highorder/*.pkl"):
    key = os.path.splitext(os.path.basename(file))[0]
    print(key + "\n")
    models[key] = file

num_cores = multiprocessing.cpu_count()

# Each call loads its own model from disk, so only the path and a copy of the
# test series are handed over.
executions = Parallel(n_jobs=num_cores)(
    delayed(test_model)(m, deepcopy(models[m]), deepcopy(test_data))
    for m in models.keys()
)

# Group model keys by RMSE so that models with equal error are listed together.
results = {}
for tmp in executions:
    results.setdefault(tmp['rmse'], []).append(tmp['key'])

# Report from the lowest RMSE to the highest.
for k in sorted(results.keys()):
    print(k, results[k])



sonda_hofts_5_Grid10_

sonda_hofts_5_Grid10__diff

sonda_hofts_5_Grid20_

sonda_hofts_5_Grid20__diff

sonda_hofts_5_Grid30_

sonda_hofts_5_Grid30__diff

sonda_hofts_5_Grid40_

sonda_hofts_5_Grid40__diff

sonda_hofts_5_Grid50_

sonda_hofts_5_Grid50__diff

sonda_hofts_5_Grid60_

sonda_hofts_5_Grid60__diff

sonda_hofts_5_Grid70_

sonda_hofts_5_Grid70__diff

sonda_hofts_5_Grid80_

sonda_hofts_5_Grid80__diff

sonda_hofts_5_Grid90_

sonda_hofts_5_Grid90__diff

sonda_hofts_6_Grid10_

sonda_hofts_6_Grid10__diff

sonda_hofts_6_Grid20_

sonda_hofts_6_Grid20__diff

sonda_hofts_6_Grid30_

sonda_hofts_6_Grid30__diff

sonda_hofts_6_Grid40_

sonda_hofts_6_Grid40__diff

sonda_hofts_6_Grid50_

sonda_hofts_6_Grid50__diff

sonda_hofts_6_Grid60_

sonda_hofts_6_Grid60__diff

sonda_hofts_6_Grid70_

sonda_hofts_6_Grid70__diff

sonda_hofts_6_Grid80_

sonda_hofts_6_Grid80__diff

sonda_hofts_6_Grid90_

sonda_hofts_6_Grid90__diff

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
66.44 ['sonda_hofts_5_Grid90_', 'sonda_hofts_6