# Extracción de características en series temporales
## Grado en Ingeniería Informática. Universidad de Burgos
**Autor:** Alicia Olivares Gil

In [None]:
%matplotlib inline 
#para dibujar en el propio notebook
import pandas as pd # se importa pandas como pd
import numpy as np  #numpy como np
import matplotlib.pyplot as plt #matplotlib.pyplot como plt
import pickle as pk
import loadData as ld
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import metrics as mtr
import seaborn as sns
import tsfresh as tf

### Cargar días de la crisis 1 y 2: 

In [None]:
# Load the pickled object stored in 'diacrisis1.pdd' (crisis day 1).
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that come from a trusted source.
with open('diacrisis1.pdd','rb') as f: 
    diacrisis1 = pk.load(f) 
# Print its length, then show the first rows as the cell output.
print(len(diacrisis1))
diacrisis1.head()

In [None]:
# Load the pickled object stored in 'diacrisis2.pdd' (crisis day 2).
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that come from a trusted source.
with open('diacrisis2.pdd','rb') as f: 
    diacrisis2 = pk.load(f) 
# Print its length, then show the first rows as the cell output.
print(len(diacrisis2))
diacrisis2.head()

### Función de extracción de características por ventana: 

In [None]:
import tsfresh as tf

def rolling_extract_features(X, window):
    """Extract tsfresh features over sliding windows of consecutive rows.

    For every start position ``i`` in ``range(len(X) - window)`` the rows
    ``X.iloc[i:i + window]`` are passed to ``tf.extract_features`` and the
    resulting frames are stacked vertically, in window order.

    Note that the last full window (the one starting at ``len(X) - window``)
    is intentionally not produced, so the output has ``len(X) - window``
    windows; this matches a target taken from position ``window`` onwards.

    Parameters
    ----------
    X : pandas.DataFrame
        Input rows. Must contain the columns ``'id'`` and ``'DateTime'``,
        which are handed to tsfresh as ``column_id`` and ``column_sort``.
    window : int
        Number of consecutive rows in each window.

    Returns
    -------
    pandas.DataFrame
        Concatenation (axis 0) of the per-window feature frames, or an empty
        DataFrame when ``len(X) <= window`` and no window is produced.
    """
    # Collect the per-window results and concatenate once: growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration, which is quadratic in the number of windows.
    window_features = [
        tf.extract_features(
            X.iloc[start:start + window],
            column_id='id',
            column_sort='DateTime',
        )
        for start in range(len(X) - window)
    ]
    # pd.concat raises on an empty list; keep returning an empty frame.
    if not window_features:
        return pd.DataFrame()
    return pd.concat(window_features, axis=0)

### Extracción de características de los días de la crisis 1 y 2: 

In [None]:
# Keep the label column of crisis day 1 on its own.
y1 = diacrisis1['target']
# Predictors: drop the label, tag every row with the constant id 1 (the column
# handed to tsfresh as column_id) and renumber the rows from 0.
X1 = (
    diacrisis1
    .drop(columns=['target'])
    .assign(id=1)
    .reset_index(drop=True)
)
print(len(X1))
X1.head()

In [None]:
# Keep the label column of crisis day 2 on its own.
y2 = diacrisis2['target']
# Predictors: drop the label, tag every row with the constant id 1 (the column
# handed to tsfresh as column_id) and renumber the rows from 0.
X2 = (
    diacrisis2
    .drop(columns=['target'])
    .assign(id=1)
    .reset_index(drop=True)
)
print(len(X2))
X2.head()

In [None]:
# Extract features over sliding windows of 90 consecutive rows of X1; the
# helper calls tsfresh once per window.
features_diacrisis1 = rolling_extract_features(X1,90)
# Print how many feature rows were produced and preview the first ones.
print(len(features_diacrisis1))
features_diacrisis1.head()

In [None]:
# Extract features over sliding windows of 90 consecutive rows of X2; the
# helper calls tsfresh once per window.
features_diacrisis2 = rolling_extract_features(X2,90)
# Print how many feature rows were produced and preview the first ones.
print(len(features_diacrisis2))
features_diacrisis2.head()

### Añadir target a las características calculadas: 

In [None]:
# Attach the target to the features of crisis day 1.
# reset_index returns a new frame, so its result has to be assigned; otherwise
# the features keep the index produced by the extraction and do not line up
# with the 0-based target built below when concatenating along axis=1.
# Dropping a previous 'target' column (if any) keeps the cell safe to re-run.
features_diacrisis1 = (
    features_diacrisis1
    .drop(columns=['target'], errors='ignore')
    .reset_index(drop=True)
)
# Window i covers rows i..i+89, so its label is the target at position i+90.
y = y1.iloc[90:].reset_index(drop=True)
assert len(features_diacrisis1) == len(y), "features and target must have the same number of rows"
features_diacrisis1 = pd.concat([features_diacrisis1, y], axis=1)
print(len(features_diacrisis1))
features_diacrisis1.head()

In [None]:
# Attach the target to the features of crisis day 2.
# reset_index returns a new frame, so its result has to be assigned; otherwise
# the features keep the index produced by the extraction and do not line up
# with the 0-based target built below when concatenating along axis=1.
# Dropping a previous 'target' column (if any) keeps the cell safe to re-run.
features_diacrisis2 = (
    features_diacrisis2
    .drop(columns=['target'], errors='ignore')
    .reset_index(drop=True)
)
# Window i covers rows i..i+89, so its label is the target at position i+90.
y = y2.iloc[90:].reset_index(drop=True)
assert len(features_diacrisis2) == len(y), "features and target must have the same number of rows"
features_diacrisis2 = pd.concat([features_diacrisis2, y], axis=1)
print(len(features_diacrisis2))
features_diacrisis2.head()

### Guardar características: 

In [None]:
def _dump_pickle(obj, path):
    """Serialize ``obj`` with pickle into the binary file at ``path``."""
    with open(path, 'wb') as out:
        pk.dump(obj, out)

# Save the results: each feature table is written as two files, the first
# 50000 rows (p1) and the remaining rows (p2).
_dump_pickle(features_diacrisis1[:50000], 'features_diacrisis1_p1.pdd')
_dump_pickle(features_diacrisis1[50000:], 'features_diacrisis1_p2.pdd')
_dump_pickle(features_diacrisis2[:50000], 'features_diacrisis2_p1.pdd')
_dump_pickle(features_diacrisis2[50000:], 'features_diacrisis2_p2.pdd')