## Classify Pressure Transducer Timeseries using XGB

In [6]:
# ------------------------------------------------------------------------
# ------------------------------------------------------------------------
#
# SCRIPT   : xboost_classifier.py
# POURPOSE : classify timeseries using  boosted trees
#
# AUTHOR   : Caio Eadi Stringari
# EMAIL    : Caio.EadiStringari@uon.edu.au
#
# v1.1     : 23/05/2018 [Caio Stringari]
#
# ------------------------------------------------------------------------
# ------------------------------------------------------------------------

In [7]:
# I/O 1
import pandas as pd
import numpy as np

# eXtreme Gradient Boosting
import xgboost as xgb

from pywavelearn.stats import Hm0, Tm01, Tm02
from pywavelearn.spectral import power_spectrum_density
from pywavelearn.utils import peaklocalextremas
from pywavelearn.stats import (significant_wave_height,
                               significant_wave_period)
# pre-processing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# metrics
from imblearn.combine import SMOTETomek
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import classification_report, confusion_matrix

# I/O 2
from sklearn.externals import joblib

# quite some warnings
import warnings

import seaborn as sns
from xgboost import plot_tree
import matplotlib.pyplot as plt


# set globals
sns.set_context("paper", font_scale=2.0, rc={"lines.linewidth": 2.0})
sns.set_style("ticks", {'axes.linewidth': 2,
                        'legend.frameon': True,
                        'axes.facecolor': "#E9E9F1",
                        'grid.color': "w"})
warnings.filterwarnings("ignore")

In [8]:
def main():

    print("\neXtreme Gradient Boost running, please wait...\n")
    
    print("\n   + Reading data [1/3]")
    # read the input data
    data = pd.read_csv(INPUT)

    X = data[F].values
    y = data["label"].values
    
    # make the split
    X_train, X_test, y_train, y_test, = train_test_split(X, y, shuffle=True,
                                                         stratify=y,
                                                         random_state=STATE,
                                                         train_size=0.7,
                                                         test_size=0.3,)

    # balance the training classes
    if BALANCE:
        resampler = SMOTETomek()
        X_train, y_train = resampler.fit_sample(X_train, y_train)
        X_test, y_test = resampler.fit_sample(X_test, y_test)

    # scale each feature from 0 to 1
    scaler = MinMaxScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # fit the model
    print("\n   + Training XGB [2/3]")
    dtrain = xgb.DMatrix(X_train, label=y_train)
    model = xgb.XGBClassifier(n_estimators=128, max_depth=16,
                              learning_rate=0.1, subsample=0.5)
    train_model = model.fit(X_train, y_train)

    # predict new data
    y_pred = train_model.predict(X_test)

    print("\nClassification report:")
    if BALANCE:
        print(classification_report_imbalanced(y_test, y_pred))
        f = open(OUTPUT+"_report.txt", "w")
        f.write(classification_report(y_pred, y_test))
        f.close()
    else:
        print(classification_report(y_pred, y_test))
        f = open(OUTPUT+"_report.txt", "w")
        f.write(classification_report(y_pred, y_test))
        f.close()
        
        
    print("\n\nMy work is done!\n")

In [9]:
if __name__ == '__main__':
    
    # constants
    INPUT = "timeseries_features.csv"
    OUTPUT = "TSCLF"
    STATE = 7  # random state
    DELTA = 0.1  # delta value for peaklocalextrema
    BALANCE = True
    
    # Features
    F = ["E", "Hm0", "Tm01", "Tm02", "Tp",
         "h_min", "h_max", "h_avg", "h_std", "Nt"]
    
    # main call
    main()
    
    


eXtreme Gradient Boost running, please wait...


   + Reading data [1/3]

   + Training XGB [2/3]

Classification report:
                   pre       rec       spe        f1       geo       iba       sup

          1       0.95      0.97      0.94      0.96      0.96      0.92      1072
          2       0.97      0.94      0.97      0.96      0.96      0.92      1072

avg / total       0.96      0.96      0.96      0.96      0.96      0.92      2144



My work is done!

