In [2]:
import joblib
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import time
import numpy as np

In [3]:
approximation_raw = joblib.load("../feats/low_high_pass/approximation_raw.joblib")
detail_raw = joblib.load("../feats/low_high_pass/detail_raw.joblib")

In [4]:
approximation_raw[0][0]

array([-5.12269944e-05, -2.20914299e-04, -3.13711847e-04, ...,
        7.84276926e-05,  1.08471024e-04,  0.00000000e+00], dtype=float32)

In [5]:
# approximation_raw = np.array(approximation_raw)
# detail_raw = np.array(detail_raw)

In [6]:
detail_raw.shape

AttributeError: 'list' object has no attribute 'shape'

In [None]:
approximation_raw.shape

In [None]:
approximation_raw[0]

In [7]:
from collections import Counter
import scipy
import operator

def calculate_entropy(list_values):
    counter_values = Counter(list_values).most_common()
    probabilities = [elem[1]/len(list_values) for elem in counter_values]
    entropy=scipy.stats.entropy(probabilities)
    return entropy

def calculate_statistics(list_values):
    n5 = np.nanpercentile(list_values, 5)
    n25 = np.nanpercentile(list_values, 25)
    n75 = np.nanpercentile(list_values, 75)
    n95 = np.nanpercentile(list_values, 95)
    median = np.nanpercentile(list_values, 50)
    mean = np.nanmean(list_values)
    std = np.nanstd(list_values)
    var = np.nanvar(list_values)
    rms = np.nanmean(np.sqrt(list_values**2))
    return [n5, n25, n75, n95, median, mean, std, var, rms]

def calculate_crossings(list_values):
    zero_crossing_indices = np.nonzero(np.diff(np.array(list_values) > 0))[0]
    no_zero_crossings = len(zero_crossing_indices)
    mean_crossing_indices = np.nonzero(np.diff(np.array(list_values) > np.nanmean(list_values)))[0]
    no_mean_crossings = len(mean_crossing_indices)
    return [no_zero_crossings, no_mean_crossings]

def get_features(list_values):
    entropy = calculate_entropy(list_values)
    crossings = calculate_crossings(list_values)
    statistics = calculate_statistics(list_values)
    return statistics + crossings + [entropy]

In [9]:
ar = approximation_raw
dr = detail_raw

In [11]:
len(ar[0])

5

In [12]:
X_1 = []
for i in range (0, len(ar)):
    features = []
    for j in range (0, len(ar[0])):
        ficzur = get_features(ar[i][j])
        features.append(ficzur)
    X_1.append(features)

In [13]:
len(X_1)

5252

In [14]:
X_2 = []
for i in range (0, len(dr)):
    features = []
    for j in range (0, len(dr[0])):
        ficzur = get_features(dr[i][j])
        features.append(ficzur)
    X_2.append(features)

In [15]:
len(X_2)

5252

In [16]:
x_1 = np.asarray(X_1)

In [17]:
x_2 = np.asarray(X_2)

In [18]:
print(x_1.shape)
print(x_2.shape)

(5252, 5, 12)
(5252, 5, 12)


In [19]:
X = np.concatenate((x_1,x_2), axis=1)

In [20]:
X.shape

(5252, 10, 12)

In [21]:
X_name = '../feats/low_high_pass/X_10_banks.joblib'
joblib.dump(X, X_name)

['../feats/low_high_pass/X_10_banks.joblib']

In [22]:
y = joblib.load("../feats/low_high_pass/y.joblib")

In [23]:
#y = np.array(y, dtype=np.ndarray)

In [24]:
x = np.asarray(X)
y = y
x = x.reshape(5252, -1)
y = np.asarray(y)
#y = y[..., np.newaxis]

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [25]:
y_train.shape

(3518,)

In [26]:
start_time = time.time()

model = XGBClassifier(learning_rate = 0.1,
                      n_estimators = 1000,
                      max_depth = 8,
                      min_child_weight=3,
                      subsample = 0.8,
                      colsample_bytree = 0.8,
                      gamma = 0
                     )
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

print("--- Model trained. Training time: %s seconds ---" % (time.time() - start_time))

accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

--- Model trained. Training time: 257.34712958335876 seconds ---
Accuracy: 76.24%
