## Feature Extraction from EM Data

In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier

In [2]:
# openning the file
hf = h5py.File('/home/asanka/Downloads/ASEADOS-Dataset/em-dataset.h5', 'r')

In [3]:
# printing the attributes of the file
for item in hf.attrs.keys():
    print(item + ":", hf.attrs[item])

dataset_creater: Asanka P. Sayakkara <asa@ucsc.cmb.ac.lk>
dataset_date: 2021-03-01
dataset_version: 1


In [4]:
def printname(name):
    print(name)
    
# viewing full data structure
hf.visit(printname)

internet-of-things
internet-of-things/amazon-echo-dot
internet-of-things/amazon-echo-dot/asking-a-definition
internet-of-things/amazon-echo-dot/asking-for-time
internet-of-things/amazon-echo-dot/asking-to-play-radio
internet-of-things/amazon-echo-dot/controlling-lightbulb
internet-of-things/amazon-echo-dot/device-idle
internet-of-things/amazon-echo-dot/device-muted
internet-of-things/amazon-echo-dot/device-resetting
internet-of-things/amazon-echo-dot/just-wakeup-word
internet-of-things/amazon-echo-dot/powering-on
internet-of-things/amazon-echo-show5
internet-of-things/amazon-echo-show5/asking-a-definition
internet-of-things/amazon-echo-show5/asking-for-time
internet-of-things/amazon-echo-show5/asking-to-play-radio
internet-of-things/amazon-echo-show5/controlling-lightbulb
internet-of-things/amazon-echo-show5/device-idle
internet-of-things/amazon-echo-show5/device-resetting
internet-of-things/amazon-echo-show5/just-wakeup-word
internet-of-things/amazon-echo-show5/powering-off
internet-o

### Some Settings

In [5]:
# number of samples per class
num_samp_per_class = 100

# FFT size for the STFT operation (which is same as the feature vector size)
fft_size = feature_vector_size = 2048 #1024

# number of overlapping samples for the STFT operation
fft_overlap = 256

### STFT Features for the Data File (Class 0)

In [6]:
class_label = 0

In [7]:
# reading a dataset
data = hf.get('/smartphones/iphone4s/calendar-app')
print(np.array(data).shape)

(59900416,)


In [8]:
f, t, Zxx = signal.stft(data, fs=20e6, nperseg=fft_size, noverlap=fft_overlap)
print(len(f))
print(len(t))
print(Zxx.shape)



2048
33428
(2048, 33428)


In [9]:
del data

Zxx = Zxx.transpose()

X = Zxx[:num_samp_per_class]
y = np.full(num_samp_per_class, class_label)
del Zxx

print(X.shape)
print(y.shape)

(100, 2048)
(100,)


### STFT Features for the Data File (Class 1)

In [10]:
class_label = 1

In [11]:
# reading a dataset
data = hf.get('/smartphones/iphone4s/web-browser-app')
print(np.array(data).shape)

(59507712,)


In [12]:
f, t, Zxx = signal.stft(data, fs=20e6, nperseg=fft_size, noverlap=fft_overlap)
print(len(f))
print(len(t))
print(Zxx.shape)

2048
33209
(2048, 33209)


In [13]:
del data

Zxx = Zxx.transpose()

X = np.concatenate((X, Zxx[:num_samp_per_class]), axis=0) 
y = np.concatenate((y, np.full(num_samp_per_class, class_label)), axis=0)
del Zxx

print(X.shape)
print(y.shape)

(200, 2048)
(200,)


### Machine Learning

In [14]:
X = np.abs(X)

In [15]:
y = np.abs(y)

In [16]:
# Split data to train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

In [17]:
clf = MLPClassifier(solver='lbfgs', alpha=1e-20, hidden_layer_sizes=(10, 5), random_state=1)

In [18]:
clf.fit(X_train, y_train)
y_pred = clf.predict (X_test)

In [19]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 4  6]
 [ 0 10]]
              precision    recall  f1-score   support

           0       1.00      0.40      0.57        10
           1       0.62      1.00      0.77        10

    accuracy                           0.70        20
   macro avg       0.81      0.70      0.67        20
weighted avg       0.81      0.70      0.67        20



### Closing the HDF5 File

In [20]:
hf.close()