In [1]:
import h5py
import numpy as np
import pandas as pd

In [2]:
from tqdm import tqdm

In [3]:
import scipy.signal as sig
from scipy.fft import fft, fftfreq

In [4]:
from sklearn.ensemble import RandomForestClassifier

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, auc, roc_curve

In [5]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [6]:
from joblib import Parallel, delayed

In [7]:
arrays = {}
with h5py.File("./data/Group1/group1_subset_1.mat", 'r') as f:
    for k, v in f.items():
        arrays[k] = np.array(v)

In [8]:
y = arrays['group1_radarStatusSubset_1']
y = y.reshape(200)

In [9]:
def preprocess_signal(x):
    i = np.array([real[0] for real in x])
    q = np.array([imag[1] for imag in x])
    i_max = i.max()
    q_max = q.max()
    i_min = i.min()
    q_min = q.min()
    i_mean = i.mean()
    q_mean = q.mean()
    i_range = i_max - i_min
    q_range = q_max - q_min
    num_pts_away_q = (q > 1e-6).sum()
    num_pts_away_q += (q < -1e-6).sum()
    num_pts_away_i = (i > 1e-6).sum()
    num_pts_away_i += (i < -1e-6).sum()
    temp = {
        "i_max": i_max,
        "q_max": q_max,
        "i_min": i_min,
        "q_min": q_min,
        "i_mean": i_mean,
        "q_mean": q_mean,
        "i_range": i_range,
        "q_range": q_range,
        "num_pts_away_q": num_pts_away_q,
        "num_pts_away_i": num_pts_away_i,
    }
    return temp

In [10]:
# X = []
# for x in tqdm(arrays['group1_waveformSubset_1']):
#     X.append(preprocess_signal(x))

In [11]:
X_preprocessed = Parallel(n_jobs=4)(delayed(preprocess_signal)(x) for x in tqdm(arrays['group1_waveformSubset_1']))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [01:46<00:00,  1.88it/s]


In [12]:
df = pd.DataFrame(X_preprocessed)

In [13]:
df.head()

Unnamed: 0,i_max,q_max,i_min,q_min,i_mean,q_mean,i_range,q_range,num_pts_away_q,num_pts_away_i
0,1e-06,1e-06,-1e-06,-1e-06,2.700156e-10,-1.53327e-10,3e-06,2e-06,54,59
1,1e-06,1e-06,-1e-06,-1e-06,-2.927103e-10,1.17208e-10,2e-06,3e-06,54,55
2,2e-06,2e-06,-2e-06,-1e-06,4.382238e-10,-1.445831e-10,3e-06,3e-06,65,79
3,1e-06,1e-06,-1e-06,-1e-06,3.231834e-10,-8.700088e-11,2e-06,2e-06,55,40
4,1e-06,1e-06,-1e-06,-1e-06,2.754731e-10,-1.286641e-11,2e-06,2e-06,38,50


In [74]:
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.3)

In [96]:
clf = RandomForestClassifier(max_depth=4, random_state=12)

In [97]:
clf.fit(X_train, y_train)

RandomForestClassifier(max_depth=4, random_state=12)

In [98]:
y_pred = clf.predict(X_test)

In [99]:
accuracy_score(y_test, y_pred)

0.9333333333333333

In [100]:
f1_score(y_test, y_pred)

0.9130434782608695

In [101]:
fpr, tpr, thresholds = roc_curve(y_test, y_pred)

In [102]:
auc(fpr, tpr)

0.9236111111111112

In [103]:
y.sum()

105