In [None]:
import sys
import os
import csv
import scipy.io
from math import ceil, floor
import pandas as pd
from scipy.stats import skew, kurtosis
from scipy import signal
from scipy.signal import hilbert, medfilt, butter, lfilter, filtfilt, find_peaks, savgol_filter, welch, find_peaks
import numpy as np
from math import sqrt, atan2
import re
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
import glob
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay,precision_score, recall_score, f1_score, roc_auc_score,roc_curve 

In [None]:
#extracting amp and phase values
def extract_amp_phase(filename, phase_decimals=None):
    """Read a CSI capture CSV and return per-subcarrier amplitude and phase arrays.

    The CSV must have a ``data`` column whose cells contain a bracketed,
    comma-separated list of integers (e.g. ``"[3,4,-1,2,...]"``). The integers
    are interleaved pairs: even positions are treated as the imaginary part and
    odd positions as the real part of one subcarrier.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.
    phase_decimals : int or None, optional
        Number of decimals kept for the phase (radians). The default ``None``
        keeps the historical behaviour of rounding the phase to the nearest
        whole radian (integer values).
        NOTE(review): whole-radian rounding discards almost all phase
        information; pass e.g. ``phase_decimals=2`` if phase is actually used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(llft_amp, llft_phase, ht_amp, ht_phase)``. Each array has one row per
        CSV row. The "llft" arrays hold subcarriers 6-31 and 33-58 (52 columns),
        the "ht" arrays hold subcarriers 66-122 and 134-190 (114 columns),
        provided every row carries at least 191 subcarriers.
        NOTE(review): "llft"/"ht" presumably refer to the LLTF and HT-LTF
        training fields of the capture - confirm against the capture format.

    Raises
    ------
    IndexError
        If a ``data`` cell contains no ``[...]`` list.
    """
    d = pd.read_csv(filename)

    amps = []
    phases = []
    for entry in d['data']:
        # Pull the text between the outermost brackets and parse the integers.
        csi_string = re.findall(r"\[(.*)\]", entry)[0]
        csi_raw = [int(x) for x in csi_string.split(",") if x != '']

        # De-interleave: even positions -> imaginary, odd positions -> real.
        imaginary = csi_raw[0::2]
        real = csi_raw[1::2]

        # zip() stops at the shorter list, so a dangling unpaired value is
        # ignored exactly as the original index loop over len // 2 did.
        amps.append([round(sqrt(im ** 2 + rl ** 2), 1)
                     for im, rl in zip(imaginary, real)])
        if phase_decimals is None:
            phases.append([round(atan2(im, rl)) for im, rl in zip(imaginary, real)])
        else:
            phases.append([round(atan2(im, rl), phase_decimals)
                           for im, rl in zip(imaginary, real)])

    # First group of subcarriers (26 + 26 = 52 columns).
    llft_amp = np.array([x[6:32] + x[33:59] for x in amps])
    llft_phase = np.array([x[6:32] + x[33:59] for x in phases])

    # Second group of subcarriers (57 + 57 = 114 columns).
    ht_amp = np.array([x[66:123] + x[134:191] for x in amps])
    ht_phase = np.array([x[66:123] + x[134:191] for x in phases])

    return llft_amp, llft_phase, ht_amp, ht_phase


In [None]:
# Hampel filter
def hampel_filter(csi_data, window_size=9, threshold=2.0):
    """Replace outliers in each column with that column's running median.

    For every column (subcarrier) a median filter of length ``window_size`` is
    applied along the rows. A sample is flagged as an outlier when its absolute
    deviation from the running median exceeds ``threshold`` times the column's
    MAD, where the MAD is the median of those absolute deviations over the
    whole column. Flagged samples are replaced by the running median; all other
    samples are returned unchanged.

    Note that the threshold is expressed in multiples of the raw MAD. It is not
    rescaled to standard deviations, and the MAD is computed once per column,
    not per window. When the MAD is 0, every sample that differs from the
    running median is replaced.

    Parameters
    ----------
    csi_data : numpy.ndarray
        2-D array, rows = samples over time, columns = subcarriers.
    window_size : int, optional
        Odd length of the median window (default 9). ``medfilt`` zero-pads the
        ends of each column.
    threshold : float, optional
        Outlier threshold in multiples of the column MAD (default 2.0).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``csi_data``.
    """
    csi_data = np.asarray(csi_data)

    # A (window_size, 1) kernel filters along the rows of every column
    # independently, i.e. the same as calling medfilt on each column.
    median_filtered = medfilt(csi_data, kernel_size=(window_size, 1))

    deviation = np.abs(csi_data - median_filtered)
    mad = np.median(deviation, axis=0)  # one MAD per column
    outliers = deviation > threshold * mad

    return np.where(outliers, median_filtered, csi_data)

In [None]:
def sg_filter(csi_data, window_length=5, polyorder=3):
    """Smooth every column of ``csi_data`` with a Savitzky-Golay filter.

    Parameters
    ----------
    csi_data : numpy.ndarray
        2-D array, rows = samples over time, columns = subcarriers.
    window_length : int, optional
        Length of the filter window in samples (default 5).
    polyorder : int, optional
        Order of the fitted polynomial, must be less than ``window_length``
        (default 3).

    Returns
    -------
    numpy.ndarray
        Smoothed array with the same shape as ``csi_data``. The result is
        floating point even for integer input. The previous implementation
        wrote into ``np.zeros_like(csi_data)``, which silently truncated the
        smoothed values when the input was integer-typed.
    """
    # axis=0 filters along the rows, i.e. each column independently.
    return savgol_filter(np.asarray(csi_data), window_length, polyorder, axis=0)

In [None]:
def filtered(amp):
    """Denoise amplitude data.

    Runs ``hampel_filter`` on ``amp`` and then passes that result through
    ``sg_filter``; the smoothed array is returned.
    """
    return sg_filter(hampel_filter(amp))


In [None]:
def fe(folder_path, count, label):
    """Build a labelled feature table from the CSI recordings in a folder.

    Up to ``count`` files from ``folder_path`` are processed (in sorted file
    name order). For each file the first-group amplitudes returned by
    ``extract_amp_phase`` are denoised with ``filtered`` and nine statistics
    are computed per subcarrier column.

    Parameters
    ----------
    folder_path : str
        Directory containing the recording files.
    count : int
        Maximum number of files to process.
    label : int
        Class label stored in the last column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per processed file and 469 integer-named columns: 9 blocks of
        52 per-subcarrier values in the order min, max, mean, variance, IQR,
        RMS, skewness, kurtosis, mean absolute value (columns 0-467), followed
        by the label (column 468). Empty (but with all columns) when no file
        was processed.
    """
    col = list(range(469))  # 52 subcarriers * 9 statistics + 1 label column

    # os.listdir order is arbitrary; sort so the selected files are reproducible.
    files = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )
    if len(files) < count:
        # Previously this situation raised IndexError on files[i].
        print(f"fe: requested {count} files from {folder_path!r} "
              f"but only {len(files)} available")

    rows = []
    for file_name in files[:count]:
        llft_amp, _, _, _ = extract_amp_phase(os.path.join(folder_path, file_name))
        # The helper defined in this notebook is `filtered`; the former call to
        # `filtered_data` raised NameError.
        amplitude_data = filtered(llft_amp)

        q1, q3 = np.percentile(amplitude_data, [25, 75], axis=0)
        per_subcarrier_stats = [
            np.min(amplitude_data, axis=0),
            np.max(amplitude_data, axis=0),
            np.mean(amplitude_data, axis=0),
            np.var(amplitude_data, axis=0),
            q3 - q1,
            np.sqrt(np.mean(amplitude_data ** 2, axis=0)),
            skew(amplitude_data, axis=0),
            kurtosis(amplitude_data, axis=0),
            np.mean(np.abs(amplitude_data), axis=0),
        ]
        # np.append flattens the 9 x 52 statistics and adds the label at the end.
        rows.append(np.append(per_subcarrier_stats, label))

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows, columns=col)

In [None]:
def create_dataset():
    """Extract the feature tables of the five classes.

    Calls ``fe`` once per class folder with 600 files per class and labels
    0-4 for folders ``a``-``e``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(trained_a, trained_b, trained_c, trained_d, trained_e)``.
    """
    # NOTE(review): the previous calls were fe("aug_yoga_data/a/s1_a_", ".csv", 600, 0),
    # i.e. four arguments (file prefix + extension) for a function that takes
    # (folder_path, count, label) and lists a directory. The directory part of
    # the old prefix is passed now; confirm each folder only contains the
    # "s1_<class>_*.csv" recordings that should be used.
    class_folders = [
        "aug_yoga_data/a",
        "aug_yoga_data/b",
        "aug_yoga_data/c",
        "aug_yoga_data/d",
        "aug_yoga_data/e",
    ]
    files_per_class = 600

    trained_a, trained_b, trained_c, trained_d, trained_e = (
        fe(folder, files_per_class, class_label)
        for class_label, folder in enumerate(class_folders)
    )
    return trained_a, trained_b, trained_c, trained_d, trained_e

In [None]:
# Run the feature extraction so the per-class tables exist on a fresh kernel
# (create_dataset() was defined but never called before).
trained_a, trained_b, trained_c, trained_d, trained_e = create_dataset()

# Stack the five class tables; column 468 is the class label appended by fe().
data_set = pd.concat([trained_a, trained_b, trained_c, trained_d, trained_e], ignore_index=True)
data_set[468] = data_set[468].astype(int)
data_set

In [None]:
# Features are every column except 468, which holds the integer class label.
# (x_data / y_data were used below without ever being defined.)
x_data = data_set.drop(columns=[468])
y_data = data_set[468]

X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=42, shuffle=True)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Recursive feature elimination down to 150 features, dropping one feature per
# iteration. The ranking forest is seeded like the split so the selected
# feature set is reproducible between runs.
estimator = RandomForestClassifier(random_state=42)
selector = RFE(estimator, n_features_to_select=150, step=1)
selector = selector.fit(X_train, y_train)


In [None]:
# Keep only the columns chosen by RFE. The names X_train / X_test are rebound
# in place, so guard against re-running this cell: applying the full-width mask
# to already-reduced arrays would fail.
if X_train.shape[1] == selector.support_.size:
    X_train = X_train[:, selector.support_]
    X_test = X_test[:, selector.support_]
elif X_train.shape[1] != selector.n_features_:
    raise ValueError(
        "X_train has {} columns; expected {} (unselected) or {} (already selected)".format(
            X_train.shape[1], selector.support_.size, selector.n_features_
        )
    )

In [None]:
# Baseline comparison: fit each classifier with default hyperparameters and
# report accuracy, F1, precision and recall on the training and test splits.
# LogisticRegression, AdaBoostClassifier and SVC are imported by this notebook
# but are not part of this comparison.
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boost": GradientBoostingClassifier(),
    "Xgboost": XGBClassifier(),
    "KNN": KNeighborsClassifier(),
}

for model_name, model in models.items():
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # One text section per split. F1 is weighted by class support, precision
    # and recall are micro-averaged.
    # NOTE(review): for single-label multiclass targets micro-averaged
    # precision and recall both equal accuracy - consider 'weighted' or 'macro'.
    sections = []
    for split_name, y_true, y_pred in (("Training", y_train, y_train_pred),
                                       ("Test", y_test, y_test_pred)):
        sections.append("\n".join([
            f"Model performance for {split_name} set",
            f"- Accuracy: {accuracy_score(y_true, y_pred):.4f}",
            f"- F1 score: {f1_score(y_true, y_pred, average='weighted'):.4f}",
            f"- Precision: {precision_score(y_true, y_pred, average='micro'):.4f}",
            f"- Recall: {recall_score(y_true, y_pred, average='micro'):.4f}",
        ]))

    print(model_name)
    print("\n----------------------------------\n".join(sections))
    print('=' * 35)
    print('\n')

In [None]:
## Hyperparameter search spaces
# Only values accepted by current scikit-learn for a multiclass target are
# listed, so that no sampled candidate fails to fit:
#   * max_features "auto" (removed; it meant "sqrt" for classifiers) -> "sqrt"
#   * loss 'deviance' (old name of 'log_loss') and 'exponential' (binary
#     classification only) dropped
#   * criterion 'mse' (old name of 'squared_error') dropped
rf_params = {"max_depth": [5, 8, 15, None, 10],
             "max_features": [5, 7, "sqrt", 8],
             "min_samples_split": [2, 8, 15, 20],
             "n_estimators": [100, 200, 500, 1000]}
xgboost_params = {"learning_rate": [0.1, 0.01],
                  "max_depth": [5, 8, 12, 20, 30],
                  "n_estimators": [100, 200, 300],
                  "colsample_bytree": [0.5, 0.8, 1, 0.3, 0.4]}
gradient_params = {"loss": ['log_loss'],
                   "criterion": ['friedman_mse', 'squared_error'],
                   "min_samples_split": [2, 8, 15, 20],
                   "n_estimators": [100, 200, 500],
                   "max_depth": [5, 8, 15, None, 10]}

# (name, estimator, search space) triples consumed by the random search cell.
randomcv_models = [
    ("RF", RandomForestClassifier(), rf_params),
    ("XGBoost", XGBClassifier(), xgboost_params),
    ("GradientBoost", GradientBoostingClassifier(), gradient_params),
]


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Randomised hyperparameter search (100 sampled candidates, 3-fold CV) for every
# (name, estimator, search space) triple; the best parameters are kept per model.
model_param = {}
for name, model, params in randomcv_models:
    # random_state fixes which candidates are sampled, matching the seeded
    # train/test split. The estimators themselves remain unseeded, so scores
    # can still vary slightly between runs.
    search = RandomizedSearchCV(estimator=model,
                                param_distributions=params,
                                n_iter=100,
                                cv=3,
                                verbose=2,
                                n_jobs=-1,
                                random_state=42)
    search.fit(X_train, y_train)
    model_param[name] = search.best_params_

for model_name, best_params in model_param.items():
    print(f"---------------- Best Params for {model_name} -------------------")
    print(best_params)
    

In [None]:
# Tuned models. The hyperparameters are hard-coded here: replace them with the
# best parameters printed by the random search cell.
models = {
    "Random Forest": RandomForestClassifier(n_estimators=500, min_samples_split=2,
                                            max_features=8, max_depth=8),
    "XGBClassifier": XGBClassifier(learning_rate=0.1, max_depth=5, n_estimators=100,
                                   colsample_bytree=0.3),
    "GradientBoost": GradientBoostingClassifier(n_estimators=500, min_samples_split=20,
                                                max_depth=15, loss='log_loss',
                                                criterion='squared_error'),
}

for model_name, model in models.items():
    model.fit(X_train, y_train)

    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # One text section per split. F1 is weighted by class support, precision
    # and recall are micro-averaged.
    # NOTE(review): for single-label multiclass targets micro-averaged
    # precision and recall both equal accuracy - consider 'weighted' or 'macro'.
    sections = []
    for split_name, y_true, y_pred in (("Training", y_train, y_train_pred),
                                       ("Test", y_test, y_test_pred)):
        sections.append("\n".join([
            f"Model performance for {split_name} set",
            f"- Accuracy: {accuracy_score(y_true, y_pred):.4f}",
            f"- F1 score: {f1_score(y_true, y_pred, average='weighted'):.4f}",
            f"- Precision: {precision_score(y_true, y_pred, average='micro'):.4f}",
            f"- Recall: {recall_score(y_true, y_pred, average='micro'):.4f}",
        ]))

    print(model_name)
    print("\n----------------------------------\n".join(sections))

    # Confusion matrix of the test predictions, labelled with the model's classes.
    cm = confusion_matrix(y_test, y_test_pred, labels=model.classes_)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
    disp.plot()
    plt.show()

    print('=' * 35)
    print('\n')