# Environment prep

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import os

# Load data

In [50]:
import os
import pandas as pd

# Specify the path to your main folder
main_folder_path = '/Users/jonathankoong/Documents/GitHub/ESP32-WiFi-Sensing/datasets/03-tvat-segments'

# Initialize an empty list to store flattened arrays and labels
all_flattened_arrays = []
all_labels = []

# Iterate through specific subfolders
for activity_folder in ["SO", "LL", "RA"]: #, "JJ", "LA", "RL", "NA"]: # classes 0 - 6
    activity_folder_path = os.path.join(main_folder_path, activity_folder)

    # Check if it's a directory
    if os.path.isdir(activity_folder_path):
        # Iterate through files in the subfolder
        for file_name in os.listdir(activity_folder_path):
            if file_name.endswith('.csv'):
                file_path = os.path.join(activity_folder_path, file_name)

                # Read the data from each CSV file into a DataFrame
                # df = pd.read_csv(file_path)  # Adjust the read_csv parameters based on your file format

                # Read the data from each CSV file into a DataFrame
                df = pd.read_csv(file_path, index_col=0)  # Use first column as index
                
                # Drop the 'timestamp' column
                df = df.drop(columns=['timestamp'])

                # Drop guard band subcarriers and DC null (column names are strings)
                columns_to_drop = ['2', '3', '4', '5', '32', '59', '60', '61', '62', '63']
                df = df.drop(columns=columns_to_drop, errors='ignore')

                # Flatten the DataFrame into a 1D array
                flattened_array = df.values.flatten()

                # Append the flattened array to the list
                all_flattened_arrays.append(flattened_array)

                # Append the label to the list
                all_labels.append(activity_folder)

# Create a DataFrame from the flattened arrays
combined_df = pd.DataFrame(all_flattened_arrays)

# Add a label column with the folder names
combined_df['label'] = all_labels


In [51]:
combined_df.shape

(525, 10801)

In [52]:
#combined_df = combined_df.iloc[:, 1:]
combined_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10791,10792,10793,10794,10795,10796,10797,10798,10799,label
0,105.60303,4.0,29.907444,28.615831,19.825764,27.628944,19.30298,27.627623,29.752916,25.797134,...,15.134412,14.045612,13.45412,15.081297,15.2366,13.815377,15.487723,16.161135,15.22263,SO
1,105.60303,4.0,35.4359,32.111202,34.941986,34.966465,30.970886,29.765505,33.494366,27.064217,...,9.26731,10.472629,12.653081,10.365606,8.482576,13.205768,12.607863,13.19123,10.58245,SO
2,105.60303,4.0,28.678368,26.948063,29.076746,26.893276,26.750805,25.161634,25.242552,23.887272,...,15.078357,15.417873,13.793548,14.571228,12.989577,12.409041,12.444606,12.549515,13.096099,SO
3,105.60303,4.0,23.575949,22.86184,22.801249,19.57878,22.610231,20.190742,21.172098,20.6215,...,11.19565,10.837091,9.713359,10.499819,9.711148,8.702316,9.072693,8.859162,8.950066,SO
4,105.60303,4.0,25.294096,28.87046,27.890263,27.626226,28.290956,28.891348,27.397211,27.855785,...,14.181855,12.957934,14.783259,15.907819,14.568509,15.477662,15.702929,13.719675,13.862663,SO
5,105.60303,4.0,25.360243,27.000307,26.333126,25.987385,26.713417,26.627092,25.86437,27.574638,...,14.059562,13.0344,13.831089,12.411201,12.986338,13.15677,12.413895,12.552805,11.527557,SO
6,105.60303,4.0,15.76671,16.02082,16.549532,17.14237,16.550337,15.803808,13.302514,13.450375,...,18.897379,18.874664,13.853906,19.551609,13.852486,20.200146,20.973116,20.6425,21.033592,SO
7,105.60303,4.0,28.0391,29.377169,28.489439,28.330612,28.888783,30.476643,27.59494,29.12952,...,10.480808,11.319244,11.328136,12.37987,12.079991,10.954422,13.22218,12.963613,13.78492,SO
8,105.60303,4.0,23.022243,22.621902,22.456533,23.46169,17.10931,16.819458,21.68576,15.803164,...,10.996972,11.340766,11.531186,12.181234,12.042315,11.346938,12.614502,12.040687,12.305752,SO
9,105.60303,4.0,21.041311,21.251022,19.161726,19.811663,20.116665,20.484148,19.391909,19.529974,...,12.831701,12.353397,12.568945,11.893066,13.017181,13.795824,13.802479,13.669675,14.736217,SO


# Data prep

In [53]:
from sklearn.model_selection import train_test_split
X = combined_df.drop('label', axis=1)
y = combined_df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [54]:
print(X_train.head(10))

         0      1          2          3          4          5          6      \
314  105.60303    4.0  27.310316  27.086401  26.261332  29.031988  27.377264   
467  105.60303    4.0  27.590157  27.530125  27.793114  25.908903  28.750614   
94   105.60303    4.0  26.206268  26.147360  17.714241  18.659363  17.782763   
456  105.60303    4.0  27.966055  28.319980  28.192162  28.496426  29.706180   
354  105.60303    4.0  21.500654  20.905096  20.699488  22.053700  23.455791   
23   105.60303    4.0   3.380016   8.038288   5.705960   9.001771   6.966027   
249  105.60303    4.0   8.197696   8.380623   7.632053   8.051704   7.384277   
272  105.60303    4.0  11.258986  15.529063  12.649450  16.328825  13.918297   
501  105.60303    4.0  20.742592  22.131620  20.835306  22.100754  20.058554   
5    105.60303    4.0  25.360243  27.000307  26.333126  25.987385  26.713417   

         7          8          9      ...      10790      10791      10792  \
314  27.222242  28.172104  27.846365  ...

In [55]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)
print("Label mapping:", dict(enumerate(label_encoder.classes_)))


Label mapping: {0: 'LL', 1: 'RA', 2: 'SO'}


In [56]:
print(y_train)

[0 1 2 1 1 2 0 0 1 2 2 2 1 1 2 1 1 2 1 1 2 2 0 0 2 1 0 2 2 0 1 2 2 2 1 2 2
 0 0 0 0 2 0 1 1 1 1 1 1 2 1 2 1 0 0 1 1 0 0 2 2 1 2 2 2 0 1 1 2 2 2 2 0 0
 2 0 0 1 2 0 0 0 1 1 1 0 1 1 1 0 0 2 0 2 1 0 0 0 2 2 0 0 1 2 1 2 0 0 1 2 1
 2 0 0 2 1 2 2 0 0 2 1 1 2 0 0 0 0 0 1 1 1 0 2 0 0 1 0 2 1 0 1 1 1 1 0 0 1
 1 2 0 0 1 1 1 1 0 1 1 2 0 2 0 0 0 2 2 1 0 1 1 2 1 0 1 2 1 0 2 1 2 1 2 0 2
 0 0 1 2 2 2 2 0 2 1 1 0 2 2 0 1 1 1 0 2 1 1 0 0 1 0 1 2 0 1 2 0 1 0 1 0 2
 0 0 2 2 2 0 0 0 1 2 1 0 0 1 0 2 1 1 2 2 2 2 1 2 0 1 2 0 2 2 2 1 1 1 0 1 0
 1 1 0 0 0 0 0 1 0 1 1 0 1 0 0 2 2 0 1 0 0 0 2 2 1 2 1 1 2 1 2 0 2 1 0 1 1
 1 2 0 1 2 1 0 0 2 0 2 1 0 2 0 1 2 0 1 0 0 2 1 2 1 0 1 0 1 1 0 1 0 2 1 2 1
 0 0 2 0 1 2 0 1 0 1 0 1 1 1 0 0 2 2 2 1 2 1 2 1 0 0 1 2 2 2 2 0 1 2]


# Model training

In [57]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [58]:
# Logistic Regression model
logreg_model = LogisticRegression(multi_class='auto', penalty='l2', C=1.0, solver='liblinear')
logreg_model.fit(X_train, y_train)

# Predictions and evaluation
logreg_predictions = logreg_model.predict(X_test)
logreg_accuracy = accuracy_score(y_test, logreg_predictions)
print("Logistic Regression Accuracy:", logreg_accuracy)
print("Logistic Regression Classification Report:\n", classification_report(y_test, logreg_predictions))



Logistic Regression Accuracy: 0.8291139240506329
Logistic Regression Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.70      0.76        50
           1       0.88      0.96      0.92        53
           2       0.78      0.82      0.80        55

    accuracy                           0.83       158
   macro avg       0.83      0.83      0.83       158
weighted avg       0.83      0.83      0.83       158



In [59]:
# Support Vector Classifier (SVC)
svm_model_rbf = SVC(kernel='linear', C=1)
svm_model_rbf.fit(X_train, y_train)

# Predictions and evaluation for SVC
svm_predictions = svm_model_rbf.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM Accuracy:", svm_accuracy)
print("SVM Classification Report:\n", classification_report(y_test, svm_predictions))

SVM Accuracy: 0.8544303797468354
SVM Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.84      0.82        50
           1       0.90      0.89      0.90        53
           2       0.87      0.84      0.85        55

    accuracy                           0.85       158
   macro avg       0.85      0.85      0.85       158
weighted avg       0.86      0.85      0.85       158



In [60]:
# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, max_features="sqrt", max_depth=20)
rf_model.fit(X_train, y_train)

# Predictions and evaluation for Random Forest
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest Accuracy:", rf_accuracy)
print("Random Forest Classification Report:\n", classification_report(y_test, rf_predictions))

Random Forest Accuracy: 0.9367088607594937
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.90      0.94        50
           1       0.94      0.96      0.95        53
           2       0.90      0.95      0.92        55

    accuracy                           0.94       158
   macro avg       0.94      0.94      0.94       158
weighted avg       0.94      0.94      0.94       158



In [61]:
!pip install xgboost



In [62]:
import xgboost as xgb

# XGBoost Classifier
xgb_model = xgb.XGBClassifier(objective='multi:softmax', num_class=3, random_state=42)
xgb_model.fit(X_train, y_train)

# Predictions and evaluation
xgb_predictions = xgb_model.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)
print("XGBoost Accuracy:", xgb_accuracy)
print("XGBoost Classification Report:\n", classification_report(y_test, xgb_predictions))

XGBoost Accuracy: 0.9113924050632911
XGBoost Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.90      0.90        50
           1       0.94      0.94      0.94        53
           2       0.89      0.89      0.89        55

    accuracy                           0.91       158
   macro avg       0.91      0.91      0.91       158
weighted avg       0.91      0.91      0.91       158

