# HAR with CSI data: Visualization
## Necessary Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import math, os
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def dataframe_of_CSI(directory):
    """
    Load the CSI recordings found in ``directory`` into one DataFrame per activity.

    Only ``.csv`` files whose name contains "walk", "noact" or "jog" are read.
    A file name containing several keywords is assigned to the first match in
    that order. Each file must provide a ``bandwidth`` and a ``CSI_DATA``
    column; ``CSI_DATA`` holds a bracketed, space-separated list of integers.

    Parameters
    ----------
    directory : str
        Folder that contains the recorded CSV files.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_walk, df_noact, df_jog)``; one row per packet, one column per raw
        CSI integer. An activity without any usable data yields an empty
        DataFrame. Row indices restart at 0 for every file.
    """
    # Per-activity list of frames; concatenated once at the end instead of
    # re-copying the growing frame for every file.
    frames_by_activity = {"walk": [], "noact": [], "jog": []}

    # os.listdir returns names in arbitrary order; sorting makes the row order
    # (and everything windowed from it later) reproducible.
    for file in sorted(os.listdir(directory)):
        if not file.endswith(".csv"):
            continue
        activity = next((key for key in frames_by_activity if key in file), None)
        if activity is None:
            continue

        df = pd.read_csv(os.path.join(directory, file))

        # Keep only packets whose bandwidth column is 0
        # (further filtering, e.g. on secondary_channel, could be added here).
        df = df[df["bandwidth"] == 0]

        # Ignore the first and last 40 packets of each recording, then parse
        # "[a b c ]" into a list of ints (empty tokens come from extra spaces).
        csi_rows_raw = [
            [int(x) for x in one_row.strip("[]").split(" ") if x != ""]
            for one_row in df["CSI_DATA"].iloc[40:-40]
        ]
        if csi_rows_raw:
            frames_by_activity[activity].append(pd.DataFrame(csi_rows_raw))

    def _combine(parts):
        # pd.concat raises on an empty list, so fall back to an empty frame.
        return pd.concat(parts, axis=0) if parts else pd.DataFrame()

    return (
        _combine(frames_by_activity["walk"]),
        _combine(frames_by_activity["noact"]),
        _combine(frames_by_activity["jog"]),
    )

In [3]:
# Folder that holds the recorded CSI CSV files
directory = "C:\\Users\\Dell\\Documents\\Wifi-Sensing-HAR\\data\\our_data"

# Read every recording once; the loader returns the frames as (walk, noact, jog)
csi_frames = dataframe_of_CSI(directory)
walk_df, noact_df, jog_df = csi_frames

In [4]:
# Report how many CSI rows were loaded for walk, noact and jog (in that order)
print("The number of entries found is: ")
print(*(len(frame) for frame in (walk_df, noact_df, jog_df)))

The number of entries found is: 
29093 44704 35145


In [5]:
## Extract Amplitude and Phase from the dataframe
def convert_csi_to_amplitude_phase(df):
    """
    Turn raw CSI rows into per-subcarrier amplitude and phase tables.

    Each row of ``df`` is read as interleaved pairs: values at even positions
    are taken as the imaginary part and values at odd positions as the real
    part. A trailing unpaired value is ignored.

    Returns a tuple ``(amplitudes_df, phases_df)`` of DataFrames with one row
    per input row and one column per pair; every entry is rounded to 4 decimals.
    """
    amplitude_rows = []
    phase_rows = []

    for raw_row in df.values:
        samples = raw_row.tolist()

        # De-interleave: even positions -> imaginary, odd positions -> real
        imag_parts = samples[0::2]
        real_parts = samples[1::2]

        row_amplitudes = []
        row_phases = []
        # zip stops at the shorter list, i.e. after len(samples) // 2 pairs
        for imag_value, real_value in zip(imag_parts, real_parts):
            imag_f = float(imag_value)
            real_f = float(real_value)
            row_amplitudes.append(round(math.sqrt(imag_f ** 2 + real_f ** 2), 4))
            row_phases.append(round(math.atan2(imag_f, real_f), 4))

        amplitude_rows.append(np.array(row_amplitudes))
        phase_rows.append(np.array(row_phases))

    return pd.DataFrame(amplitude_rows), pd.DataFrame(phase_rows)

### Amplitude and Phase of Walking

In [6]:
## Extract walk amplitude and phase
walk_amplitudes_df, walk_phases_df = convert_csi_to_amplitude_phase(walk_df)

## Depending on sig_mode, the received packets are 802.11a/g or 802.11n; both occur here,
## so either 52 or 56 sub-carriers carry useful data.
## The slices below keep columns 5-31 and 33-59 and drop the rest
## (presumably null/guard sub-carriers and the centre column -- confirm against the capture format).


## Amplitude
walk_df1_amps = walk_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
walk_df2_amps = walk_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

walk_df_amps_final = pd.concat([walk_df1_amps, walk_df2_amps],axis=1)


## Phase (sliced from the phase frame, not the amplitude frame)
walk_df1_phase = walk_phases_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
walk_df2_phase = walk_phases_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

walk_df_phase_final = pd.concat([walk_df1_phase, walk_df2_phase],axis=1)

### Amplitude and Phase of Jogging

In [7]:
## Extract jog amplitude and phase
jog_amplitudes_df, jog_phases_df = convert_csi_to_amplitude_phase(jog_df)

## Amplitude: keep columns 5-31 and 33-59
jog_df1_amps = jog_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
jog_df2_amps = jog_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

jog_df_amps_final = pd.concat([jog_df1_amps, jog_df2_amps],axis=1)


## Phase (sliced from the phase frame, not the amplitude frame)
jog_df1_phase = jog_phases_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
jog_df2_phase = jog_phases_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

jog_df_phase_final = pd.concat([jog_df1_phase, jog_df2_phase],axis=1)

### Amplitude and Phase of no activity

In [8]:
## Extract noact amplitude and phase
noact_amplitudes_df, noact_phases_df = convert_csi_to_amplitude_phase(noact_df)

## Amplitude: keep columns 5-31 and 33-59
noact_df1_amps = noact_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
noact_df2_amps = noact_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

noact_df_amps_final = pd.concat([noact_df1_amps, noact_df2_amps],axis=1)


## Phase (sliced from the phase frame, not the amplitude frame)
noact_df1_phase = noact_phases_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
noact_df2_phase = noact_phases_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

noact_df_phase_final = pd.concat([noact_df1_phase, noact_df2_phase],axis=1)

#### Moving Average of the data

In [9]:
# NOTE: the moving-average helper below is kept for experimentation only; it is not
# applied in the final training pipeline (window_size is set to 1, i.e. no averaging).
def moving_average(df, window_size):
    """
    Compute a block-wise moving average of ``df``.

    The rows are averaged over consecutive, non-overlapping windows of
    ``window_size`` rows; the result holds one row per complete window,
    indexed by the last row of that window. Trailing rows that do not fill a
    whole window are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Input samples, one row per time step.
    window_size : int
        Number of rows per window; must be at least 1. A value of 1 returns
        the data unchanged (as floats).

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    rolling_mean = df.rolling(window=window_size).mean()
    # The first complete window ends at position window_size - 1; starting the
    # stride there keeps the first window instead of skipping it.
    downsampled = rolling_mean.iloc[window_size - 1::window_size, :]
    return downsampled


## Moving-average window; a window of 1 means no actual averaging is performed
window_size = 1

## Apply the same window to the amplitude and phase frames of every activity
(
    mov_avg_walk_amps_df,
    mov_avg_walk_phase_df,
    mov_avg_jog_amps_df,
    mov_avg_jog_phase_df,
    mov_avg_noact_amps_df,
    mov_avg_noact_phase_df,
) = (
    moving_average(frame, window_size)
    for frame in (
        walk_df_amps_final,
        walk_df_phase_final,
        jog_df_amps_final,
        jog_df_phase_final,
        noact_df_amps_final,
        noact_df_phase_final,
    )
)

### Select n samples of data for input to the system as a flattened matrix

In [10]:
def select_data_portion(dataFrm, select_size):
    """
    Cut ``dataFrm`` into consecutive, non-overlapping windows and flatten each one.

    Every window spans ``select_size`` rows; its values are flattened row by
    row into a single feature vector. Trailing rows that do not fill a
    complete window are discarded.

    Parameters
    ----------
    dataFrm : pandas.DataFrame
        Samples, one row per time step.
    select_size : int
        Number of rows per window; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        One row per complete window with ``select_size * n_columns`` columns;
        empty when ``dataFrm`` has fewer than ``select_size`` rows.

    Raises
    ------
    ValueError
        If ``select_size`` is smaller than 1.
    """
    if select_size < 1:
        raise ValueError("select_size must be a positive integer")

    values = dataFrm.to_numpy()
    # Floor division counts every complete window, including the last one
    # when the number of rows is an exact multiple of select_size.
    window_count = len(values) // select_size
    flattened_windows = [
        values[k * select_size:(k + 1) * select_size].flatten()
        for k in range(window_count)
    ]
    return pd.DataFrame(flattened_windows)

In [11]:
# Flatten every block of 100 consecutive amplitude rows into one sample per activity
X_walk, X_jog, X_noact = (
    select_data_portion(amps_frame, 100)
    for amps_frame in (mov_avg_walk_amps_df, mov_avg_jog_amps_df, mov_avg_noact_amps_df)
)

# X data: walk samples first, then jog, then noact, with a fresh 0..n-1 index
activity_samples = [X_walk, X_jog, X_noact]
X_training = pd.concat(activity_samples, axis=0, ignore_index=True)
# X_training

In [12]:
# Class labels: 0 = walk, 1 = jog, 2 = noact (one label per sample)
y_walk = np.zeros(len(X_walk))
y_jog = np.ones(len(X_jog))
y_noact = np.full(len(X_noact), 2.0)

# Target Labels, stacked in the same order as the samples in X_training
label_parts = [y_walk, y_jog, y_noact]
y_training = np.concatenate(label_parts, axis=0)
print(*(len(part) for part in label_parts))

290 351 447


## PCA and t-SNE Visualization

In [13]:
import plotly.express as px

# Project the flattened samples onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_training)

# Pass the coordinates by keyword: the first positional parameter of
# px.scatter is data_frame, so positional arrays would not be used as x and y.
fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], color=y_training)
fig.update_layout(
    title="PCA visualization of Walk, Jog and No Activity dataset",
    xaxis_title="First Principal Component",
    yaxis_title="Second Principal Component",
)
fig.show()

In [14]:
from sklearn.manifold import TSNE

# Embed the flattened samples in 2-D; the fixed seed keeps the layout repeatable
tsne = TSNE(n_components=2, random_state=42)
X_tsne = tsne.fit_transform(X_training)

# One point per sample, coloured by its activity label
tsne_first, tsne_second = X_tsne[:, 0], X_tsne[:, 1]
tsne_layout = {
    "title": "t-SNE visualization of Walk, Jog and No Activity dataset",
    "xaxis_title": "First t-SNE",
    "yaxis_title": "Second t-SNE",
}
fig = px.scatter(x=tsne_first, y=tsne_second, color=y_training)
fig.update_layout(**tsne_layout)
fig.show()


------