# HAR with CSI data: Model creation
### Necessary Imports

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import math, os, pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
import random    
random.seed(42)
np.random.seed(42)

### Create dataframes for each activity

In [21]:
def dataframe_of_CSI(directory):
    # Create empty DataFrames for walk, run, and jog
    df_walk = pd.DataFrame()
    df_noact = pd.DataFrame()
    df_jog = pd.DataFrame()

    # Loop through each file in the directory
    for file in os.listdir(directory):
        # Check if the file is a CSV file and contains "walk", "noact", or "jog" in the name
        if file.endswith(".csv") and ("walk" in file or "noact" in file or "jog" in file):
            # Read the CSV file and extract the CSI_DATA column
            file_path = os.path.join(directory, file)
            df = pd.read_csv(file_path)
            print(df.columns)
            print(df.sig_mode.value_counts())
            csi_rows_raw = []

            ## Filtering can be done using
            df = df[(df["bandwidth"]==0)]# & (df["secondary_channel"]==1)]

            ## Ignore first few and last few seconds data
            for one_row in df['CSI_DATA'].iloc[40:-40]:
                one_row = one_row.strip("[]")
                csi_row_raw = [int(x) for x in one_row.split(" ") if x != '']
                csi_rows_raw.append(csi_row_raw)
        
            # Convert the list of lists to a DataFrame and append it to the appropriate DataFrame based on the file name
            csi_df = pd.DataFrame(csi_rows_raw)

            # Check which dataframe we are working on and concat the data
            if "walk" in file:
                df_walk = pd.concat([df_walk, csi_df], axis=0)
            elif "noact" in file:
                df_noact = pd.concat([df_noact, csi_df], axis=0)
            else:
                df_jog = pd.concat([df_jog, csi_df], axis=0)
    return df_walk, df_noact, df_jog

In [22]:
# Define the directory path where the CSV files are located
directory = "C:\\Users\\Dell\\Documents\\Wifi-Sensing-HAR\\data\\our_data"

walk_df, noact_df, jog_df = dataframe_of_CSI(directory)

Index(['type', 'role', 'mac', 'rssi', 'rate', 'sig_mode', 'mcs', 'bandwidth',
       'smoothing', 'not_sounding', 'aggregation', 'stbc', 'fec_coding', 'sgi',
       'noise_floor', 'ampdu_cnt', 'channel', 'secondary_channel',
       'local_timestamp', 'ant', 'sig_len', 'rx_state', 'real_time_set',
       'real_timestamp', 'len', 'CSI_DATA'],
      dtype='object')
sig_mode
1    15088
0     6117
Name: count, dtype: int64
Index(['type', 'role', 'mac', 'rssi', 'rate', 'sig_mode', 'mcs', 'bandwidth',
       'smoothing', 'not_sounding', 'aggregation', 'stbc', 'fec_coding', 'sgi',
       'noise_floor', 'ampdu_cnt', 'channel', 'secondary_channel',
       'local_timestamp', 'ant', 'sig_len', 'rx_state', 'real_time_set',
       'real_timestamp', 'len', 'CSI_DATA'],
      dtype='object')
sig_mode
1    29234
0    10547
Name: count, dtype: int64
Index(['type', 'role', 'mac', 'rssi', 'rate', 'sig_mode', 'mcs', 'bandwidth',
       'smoothing', 'not_sounding', 'aggregation', 'stbc', 'fec_coding', 'sgi'

In [23]:
print(len(walk_df),len(noact_df),len(jog_df))

29093 44704 35145


### Extact Amplitude and Phase from each dataframe

In [24]:
## Extract Amplitude and Phase from the dataframe
def convert_csi_to_amplitude_phase(df):
    total_amplitudes = []
    total_phases = []

    for i, value in enumerate(df.values):
        imaginary = []
        real = []
        amplitudes = [] 
        phases = []

        csi_one_row_lst = value.tolist()

         # Create list of imaginary and real numbers from CSI
        [imaginary.append(csi_one_row_lst[item]) if item%2==0 else real.append(csi_one_row_lst[item]) for item in range(len(csi_one_row_lst))]

        # Transform imaginary and real into amplitude and phase
        val = int(len(csi_one_row_lst)//2)
        for k in range(val):
            amplitudes.append(round(math.sqrt(float(imaginary[k])** 2 + float(real[k])** 2),4))
            phases.append(round(math.atan2(float(imaginary[k]), float(real[k])),4))
        total_amplitudes.append(np.array(amplitudes))
        total_phases.append(np.array(phases))
    
    total_amplitudes_df = pd.DataFrame(total_amplitudes)
    total_phases_df = pd.DataFrame(total_phases)

        
    return total_amplitudes_df, total_phases_df

### Amplitude and Phase of Walking

In [25]:
## Extract walk amplitude and phase
walk_amplitudes_df, walk_phases_df = convert_csi_to_amplitude_phase(walk_df)

## Here, based on sig_mode, 802.11a/g/n received. Here we receive both 802.11a/g and 802.11n
## So, either 52 or 56 total sub-carrier would be useful. The first 4 and the last 4 are rejected as null guard.


## Amplitude
walk_df1_amps = walk_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
walk_df2_amps = walk_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

walk_df_amps_final = pd.concat([walk_df1_amps, walk_df2_amps],axis=1)


## Phase
walk_df1_phase = walk_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
walk_df2_phase = walk_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

walk_df_phase_final = pd.concat([walk_df1_phase, walk_df2_phase],axis=1)

### Amplitude and Phase of Jogging

In [26]:
## Extract jog amplitude and phase
jog_amplitudes_df, jog_phases_df = convert_csi_to_amplitude_phase(jog_df)

## Amplitude
jog_df1_amps = jog_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
jog_df2_amps = jog_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

jog_df_amps_final = pd.concat([jog_df1_amps, jog_df2_amps],axis=1)


## Phase
jog_df1_phase = jog_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
jog_df2_phase = jog_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

jog_df_phase_final = pd.concat([jog_df1_phase, jog_df2_phase],axis=1)

### Amplitude and Phase of No Activity

In [27]:
## Extract jog amplitude and phase
noact_amplitudes_df, noact_phases_df = convert_csi_to_amplitude_phase(noact_df)

## Amplitude
noact_df1_amps = noact_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
noact_df2_amps = noact_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

noact_df_amps_final = pd.concat([noact_df1_amps, noact_df2_amps],axis=1)


## Phase
noact_df1_phase = noact_amplitudes_df.iloc[:,5:32]  # 6:32 for 802.11ag 4:32 for 802.11n
noact_df2_phase = noact_amplitudes_df.iloc[:,33:60] # 33:59 for 802.11ag 33:61 for 802.11n

noact_df_phase_final = pd.concat([noact_df1_phase, noact_df2_phase],axis=1)

#### Moving Average of the data

In [28]:
# Moving average of the data
def moving_average(df, window_size):
    """"
    Compute the moving average with a window of size specified
    """

    rolling_mean = df.rolling(window=window_size).mean()
    downsampled = rolling_mean.iloc[window_size::window_size, :]
    return downsampled


## Set moving average window of desired size 
window_size = 1
mov_avg_walk_amps_df = moving_average(walk_df_amps_final,window_size)
mov_avg_jog_amps_df = moving_average(jog_df_amps_final,window_size)
mov_avg_noact_amps_df = moving_average(noact_df_amps_final,window_size)


### Select data matrix and flatten it for training

In [29]:
def select_data_portion(dataFrm,select_size):
    selected_df_list = []
    for item in range(0,len(dataFrm)-select_size, select_size):
        selected_df = dataFrm.iloc[item:item+select_size].to_numpy().flatten()
        selected_df_list.append(selected_df)
    selected_df = pd.DataFrame(selected_df_list)
    return selected_df

In [30]:
X_walk = select_data_portion(mov_avg_walk_amps_df, 100)
X_jog = select_data_portion(mov_avg_jog_amps_df, 100)
X_training = pd.concat([X_walk,X_jog],axis=0,ignore_index=True)
# X_training

In [31]:
y_walk = np.zeros(len(X_walk))
y_jog = np.ones(len(X_jog))
y_training = np.concatenate([y_walk, y_jog],axis=0)

In [32]:
X_training

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5390,5391,5392,5393,5394,5395,5396,5397,5398,5399
0,0.0,21.9317,17.2627,25.4951,20.8806,17.7200,18.9737,18.1108,21.0950,25.1794,...,19.0000,17.2627,19.9249,18.2483,20.3961,21.8403,19.4165,17.1172,21.8403,0.0
1,0.0,18.0278,17.0294,20.2237,19.9249,17.1172,20.2237,19.2354,16.0312,23.0868,...,11.3137,12.3693,12.0830,12.0830,15.8114,15.8114,16.5529,18.2483,16.2788,0.0
2,0.0,14.8661,12.0830,21.9545,18.0278,15.2643,12.0830,16.2788,14.5602,21.0238,...,9.2195,11.4018,13.3417,12.0416,16.0312,10.4403,15.2315,14.3178,12.8062,0.0
3,0.0,16.5529,10.7703,18.0278,17.0000,11.7047,14.3178,7.0711,11.1803,14.1421,...,13.6015,17.8885,11.4018,16.4012,16.9706,7.0711,17.6918,13.4164,14.1421,0.0
4,0.0,7.0000,9.4868,10.8167,8.9443,14.4222,17.2627,10.2956,9.2195,15.5242,...,17.0880,16.7631,19.4165,20.6155,14.3178,14.3178,14.3178,14.8661,13.9284,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
636,0.0,10.7703,4.4721,12.1655,11.1803,14.2127,10.0000,9.4868,10.7703,10.2956,...,16.1245,19.4165,15.2315,10.6301,15.0000,13.6015,16.6433,14.3178,11.1803,0.0
637,0.0,13.6015,13.8924,21.2603,15.0000,8.6023,15.6205,12.0830,16.2788,14.2127,...,13.0384,10.0499,9.4340,14.1421,10.0499,9.4868,14.0357,11.1803,9.4868,0.0
638,0.0,9.2195,5.3852,14.2127,13.6015,8.4853,7.0711,8.6023,8.5440,8.5440,...,13.8924,10.6301,12.3693,13.6015,14.7648,12.2066,9.2195,12.2066,6.7082,0.0
639,0.0,9.4340,17.0880,10.1980,11.7047,11.0454,12.6491,8.2462,8.5440,9.0000,...,5.0990,9.4868,12.0416,10.0499,12.3693,12.0416,6.0828,6.4031,5.0990,0.0


## Training and Testing pipeline using sklearn

In [86]:
from sklearn.pipeline import Pipeline

X_train, X_test, y_train, y_test = train_test_split(X_training, y_training, stratify=y_training, test_size=0.2, shuffle=True, random_state=1)
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5390,5391,5392,5393,5394,5395,5396,5397,5398,5399
470,0.0,16.4012,9.4868,13.0000,21.3776,15.6205,13.4536,13.9284,11.1803,11.0454,...,11.4018,10.7703,10.0000,5.6569,10.0000,11.4018,6.0828,10.4403,14.1421,0.0
475,0.0,6.4031,13.6015,13.0384,8.5440,11.6619,13.6015,9.0554,15.2643,8.0623,...,13.8924,11.6619,12.5300,10.2956,13.6015,12.2066,12.8062,8.6023,10.0000,0.0
497,0.0,18.3848,9.8489,9.2195,11.1803,13.8924,8.0623,10.1980,9.4340,10.6301,...,10.7703,15.2643,12.0416,13.0384,12.7279,8.9443,6.4031,9.4340,7.8102,0.0
142,0.0,7.6158,15.6525,15.0000,12.2066,15.8114,14.3178,11.1803,13.4536,14.0000,...,22.0227,21.8403,19.2354,21.2132,20.2485,13.6015,17.7200,20.6155,16.2788,0.0
452,0.0,13.0000,7.6158,15.6525,14.7648,13.9284,13.4164,15.2315,13.6015,10.7703,...,13.1529,12.1655,13.6015,14.1421,13.3417,10.1980,13.4536,13.0384,18.0278,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,0.0,5.0990,6.7082,6.0828,13.0000,7.0711,6.4031,5.3852,1.4142,6.0828,...,7.2801,11.0454,15.1327,12.0416,5.3852,12.2066,8.5440,8.6023,9.2195,0.0
118,0.0,23.7065,23.4307,19.4165,26.9258,19.7231,22.4722,19.3132,23.4094,25.2982,...,13.6015,14.1421,14.4222,14.8661,15.8114,11.4018,10.2956,9.8489,12.6491,0.0
324,0.0,21.2132,19.3132,21.9317,17.2047,20.3961,15.5242,20.8806,22.0227,17.0000,...,17.8885,17.8885,20.2485,17.1172,18.0278,14.7648,19.6469,22.3607,21.1896,0.0
165,0.0,23.0217,19.7990,24.0416,23.6008,23.7697,21.9317,23.3238,22.4722,20.5913,...,22.4722,18.3848,23.8537,25.6125,15.6205,23.3452,22.8473,20.6155,19.2354,0.0


### SVM Classifier


In [87]:
pipe = Pipeline([('scaler', StandardScaler()),('pca', PCA(n_components=5)),('svc', SVC())])
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)

0.8992248062015504

### KNN Classifier

In [88]:
pipe2 = Pipeline([('scaler', StandardScaler()),('pca', PCA(n_components=5)),('knn', KNeighborsClassifier())])
pipe2.fit(X_train, y_train)
pipe2.score(X_test, y_test)

0.9069767441860465

### Save Model

In [42]:
# model_params = "C:\\Users\\Dell\\Documents\\Wifi-Sensing-HAR\\data\\model_params"
# pickle.dump(pipe1, open(f"{model_params}\\pipe_final_svm.pkl","wb"))
# pickle.dump(pipe2, open(f"{model_params}\\pipe_final_knn.pkl","wb"))

# loaded_model = pickle.load(open(f"{model_params}\\model.pkl","rb"))

-------
Training and Testing Pipeline ends here