# Pipeline for CSI signals

## Load dataset with every subcarrier, add receiver_pc and calculate amplitude and phase

In [18]:
import pandas as pd
import numpy as np
import re

# Define the file paths
file_paths = ['new dataset with obstacles/pc_1_with_train_30_min.csv', 'new dataset with obstacles/pc_2_with_train_30_min.csv', 
              'new dataset with obstacles/pc_3_with_train_30_min.csv', 'new dataset with obstacles/pc_4_with_train_30_min.csv']

dataframes = []
for file_path in file_paths:
    df = pd.read_csv(file_path)
    receiver_pc = file_path.split('_')[1]  
    df['receiver_pc'] = receiver_pc       
    dataframes.append(df)


data = pd.concat(dataframes, ignore_index=True)


pattern = r'subcarrier_(\d+)_rx(\d+)_tx(\d+)_(real|imag)'


amplitude_phase_cols = {}


for col in data.columns:
    match = re.match(pattern, col)
    if match:
        subcarrier_num = int(match.group(1))
        rx = int(match.group(2))
        tx = int(match.group(3))
        component = match.group(4)
        
        key = f'subcarrier_{subcarrier_num}_rx{rx}_tx{tx}'
        
        if key not in amplitude_phase_cols:
            amplitude_phase_cols[key] = {'real': None, 'imag': None}
        
        amplitude_phase_cols[key][component] = col


amplitude_phase_data = {}


for key, components in amplitude_phase_cols.items():
    real_col = components['real']
    imag_col = components['imag']
    
    if real_col and imag_col:
        amplitude = np.sqrt(data[real_col]**2 + data[imag_col]**2)
        phase = np.arctan2(data[imag_col], data[real_col])
        
        amplitude_phase_data[f'{key}_amplitude'] = amplitude
        amplitude_phase_data[f'{key}_phase'] = phase


amplitude_phase_df = pd.DataFrame(amplitude_phase_data)
data = pd.concat([data, amplitude_phase_df], axis=1)
real_imag_cols = [components[component] for key, components in amplitude_phase_cols.items() for component in ['real', 'imag']]
data.drop(columns=real_imag_cols, inplace=True)


column_order = [col for col in data.columns if col != 'receiver_pc'] + ['receiver_pc']
data = data[column_order]

# Add column for sender and remove mac address

In [19]:
# Extract the sender ID from the last two characters of `mac_address`
data['sender'] = data['source_mac'].str[-1:]
data.drop(columns=['source_mac'], inplace=True)



# Legge til system_time som kolonne og synkronisere basert på kolonne, regne ut total lengde av hvert datasett (må huske å synkronisere og fjerne slik at de starter lik ish alle pc-er)

In [20]:
import numpy as np
from datetime import datetime, timedelta


FTM_TICK_DURATION = 3.125e-9 
MAX_FTM_TICKS = 4294967295    


def calculate_system_time(ftm_clock_values, start_time_str):

    start_time = np.datetime64(start_time_str, 'ns')
    
    system_times = np.zeros(len(ftm_clock_values), dtype='datetime64[ns]')
    
    current_offset = 0

    for i in range(len(ftm_clock_values)):
        if i == 0:
            system_times[i] = start_time
        else:
            if ftm_clock_values[i] < ftm_clock_values[i - 1]:
                current_offset += MAX_FTM_TICKS + 1

            elapsed_time_ns = (ftm_clock_values[i] + current_offset - ftm_clock_values[0]) * FTM_TICK_DURATION * 1e9
            system_times[i] = start_time + np.timedelta64(int(elapsed_time_ns), 'ns')  # Nanosecond-level precision
    
    return system_times


pc1_start_time = "2024-12-06 16:57:00.641571495"
pc2_start_time = "2024-12-06 16:58:00.439184619" 
pc3_start_time = "2024-12-06 16:59:00.469645341"  
pc4_start_time = "2024-12-06 17:00:00.538706886"  


data['system_time'] = None  


def assign_system_time_for_pc(receiver_pc, start_time):
    mask = data['receiver_pc'] == receiver_pc
    ftm_clock_values = data.loc[mask, 'ftm_clock'].values
    system_times = calculate_system_time(ftm_clock_values, start_time)
    data.loc[mask, 'system_time'] = system_times
    print(f"\nCalculated system times for PC {receiver_pc}:")
    print(data.loc[mask, ['ftm_clock', 'system_time']].head(10))


assign_system_time_for_pc('1', pc1_start_time)
assign_system_time_for_pc('2', pc2_start_time)
assign_system_time_for_pc('3', pc3_start_time)
assign_system_time_for_pc('4', pc4_start_time)


data = data.sort_values(by='system_time').reset_index(drop=True)



Calculated system times for PC 1:
    ftm_clock                    system_time
0  2535734592  2024-12-06T16:57:00.641571495
1  2538175328  2024-12-06T16:57:00.649198795
2  2547207312  2024-12-06T16:57:00.677423744
3  2551678752  2024-12-06T16:57:00.691396994
4  2554066016  2024-12-06T16:57:00.698857194
5  2562338976  2024-12-06T16:57:00.724710195
6  2567741056  2024-12-06T16:57:00.741591694
7  2569988304  2024-12-06T16:57:00.748614345
8  2577494976  2024-12-06T16:57:00.772072694
9  2583718992  2024-12-06T16:57:00.791522744

Calculated system times for PC 2:
         ftm_clock                    system_time
138969  3901971840  2024-12-06T16:58:00.439184619
138970  3902639136  2024-12-06T16:58:00.441269919
138971  3903258448  2024-12-06T16:58:00.443205268
138972  3917787568  2024-12-06T16:58:00.488608769
138973  3917997968  2024-12-06T16:58:00.489266269
138974  3919314640  2024-12-06T16:58:00.493380868
138975  3932938480  2024-12-06T16:58:00.535955368
138976  3933987456  2024-12-06T16:5

# Check last measurement of each PC

In [21]:

pc2_data = data[data['receiver_pc'] == '2']


if not pc2_data.empty:
    pc2_last_measurement = pc2_data.iloc[-1]
    
    print("Den siste målingen for receiver_pc = 2:")
    print(pc2_last_measurement)
else:
    print("Ingen målinger funnet for receiver_pc = 2 i datasettet.")
    
receiver_4_data = data[data['receiver_pc'] == '4'].copy()


print(receiver_4_data.head(20))



Den siste målingen for receiver_pc = 2:
csi_size                                                     896
ftm_clock                                             3056896176
num_rx                                                         2
num_tx                                                         2
num_subcarriers                                               56
                                               ...              
subcarrier_55_rx1_tx1_amplitude                        55.036352
subcarrier_55_rx1_tx1_phase                            -0.036348
receiver_pc                                                    2
sender                                                         3
system_time                        2024-12-06T17:36:26.343244769
Name: 539131, Length: 466, dtype: object
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
21591       896  2583161268       2       2               56     53     46   
21597       896  2588373796       2       2     

# Remove everything before and after all pc's log at the same time (three senders repeats per pc)

In [22]:
import numpy as np


data = data.sort_values(by='system_time').reset_index(drop=True)


valid_start_times = data.groupby('receiver_pc')['system_time'].first()


latest_valid_start = valid_start_times.max()


data = data[data['system_time'] >= latest_valid_start]


valid_end_times = data.groupby('receiver_pc')['system_time'].last()


earliest_valid_end = valid_end_times.min()


data = data[data['system_time'] <= earliest_valid_end]


durations = data.groupby('receiver_pc')['system_time'].agg([np.min, np.max])
durations['duration'] = durations['amax'] - durations['amin']


print("Varighet for hver receiver_pc basert på gyldige målinger:")
print(durations[['duration']])
print("\nEndelige data etter fjerning av målinger post første PC-stopp:")
print(data.head(20))


Varighet for hver receiver_pc basert på gyldige målinger:
                             duration
receiver_pc                          
1           0 days 00:35:37.456483950
2           0 days 00:35:37.450518850
3           0 days 00:35:37.421797851
4           0 days 00:35:37.459544599

Endelige data etter fjerning av målinger post første PC-stopp:
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
21591       896  2583161268       2       2               56     53     46   
21592       896  3679641456       2       2               56     49     51   
21593       896  3680119680       2       2               56     49     59   
21594       896  4269473760       2       2               56     56     49   
21595       896  3682000736       2       2               56     38     48   
21596       896  4271833040       2       2               56     51     59   
21597       896  2588373796       2       2               56     53     49   
21598       896  259025485

In [23]:
start_time = pd.to_datetime("2024-12-06 17:04:00")
end_time = pd.to_datetime("2024-12-06 17:19:00") # 15 minutes


data['system_time'] = pd.to_datetime(data['system_time'])

data = data[(data['system_time'] >= start_time) & (data['system_time'] <= end_time)]

print("Data filtered to 15 minutes:")
print(data[['receiver_pc', 'system_time']].head())
print(data[['receiver_pc', 'system_time']].tail())


Data filtered to 15 minutes:
      receiver_pc                   system_time
79049           2 2024-12-06 17:04:00.000614569
79050           1 2024-12-06 17:04:00.003457044
79051           4 2024-12-06 17:04:00.011225785
79052           4 2024-12-06 17:04:00.014619386
79053           3 2024-12-06 17:04:00.027300091
       receiver_pc                   system_time
294972           4 2024-12-06 17:18:59.968705035
294973           2 2024-12-06 17:18:59.972645868
294974           1 2024-12-06 17:18:59.977985645
294975           4 2024-12-06 17:18:59.984556086
294976           4 2024-12-06 17:18:59.989860885


# Legge til annotert data og fjerne signaler og tid som eksisterer før datasettet har offisielt har begynt

In [24]:
annotations = pd.read_excel('new dataset with obstacles/dataset_annotated_with_obstalces.xlsx')


annotations['time_annotated'] = pd.to_datetime("2024-12-06 " + annotations['time_annotated'].astype(str))


annotations['End Time'] = annotations['time_annotated'].shift(-1) 
annotations = annotations.dropna()  

data = pd.merge_asof(data.sort_values('system_time'), 
                     annotations[['time_annotated', 'End Time', 'movement']],
                     left_on='system_time', right_on='time_annotated', 
                     direction='backward')


data = data[(data['system_time'] >= data['time_annotated']) & (data['system_time'] < data['End Time'])]
data = data.drop(columns=['End Time', 'time_annotated'])


        csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
215908       896  3796849472       2       2               56     54     48   
215909       896    14869196       2       2               56     59     54   
215910       896  3208052288       2       2               56     48     58   
215911       896  2112469796       2       2               56     51     45   
215912       896  3211254048       2       2               56     41     48   
215913       896  3802092512       2       2               56     53     59   
215914       896  2117608996       2       2               56     50     49   
215915       896  2120810772       2       2               56     47     51   
215916       896    25800700       2       2               56     39     57   
215917       896  3809822208       2       2               56     56     48   
215918       896    28076204       2       2               56     47     55   
215919       896  3221259232       2       2        

In [25]:
import os

output_directory = "complete dataset csv with obstacles"
output_filename = "complete_15_min_with_obstacles.csv"


os.makedirs(output_directory, exist_ok=True)


output_filepath = os.path.join(output_directory, output_filename)

try:
    data.to_csv(output_filepath, index=False)
    print(f"DataFrame lagret som CSV-fil i: {output_filepath}")
except Exception as e:
    print(f"En feil oppstod ved lagring av CSV-filen: {e}")


DataFrame lagret som CSV-fil i: complete dataset csv with obstacles\complete_15_min_with_obstacles.csv


In [26]:
data.head(20)

Unnamed: 0,csi_size,ftm_clock,num_rx,num_tx,num_subcarriers,rssi1,rssi2,rate_format,channel_width,mcs,...,subcarrier_55_rx0_tx1_amplitude,subcarrier_55_rx0_tx1_phase,subcarrier_55_rx1_tx0_amplitude,subcarrier_55_rx1_tx0_phase,subcarrier_55_rx1_tx1_amplitude,subcarrier_55_rx1_tx1_phase,receiver_pc,sender,system_time,movement
0,896,2997512432,2,2,56,61,58,VHT,20,0,...,58.821765,-2.187093,59.5483,0.714091,57.384667,-0.847048,2,3,2024-12-06 17:04:00.000614569,A
1,896,3587551792,2,2,56,52,59,VHT,20,0,...,76.661594,0.533708,117.711512,1.400061,52.239832,1.666655,1,3,2024-12-06 17:04:00.003457044,A
2,896,1904955988,2,2,56,54,47,VHT,20,0,...,59.211485,-1.65534,39.408121,0.623199,80.950602,0.474697,4,1,2024-12-06 17:04:00.011225785,A
3,896,1906041940,2,2,56,49,50,VHT,20,0,...,32.202484,0.44976,109.480592,-2.85451,74.242845,1.489892,4,3,2024-12-06 17:04:00.014619386,A
4,896,4108162012,2,2,56,58,61,VHT,20,0,...,116.284135,1.018578,38.639358,2.959424,87.022985,3.118608,3,2,2024-12-06 17:04:00.027300091,A
5,896,4109766876,2,2,56,48,55,VHT,20,0,...,57.697487,1.122651,66.887966,-2.590917,112.378824,-1.364675,3,4,2024-12-06 17:04:00.032315291,A
6,896,3597294864,2,2,56,60,49,VHT,20,0,...,54.341513,-0.222661,163.229899,3.018757,62.625873,0.898549,1,2,2024-12-06 17:04:00.033904144,A
7,896,3008860352,2,2,56,48,49,VHT,20,0,...,135.771867,2.345778,62.289646,1.66727,112.294256,-0.151972,2,4,2024-12-06 17:04:00.036076818,A
8,896,3598899728,2,2,56,51,48,VHT,20,0,...,76.059187,3.102139,23.706539,2.05331,116.674762,-2.362255,1,4,2024-12-06 17:04:00.038919344,A
9,896,4113343468,2,2,56,56,53,VHT,20,0,...,38.639358,0.182168,75.432089,-0.254625,57.245087,1.232473,3,1,2024-12-06 17:04:00.043492141,A


In [27]:
import pandas as pd
import numpy as np

file_path = "complete dataset csv with obstacles/complete_15_min_with_obstacles.csv"

data = pd.read_csv(file_path)

In [28]:
data.head()
for column in data.columns:
    print(column)

csi_size
ftm_clock
num_rx
num_tx
num_subcarriers
rssi1
rssi2
rate_format
channel_width
mcs
antenna_a
antenna_b
ldpc
ss
beamforming
subcarrier_0_rx0_tx0_amplitude
subcarrier_0_rx0_tx0_phase
subcarrier_0_rx0_tx1_amplitude
subcarrier_0_rx0_tx1_phase
subcarrier_0_rx1_tx0_amplitude
subcarrier_0_rx1_tx0_phase
subcarrier_0_rx1_tx1_amplitude
subcarrier_0_rx1_tx1_phase
subcarrier_1_rx0_tx0_amplitude
subcarrier_1_rx0_tx0_phase
subcarrier_1_rx0_tx1_amplitude
subcarrier_1_rx0_tx1_phase
subcarrier_1_rx1_tx0_amplitude
subcarrier_1_rx1_tx0_phase
subcarrier_1_rx1_tx1_amplitude
subcarrier_1_rx1_tx1_phase
subcarrier_2_rx0_tx0_amplitude
subcarrier_2_rx0_tx0_phase
subcarrier_2_rx0_tx1_amplitude
subcarrier_2_rx0_tx1_phase
subcarrier_2_rx1_tx0_amplitude
subcarrier_2_rx1_tx0_phase
subcarrier_2_rx1_tx1_amplitude
subcarrier_2_rx1_tx1_phase
subcarrier_3_rx0_tx0_amplitude
subcarrier_3_rx0_tx0_phase
subcarrier_3_rx0_tx1_amplitude
subcarrier_3_rx0_tx1_phase
subcarrier_3_rx1_tx0_amplitude
subcarrier_3_rx1_tx0_phase

In [29]:
data['system_time'] = pd.to_datetime(data['system_time'])


data.sort_values(by='system_time', inplace=True)

start_time = data['system_time'].min()
data['time_since_start'] = (data['system_time'] - start_time).dt.total_seconds()

data['time_diff'] = data['time_since_start'].diff().fillna(0)

data.drop(columns=['system_time'], inplace=True)

print(data.head())

   csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
0       896  2997512432       2       2               56     61     58   
1       896  3587551792       2       2               56     52     59   
2       896  1904955988       2       2               56     54     47   
3       896  1906041940       2       2               56     49     50   
4       896  4108162012       2       2               56     58     61   

  rate_format  channel_width  mcs  ...  subcarrier_55_rx0_tx1_phase  \
0         VHT             20    0  ...                    -2.187093   
1         VHT             20    0  ...                     0.533708   
2         VHT             20    0  ...                    -1.655340   
3         VHT             20    0  ...                     0.449760   
4         VHT             20    0  ...                     1.018578   

   subcarrier_55_rx1_tx0_amplitude  subcarrier_55_rx1_tx0_phase  \
0                        59.548300                     0.7140

In [30]:
all_columns = data.columns.tolist()

amplitude_cols = [c for c in all_columns if "_amplitude" in c and "subcarrier_" in c]
phase_cols     = [c for c in all_columns if "_phase"     in c and "subcarrier_" in c]

# -------------------------------------------------------------------
amplitude_threshold_frac = 0.001  
removed_columns_info = {}  

# -------------------------------------------------------------------

grouped = data.groupby("receiver_pc")

for rx_pc, df_group in grouped:

    subcarrier_max_values = {}
    for col in amplitude_cols:
        if col in df_group.columns:
            col_max = df_group[col].max(skipna=True)
            subcarrier_max_values[col] = col_max
        else:
            subcarrier_max_values[col] = 0.0  
    
    global_max_ampl = max(subcarrier_max_values.values()) if subcarrier_max_values else 0
    
    if global_max_ampl <= 0:
        continue
    
    threshold_val = amplitude_threshold_frac * global_max_ampl
    
    amplitude_cols_to_remove = []
    for col, cmax in subcarrier_max_values.items():
        if cmax < threshold_val:
            amplitude_cols_to_remove.append(col)
    

    phase_cols_to_remove = []
    for amp_col in amplitude_cols_to_remove:
        phase_col = amp_col.replace("_amplitude", "_phase")
        if phase_col in data.columns:
            phase_cols_to_remove.append(phase_col)
    

    cols_to_remove = amplitude_cols_to_remove + phase_cols_to_remove
    

    cols_to_remove_final = [c for c in cols_to_remove if c in data.columns]
    
    if cols_to_remove_final:
        if rx_pc not in removed_columns_info:
            removed_columns_info[rx_pc] = []
        removed_columns_info[rx_pc].extend(cols_to_remove_final)
        
        data.drop(columns=cols_to_remove_final, inplace=True)

# -------------------------------------------------------------------
print("FJERNE-RAPPORT (amplitude og fase), terskel = {}% av global max i hver PC-gruppe\n".format(
    amplitude_threshold_frac * 100
))

if not removed_columns_info:
    print("Ingen kolonner ble fjernet.")
else:
    for rx_pc, removed_cols in removed_columns_info.items():
        print(f"\nPC {rx_pc}: fjernet {len(removed_cols)} kolonner")
        print("  ->", removed_cols)


# -------------------------------------------------------------------

FJERNE-RAPPORT (amplitude og fase), terskel = 0.1% av global max i hver PC-gruppe


PC 1: fjernet 32 kolonner
  -> ['subcarrier_7_rx0_tx0_amplitude', 'subcarrier_7_rx0_tx1_amplitude', 'subcarrier_7_rx1_tx0_amplitude', 'subcarrier_7_rx1_tx1_amplitude', 'subcarrier_21_rx0_tx0_amplitude', 'subcarrier_21_rx0_tx1_amplitude', 'subcarrier_21_rx1_tx0_amplitude', 'subcarrier_21_rx1_tx1_amplitude', 'subcarrier_34_rx0_tx0_amplitude', 'subcarrier_34_rx0_tx1_amplitude', 'subcarrier_34_rx1_tx0_amplitude', 'subcarrier_34_rx1_tx1_amplitude', 'subcarrier_48_rx0_tx0_amplitude', 'subcarrier_48_rx0_tx1_amplitude', 'subcarrier_48_rx1_tx0_amplitude', 'subcarrier_48_rx1_tx1_amplitude', 'subcarrier_7_rx0_tx0_phase', 'subcarrier_7_rx0_tx1_phase', 'subcarrier_7_rx1_tx0_phase', 'subcarrier_7_rx1_tx1_phase', 'subcarrier_21_rx0_tx0_phase', 'subcarrier_21_rx0_tx1_phase', 'subcarrier_21_rx1_tx0_phase', 'subcarrier_21_rx1_tx1_phase', 'subcarrier_34_rx0_tx0_phase', 'subcarrier_34_rx0_tx1_phase', 'subcarrier_34_rx1_tx0

In [31]:
columns_to_drop = ['csi_size', 'ftm_clock', 'num_rx', 'num_tx', 'num_subcarriers', 
                   'source_mac', 'rate_format', 'channel_width',
                   'mcs', 'antenna_a', 'antenna_b', 'ldpc', 'ss', 'beamforming','rssi1', 'rssi2']
data = data.drop(columns=columns_to_drop, errors='ignore')
for column in data.columns:
    print(column)

subcarrier_0_rx0_tx0_amplitude
subcarrier_0_rx0_tx0_phase
subcarrier_0_rx0_tx1_amplitude
subcarrier_0_rx0_tx1_phase
subcarrier_0_rx1_tx0_amplitude
subcarrier_0_rx1_tx0_phase
subcarrier_0_rx1_tx1_amplitude
subcarrier_0_rx1_tx1_phase
subcarrier_1_rx0_tx0_amplitude
subcarrier_1_rx0_tx0_phase
subcarrier_1_rx0_tx1_amplitude
subcarrier_1_rx0_tx1_phase
subcarrier_1_rx1_tx0_amplitude
subcarrier_1_rx1_tx0_phase
subcarrier_1_rx1_tx1_amplitude
subcarrier_1_rx1_tx1_phase
subcarrier_2_rx0_tx0_amplitude
subcarrier_2_rx0_tx0_phase
subcarrier_2_rx0_tx1_amplitude
subcarrier_2_rx0_tx1_phase
subcarrier_2_rx1_tx0_amplitude
subcarrier_2_rx1_tx0_phase
subcarrier_2_rx1_tx1_amplitude
subcarrier_2_rx1_tx1_phase
subcarrier_3_rx0_tx0_amplitude
subcarrier_3_rx0_tx0_phase
subcarrier_3_rx0_tx1_amplitude
subcarrier_3_rx0_tx1_phase
subcarrier_3_rx1_tx0_amplitude
subcarrier_3_rx1_tx0_phase
subcarrier_3_rx1_tx1_amplitude
subcarrier_3_rx1_tx1_phase
subcarrier_4_rx0_tx0_amplitude
subcarrier_4_rx0_tx0_phase
subcarrier_4_r

In [32]:
data.to_csv("CSV_Library/with_obstacles.csv", index=False)