# Pipeline for CSI signals

## Load dataset with every subcarrier, add receiver_pc and calculate amplitude and phase

In [1]:
import pandas as pd
import numpy as np
import re

# One CSI capture per receiver PC; the receiver number is encoded in each
# file name as `pc_<n>_...`.
file_paths = [
    'new dataset without obstacles/pc_1_without_train_60_min.csv',
    'new dataset without obstacles/pc_2_without_train_60_min.csv',
    'new dataset without obstacles/pc_3_without_train_60_min.csv',
    'new dataset without obstacles/pc_4_without_train_60_min.csv',
]

dataframes = []
for file_path in file_paths:
    # Read the receiver number from the `pc_<n>_` prefix of the file name itself
    # rather than from the n-th underscore-separated token of the whole path,
    # which silently picks the wrong token once a directory contains an underscore.
    pc_match = re.search(r'(?:^|[\\/])pc_(\d+)_[^\\/]*$', file_path)
    if pc_match is None:
        raise ValueError(f"Cannot determine receiver PC from file name: {file_path!r}")
    receiver_pc = pc_match.group(1)  # kept as a string, e.g. '1'

    df = pd.read_csv(file_path)
    df['receiver_pc'] = receiver_pc
    dataframes.append(df)

# Stack all receivers into a single frame with a fresh 0..n-1 index.
data = pd.concat(dataframes, ignore_index=True)


# Column names of the form `subcarrier_<k>_rx<i>_tx<j>_(real|imag)` carry the
# two halves of one complex CSI value.
pattern = r'subcarrier_(\d+)_rx(\d+)_tx(\d+)_(real|imag)'

# Maps `subcarrier_<k>_rx<i>_tx<j>` -> {'real': <column or None>, 'imag': <column or None>}.
amplitude_phase_cols = {}

for col in data.columns:
    hit = re.match(pattern, col)
    if hit is None:
        continue  # not a CSI component column

    sc_txt, rx_txt, tx_txt, part = hit.groups()
    # Round-trip the indices through int so the key is built from the numeric values.
    base_name = f'subcarrier_{int(sc_txt)}_rx{int(rx_txt)}_tx{int(tx_txt)}'

    slot = amplitude_phase_cols.setdefault(base_name, {'real': None, 'imag': None})
    slot[part] = col


# Collect every derived column in a dict first and attach them with a single
# concat afterwards, instead of inserting columns into `data` one at a time.
amplitude_phase_data = {}

for key, components in amplitude_phase_cols.items():
    real_col, imag_col = components['real'], components['imag']
    if not (real_col and imag_col):
        continue  # only complete real/imag pairs can be converted

    re_part = data[real_col]
    im_part = data[imag_col]

    # Polar form of the complex value re + j*im.
    amplitude_phase_data[f'{key}_amplitude'] = np.sqrt(re_part**2 + im_part**2)
    amplitude_phase_data[f'{key}_phase'] = np.arctan2(im_part, re_part)

amplitude_phase_df = pd.DataFrame(amplitude_phase_data)

data = pd.concat([data, amplitude_phase_df], axis=1)


# Raw real/imag columns that have been converted to amplitude/phase.
# Only complete pairs are listed: an incomplete pair has `None` for the missing
# half (which would make `drop` raise a KeyError) and was never converted, so
# its remaining raw column is kept rather than silently discarded.
real_imag_cols = [
    raw_col
    for components in amplitude_phase_cols.values()
    if components['real'] and components['imag']
    for raw_col in (components['real'], components['imag'])
]
data = data.drop(columns=real_imag_cols)

# Move `receiver_pc` to the last position, keeping all other columns in order.
column_order = [col for col in data.columns if col != 'receiver_pc'] + ['receiver_pc']
data = data[column_order]


# Add a column for the sender and remove the MAC address

In [2]:
import pandas as pd

# The sender ID is taken from the final character of `source_mac`; once it has
# been extracted, the raw MAC column is removed.
sender_ids = data['source_mac'].str.slice(start=-1)
data['sender'] = sender_ids
data.drop(columns=['source_mac'], inplace=True)



# Legge til system_time som kolonne og synkronisere basert på kolonne, regne ut total lengde av hvert datasett (må huske å synkronisere og fjerne slik at de starter lik ish alle pc-er)

In [3]:
import numpy as np
from datetime import datetime, timedelta


# Duration of one FTM clock tick in seconds, and the largest tick value
# (2**32 - 1) before the counter wraps around to zero.
FTM_TICK_DURATION = 3.125e-9
MAX_FTM_TICKS = 4294967295


def calculate_system_time(ftm_clock_values, start_time_str):
    """Convert wrapping FTM clock readings into absolute timestamps.

    The first reading is pinned to ``start_time_str``; every later reading is
    placed at ``start_time + elapsed ticks * FTM_TICK_DURATION``. A reading
    that is smaller than its predecessor is treated as one wrap-around of the
    counter (``MAX_FTM_TICKS + 1`` ticks are added from that point on).

    Parameters
    ----------
    ftm_clock_values : sequence of int
        Tick counter readings in capture order (list or 1-D array).
    start_time_str : str
        Timestamp of the first reading, in any form ``np.datetime64`` accepts.

    Returns
    -------
    numpy.ndarray of dtype ``datetime64[ns]``
        One timestamp per reading; empty when the input is empty.

    Notes
    -----
    Any decrease between consecutive readings counts as a wrap, so readings
    that arrive slightly out of order would be pushed a full counter period
    forward. NOTE(review): confirm the input is always in capture order.
    """
    start_time = np.datetime64(start_time_str, 'ns')

    # Work in int64 so that adding multiples of 2**32 can never overflow,
    # even when the caller hands in an unsigned 32-bit array.
    ticks = np.asarray(ftm_clock_values, dtype=np.int64)
    if ticks.size == 0:
        return np.zeros(0, dtype='datetime64[ns]')

    # Number of wrap-arounds seen up to and including each reading.
    wrapped = np.diff(ticks) < 0
    wrap_counts = np.concatenate(
        (np.zeros(1, dtype=np.int64), np.cumsum(wrapped, dtype=np.int64))
    )
    unwrapped_ticks = ticks + wrap_counts * (MAX_FTM_TICKS + 1)

    # Same arithmetic as the scalar formulation: ticks -> seconds -> nanoseconds,
    # truncated towards zero when converted to whole nanoseconds.
    elapsed_ns = (unwrapped_ticks - ticks[0]) * FTM_TICK_DURATION * 1e9
    return start_time + elapsed_ns.astype(np.int64).astype('timedelta64[ns]')


# Wall-clock timestamp assigned to the first sample of each receiver PC; these
# are passed to `assign_system_time_for_pc` below as the start time.
# NOTE(review): the origin of these values is not shown in this notebook —
# presumably taken from each PC's capture log; confirm.
pc1_start_time = "2024-12-06 13:36:00.108961414"
pc2_start_time = "2024-12-06 13:38:00.681246155" 
pc3_start_time = "2024-12-06 13:40:00.401039006"  
pc4_start_time = "2024-12-06 13:42:00.568819514"  


# Create the target column up front (filled with None) so that each receiver's
# rows can be filled in separately via `.loc`.
data['system_time'] = None  


def assign_system_time_for_pc(receiver_pc, start_time):
    """Fill `system_time` for every row of one receiver in the global `data`.

    Selects the rows whose `receiver_pc` equals the given value, converts their
    `ftm_clock` readings with `calculate_system_time` (anchored at
    `start_time`), writes the result back into `data['system_time']`, and
    prints the first ten converted rows as a sanity check.
    """
    rows = data['receiver_pc'] == receiver_pc
    data.loc[rows, 'system_time'] = calculate_system_time(
        data.loc[rows, 'ftm_clock'].values, start_time
    )

    print(f"\nCalculated system times for PC {receiver_pc}:")
    preview = data.loc[rows, ['ftm_clock', 'system_time']]
    print(preview.head(10))


# Convert each receiver's tick counter using that receiver's own start time.
for pc_id, pc_start in (
    ('1', pc1_start_time),
    ('2', pc2_start_time),
    ('3', pc3_start_time),
    ('4', pc4_start_time),
):
    assign_system_time_for_pc(pc_id, pc_start)

# Interleave all receivers chronologically and renumber the rows.
data = data.sort_values('system_time', ignore_index=True)



Calculated system times for PC 1:
   ftm_clock                    system_time
0  346787784  2024-12-06T13:36:00.108961414
1  347065944  2024-12-06T13:36:00.109830664
2  353831336  2024-12-06T13:36:00.130972514
3  363037512  2024-12-06T13:36:00.159741814
4  363439784  2024-12-06T13:36:00.160998914
5  369842056  2024-12-06T13:36:00.181006014
6  379010904  2024-12-06T13:36:00.209658664
7  380109064  2024-12-06T13:36:00.213090414
8  385835192  2024-12-06T13:36:00.230984564
9  395106072  2024-12-06T13:36:00.259956063

Calculated system times for PC 2:
         ftm_clock                    system_time
256713  3171391680  2024-12-06T13:38:00.681246155
256714  3173970048  2024-12-06T13:38:00.689303554
256715  3176595328  2024-12-06T13:38:00.697507554
256716  3187392432  2024-12-06T13:38:00.731248505
256717  3189974336  2024-12-06T13:38:00.739316955
256718  3192509616  2024-12-06T13:38:00.747239705
256719  3203395936  2024-12-06T13:38:00.781259455
256720  3205956000  2024-12-06T13:38:00.789259

# Check the last measurement of PC 2 and preview the first rows of PC 4

In [4]:
# Last recorded row for receiver 2 (rows are already in chronological order).
pc2_data = data.loc[data['receiver_pc'] == '2']

if pc2_data.empty:
    print("Ingen målinger funnet for receiver_pc = 2 i datasettet.")
else:
    pc2_last_measurement = pc2_data.iloc[-1]
    print("Den siste målingen for receiver_pc = 2:")
    print(pc2_last_measurement)

# Independent copy of receiver 4's rows, previewed from the start.
receiver_4_data = data.loc[data['receiver_pc'] == '4'].copy()
print(receiver_4_data.head(20))



Den siste målingen for receiver_pc = 2:
csi_size                                                     896
ftm_clock                                             3571322528
num_rx                                                         2
num_tx                                                         2
num_subcarriers                                               56
                                               ...              
subcarrier_55_rx1_tx1_amplitude                         8.944272
subcarrier_55_rx1_tx1_phase                            -2.034444
receiver_pc                                                    2
sender                                                         3
system_time                        2024-12-06T14:48:16.367689254
Name: 990094, Length: 466, dtype: object
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
43202       896  3386818108       2       2               56     56     50   
43207       896  3393201724       2       2     

# Remove everything before and after the period in which all PCs log at the same time (three senders repeat per PC)

In [5]:

# Work on chronologically ordered rows so that first()/last() per group are the
# earliest/latest samples of each receiver.
data = data.sort_values(by='system_time').reset_index(drop=True)

# The common window starts when the last receiver to start has its first sample...
valid_start_times = data.groupby('receiver_pc')['system_time'].first()
latest_valid_start = valid_start_times.max()
data = data[data['system_time'] >= latest_valid_start]

# ...and ends when the first receiver to stop has its last sample.
valid_end_times = data.groupby('receiver_pc')['system_time'].last()
earliest_valid_end = valid_end_times.min()
# `.copy()` detaches the result from the unfiltered frame so that later column
# assignments on `data` do not operate on a slice.
data = data[data['system_time'] <= earliest_valid_end].copy()

# Use string aggregator names: the column labels produced from NumPy callables
# (`amin`/`amax` vs `min`/`max`) differ between NumPy/pandas versions, which
# makes a hard-coded `durations['amax']` lookup fail with a KeyError.
durations = data.groupby('receiver_pc')['system_time'].agg(['min', 'max'])
durations['duration'] = durations['max'] - durations['min']

print("Varighet for hver receiver_pc basert på gyldige målinger:")
print(durations[['duration']])
print("\nEndelige data etter fjerning av målinger post første PC-stopp:")
print(data.head(20))


Varighet for hver receiver_pc basert på gyldige målinger:
                             duration
receiver_pc                          
1           0 days 01:05:18.987653900
2           0 days 01:05:18.932129050
3           0 days 01:05:18.956040950
4           0 days 01:05:18.961793950

Endelige data etter fjerning av målinger post første PC-stopp:
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
43202       896  3386818108       2       2               56     56     50   
43203       896  4024805608       2       2               56     54     48   
43204       896  1159043300       2       2               56     52     53   
43205       896  1160128948       2       2               56     41     55   
43206       896  2631541904       2       2               56     41     51   
43207       896  3393201724       2       2               56     54     50   
43208       896  4031234920       2       2               56     50     54   
43209       896  263407044

In [6]:
# Fixed analysis window (both bounds inclusive).
start_time = pd.to_datetime("2024-12-06 13:45:00")
end_time = pd.to_datetime("2024-12-06 14:47:00")

# Make sure the column holds pandas datetimes before comparing against the bounds.
data['system_time'] = pd.to_datetime(data['system_time'])

in_window = data['system_time'].between(start_time, end_time)
data = data[in_window]

print("Data filtered to 62 minutes:")
time_preview = data[['receiver_pc', 'system_time']]
print(time_preview.head())
print(time_preview.tail())


Data filtered to 62 minutes:
      receiver_pc                   system_time
86248           4 2024-12-06 13:45:00.000761314
86249           3 2024-12-06 13:45:00.007892256
86250           2 2024-12-06 13:45:00.018057804
86251           2 2024-12-06 13:45:00.020809805
86252           1 2024-12-06 13:45:00.021311263
       receiver_pc                   system_time
978884           3 2024-12-06 14:46:59.976082705
978885           4 2024-12-06 14:46:59.984692614
978886           4 2024-12-06 14:46:59.985072264
978887           1 2024-12-06 14:46:59.985127164
978888           3 2024-12-06 14:46:59.992315655


# Legge til annotert data og fjerne signaler og tid som eksisterer før datasettet offisielt har begynt

In [7]:
# Each annotation row gives the time at which a movement label starts.
annotations = pd.read_excel('new dataset without obstacles/dataset_annotated_without_obstalces.xlsx')

# The sheet stores time-of-day only; prefix the recording date to get full timestamps.
annotations['time_annotated'] = pd.to_datetime("2024-12-06 " + annotations['time_annotated'].astype(str))

# Both the shift below and `merge_asof` require the annotations to be in
# chronological order, so enforce it instead of relying on the sheet's row order.
annotations = annotations.sort_values('time_annotated').reset_index(drop=True)

# A label is valid until the next annotation begins; the final row has no end
# time and is dropped. Restrict the NaN check to the columns actually used so
# that blanks in unrelated sheet columns do not discard valid labels.
annotations['End Time'] = annotations['time_annotated'].shift(-1)
annotations = annotations.dropna(subset=['time_annotated', 'End Time', 'movement'])

# Attach to every sample the most recent annotation at or before its timestamp.
data = pd.merge_asof(data.sort_values('system_time'),
                     annotations[['time_annotated', 'End Time', 'movement']],
                     left_on='system_time', right_on='time_annotated',
                     direction='backward')

# Keep only samples that fall inside their label's [start, end) interval; this
# removes samples before the first annotation and after the last labelled interval.
data = data[(data['system_time'] >= data['time_annotated']) & (data['system_time'] < data['End Time'])]
data = data.drop(columns=['End Time', 'time_annotated'])

print(data.tail(20))


        csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
881337       896  1242723076       2       2               56     48     52   
881338       896  3471689196       2       2               56     49     50   
881339       896  4109703096       2       2               56     51     56   
881340       896  3471924364       2       2               56     55     59   
881341       896  1245472660       2       2               56     54     51   
881342       896  4116935176       2       2               56     50     50   
881343       896  3479099772       2       2               56     55     52   
881344       896  2718866688       2       2               56     54     54   
881345       896  1252593972       2       2               56     40     54   
881346       896  4121590936       2       2               56     55     61   
881347       896  2723064688       2       2               56     53     56   
881348       896  2723136304       2       2        

In [8]:
import os

# Destination of the merged, annotated dataset.
output_directory = "complete dataset csv without obstacles"
output_filename = "complete_60_min_without_obstacles.csv"
output_filepath = os.path.join(output_directory, output_filename)

# Create the target folder if it does not exist yet.
os.makedirs(output_directory, exist_ok=True)

# Report the outcome either way instead of letting a failed write stop the notebook.
try:
    data.to_csv(output_filepath, index=False)
except Exception as e:
    print(f"En feil oppstod ved lagring av CSV-filen: {e}")
else:
    print(f"DataFrame lagret som CSV-fil i: {output_filepath}")


DataFrame lagret som CSV-fil i: complete dataset csv without obstacles\complete_60_min_without_obstacles.csv


In [9]:
# Display the first 20 rows of the final frame as a visual check.
data.head(20)

Unnamed: 0,csi_size,ftm_clock,num_rx,num_tx,num_subcarriers,rssi1,rssi2,rate_format,channel_width,mcs,...,subcarrier_55_rx0_tx1_amplitude,subcarrier_55_rx0_tx1_phase,subcarrier_55_rx1_tx0_amplitude,subcarrier_55_rx1_tx0_phase,subcarrier_55_rx1_tx1_amplitude,subcarrier_55_rx1_tx1_phase,receiver_pc,sender,system_time,movement
0,896,675497340,2,2,56,58,58,VHT,20,0,...,80.099938,0.049958,106.320271,-2.855541,55.326305,-3.032931,4,1,2024-12-06 13:45:00.000761314,C
1,896,2743824420,2,2,56,52,53,VHT,20,0,...,32.015621,-0.896055,101.533246,-2.841645,65.520989,-2.863293,3,1,2024-12-06 13:45:00.007892256,C
2,896,4215185232,2,2,56,57,52,VHT,20,0,...,113.017698,1.031894,102.839681,1.806341,142.688472,0.545227,2,4,2024-12-06 13:45:00.018057804,C
3,896,4216065872,2,2,56,40,51,VHT,20,0,...,27.856777,-0.367174,106.606754,2.509348,42.544095,-1.159732,2,3,2024-12-06 13:45:00.020809805,C
4,896,1320047896,2,2,56,59,60,VHT,20,0,...,95.015788,-2.612003,82.734515,0.776851,43.416587,0.671144,1,4,2024-12-06 13:45:00.021311263,C
5,896,682098812,2,2,56,54,51,VHT,20,0,...,128.693434,1.961403,68.425142,-1.100613,127.914034,-2.455863,4,2,2024-12-06 13:45:00.021390913,C
6,896,1320128904,2,2,56,53,49,VHT,20,0,...,39.204592,-0.91199,146.768525,-1.468416,15.556349,-2.356194,1,2,2024-12-06 13:45:00.021564414,C
7,896,682756060,2,2,56,50,51,VHT,20,0,...,47.539457,0.255182,67.178866,-2.345669,93.407708,2.485244,4,3,2024-12-06 13:45:00.023444814,C
8,896,1320928520,2,2,56,52,55,VHT,20,0,...,79.246451,0.821097,47.169906,-1.2021,71.168813,-1.870328,1,3,2024-12-06 13:45:00.024063214,C
9,896,2750363620,2,2,56,49,54,VHT,20,0,...,50.289164,-0.302885,72.99315,-1.735945,127.141653,0.50939,3,4,2024-12-06 13:45:00.028327256,C
