# Pipeline for CSI signals

## Load dataset with every subcarrier, add receiver_pc and calculate amplitude and phase

In [1]:
import pandas as pd
import numpy as np
import re


file_paths = ['new dataset with obstacles/pc_1_with_train_30_min.csv', 'new dataset with obstacles/pc_2_with_train_30_min.csv', 
              'new dataset with obstacles/pc_3_with_train_30_min.csv', 'new dataset with obstacles/pc_4_with_train_30_min.csv']

# Load and concatenate the datasets, adding 'receiver_pc' column for each PC
dataframes = []
for file_path in file_paths:
    df = pd.read_csv(file_path)
    receiver_pc = file_path.split('_')[1]  # Extract the PC identifier (e.g., '1' or '2')
    df['receiver_pc'] = receiver_pc        # Add 'receiver_pc' column
    dataframes.append(df)

# Concatenate the datasets into one DataFrame
data = pd.concat(dataframes, ignore_index=True)

# Regular expression to extract subcarrier information
pattern = r'subcarrier_(\d+)_rx(\d+)_tx(\d+)_(real|imag)'

# Prepare lists to store amplitude and phase calculations
amplitude_phase_cols = {}

# Iterate over the columns and process real and imag values
for col in data.columns:
    match = re.match(pattern, col)
    if match:
        subcarrier_num = int(match.group(1))
        rx = int(match.group(2))
        tx = int(match.group(3))
        component = match.group(4)
        
        key = f'subcarrier_{subcarrier_num}_rx{rx}_tx{tx}'
        
        if key not in amplitude_phase_cols:
            amplitude_phase_cols[key] = {'real': None, 'imag': None}
        
        amplitude_phase_cols[key][component] = col

# Prepare lists to store new amplitude and phase columns
amplitude_phase_data = {}

# Calculate amplitude and phase in bulk and store them
for key, components in amplitude_phase_cols.items():
    real_col = components['real']
    imag_col = components['imag']
    
    if real_col and imag_col:
        # Calculate amplitude and phase
        amplitude = np.sqrt(data[real_col]**2 + data[imag_col]**2)
        phase = np.arctan2(data[imag_col], data[real_col])
        
        amplitude_phase_data[f'{key}_amplitude'] = amplitude
        amplitude_phase_data[f'{key}_phase'] = phase


amplitude_phase_df = pd.DataFrame(amplitude_phase_data)

# Concatenate the new columns with the original data
data = pd.concat([data, amplitude_phase_df], axis=1)

# Drop the original real and imag columns
real_imag_cols = [components[component] for key, components in amplitude_phase_cols.items() for component in ['real', 'imag']]
data.drop(columns=real_imag_cols, inplace=True)


column_order = [col for col in data.columns if col != 'receiver_pc'] + ['receiver_pc']
data = data[column_order]




# Add column for sender and remove mac address

In [2]:
import pandas as pd

# Extract the sender ID from the last two characters of `mac_address`
data['sender'] = data['source_mac'].str[-1:]

# Drop the `mac_address` column as it's no longer needed
data.drop(columns=['source_mac'], inplace=True)

# Inspect the modified DataFrame
#print(data)


# Legge til system_time som kolonne og synkronisere basert på kolonne, regne ut total lengde av hvert datasett (må huske å synkronisere og fjerne slik at de starter lik ish alle pc-er)

In [3]:
import numpy as np
from datetime import datetime, timedelta

# Constants
FTM_TICK_DURATION = 3.125e-9  # 3.125 ns per tick
MAX_FTM_TICKS = 4294967295    # MAX value before overflow of ftm_clock (~13.4 seconds)


def calculate_system_time(ftm_clock_values, start_time_str):
    
    start_time = np.datetime64(start_time_str, 'ns')  # Nanosecond precision start time
    
    
    system_times = np.zeros(len(ftm_clock_values), dtype='datetime64[ns]')
    
    
    current_offset = 0

    for i in range(len(ftm_clock_values)):
        if i == 0:
            
            system_times[i] = start_time
        else:
            
            if ftm_clock_values[i] < ftm_clock_values[i - 1]:
                # Legg til overflytsverdien til offseten
                current_offset += MAX_FTM_TICKS + 1

            # Beregn elapsed time i nanosekunder med hensyn til overflyt
            elapsed_time_ns = (ftm_clock_values[i] + current_offset - ftm_clock_values[0]) * FTM_TICK_DURATION * 1e9
            system_times[i] = start_time + np.timedelta64(int(elapsed_time_ns), 'ns')  # Nanosecond-level precision
    
    return system_times

# Starttider for hver PC
pc1_start_time = "2024-12-06 16:57:00.641571495"
pc2_start_time = "2024-12-06 16:58:00.439184619" 
pc3_start_time = "2024-12-06 16:59:00.469645341"  
pc4_start_time = "2024-12-06 17:00:00.538706886"  

# Calculate system times for each PC and assign to data
data['system_time'] = None  # Initialize system_time column

# Helper function to calculate and assign system times
def assign_system_time_for_pc(receiver_pc, start_time):
    mask = data['receiver_pc'] == receiver_pc
    ftm_clock_values = data.loc[mask, 'ftm_clock'].values
    system_times = calculate_system_time(ftm_clock_values, start_time)
    data.loc[mask, 'system_time'] = system_times
    # Debug print: Verify calculated system times for each PC
    print(f"\nCalculated system times for PC {receiver_pc}:")
    print(data.loc[mask, ['ftm_clock', 'system_time']].head(10))  # Print first 10 values

# Apply to each PC
assign_system_time_for_pc('1', pc1_start_time)
assign_system_time_for_pc('2', pc2_start_time)
assign_system_time_for_pc('3', pc3_start_time)
assign_system_time_for_pc('4', pc4_start_time)

# Sort the dataset by system_time
data = data.sort_values(by='system_time').reset_index(drop=True)

# Debug print: Verify entire data
#print("\nFinal system_time verification for all PCs:")
#print(data['system_time'].tail(200))


Calculated system times for PC 1:
    ftm_clock                    system_time
0  2535734592  2024-12-06T16:57:00.641571495
1  2538175328  2024-12-06T16:57:00.649198795
2  2547207312  2024-12-06T16:57:00.677423744
3  2551678752  2024-12-06T16:57:00.691396994
4  2554066016  2024-12-06T16:57:00.698857194
5  2562338976  2024-12-06T16:57:00.724710195
6  2567741056  2024-12-06T16:57:00.741591694
7  2569988304  2024-12-06T16:57:00.748614345
8  2577494976  2024-12-06T16:57:00.772072694
9  2583718992  2024-12-06T16:57:00.791522744

Calculated system times for PC 2:
         ftm_clock                    system_time
138969  3901971840  2024-12-06T16:58:00.439184619
138970  3902639136  2024-12-06T16:58:00.441269919
138971  3903258448  2024-12-06T16:58:00.443205268
138972  3917787568  2024-12-06T16:58:00.488608769
138973  3917997968  2024-12-06T16:58:00.489266269
138974  3919314640  2024-12-06T16:58:00.493380868
138975  3932938480  2024-12-06T16:58:00.535955368
138976  3933987456  2024-12-06T16:5

# Check last measurement of each PC

In [4]:
# Filtrer for å beholde kun data for receiver_pc = 2
pc2_data = data[data['receiver_pc'] == '2']

# Sjekk om det er noen data tilgjengelig for receiver_pc = 2
if not pc2_data.empty:
    # Finn den siste målingen for receiver_pc = 2
    pc2_last_measurement = pc2_data.iloc[-1]
    
    # Skriv ut den siste målingen for receiver_pc = 2
    print("Den siste målingen for receiver_pc = 2:")
    print(pc2_last_measurement)
else:
    print("Ingen målinger funnet for receiver_pc = 2 i datasettet.")
    
# Filter the data to include only rows where 'receiver_pc' is '2'
receiver_4_data = data[data['receiver_pc'] == '4'].copy()

# Print the first few rows of the filtered DataFrame to inspect
print(receiver_4_data.head(20))



Den siste målingen for receiver_pc = 2:
csi_size                                                     896
ftm_clock                                             3056896176
num_rx                                                         2
num_tx                                                         2
num_subcarriers                                               56
                                               ...              
subcarrier_55_rx1_tx1_amplitude                        55.036352
subcarrier_55_rx1_tx1_phase                            -0.036348
receiver_pc                                                    2
sender                                                         3
system_time                        2024-12-06T17:36:26.343244769
Name: 539131, Length: 466, dtype: object
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
21591       896  2583161268       2       2               56     53     46   
21597       896  2588373796       2       2     

# Remove everything before and after all pc's log at the same time (three senders repeats per pc)

In [5]:
import numpy as np

# Sørg for at dataene er sortert etter system_time
data = data.sort_values(by='system_time').reset_index(drop=True)

# Finn første gyldig systemtid for hver PC
valid_start_times = data.groupby('receiver_pc')['system_time'].first()

# Finn det siste tidspunktet alle PC-er har avgitt minst en måling
latest_valid_start = valid_start_times.max()

# Filtrer ut data før dette tidspunktet
data = data[data['system_time'] >= latest_valid_start]

# Finn den siste gyldige målingen for hver PC
valid_end_times = data.groupby('receiver_pc')['system_time'].last()

# Finn den tidligste av de siste gyldige målingene
earliest_valid_end = valid_end_times.min()

# Filtrer ut data etter det tidligste sluttpunktet hvor en PC sluttet å måle
data = data[data['system_time'] <= earliest_valid_end]

# Beregn varigheten for hver PC fra deres første til siste måling innenfor gyldig tidsrom
durations = data.groupby('receiver_pc')['system_time'].agg([np.min, np.max])
durations['duration'] = durations['amax'] - durations['amin']

# Skriv ut varighetene for hver receiver_pc og de endelige dataene
print("Varighet for hver receiver_pc basert på gyldige målinger:")
print(durations[['duration']])
print("\nEndelige data etter fjerning av målinger post første PC-stopp:")
print(data.head(20))


Varighet for hver receiver_pc basert på gyldige målinger:
                             duration
receiver_pc                          
1           0 days 00:35:37.456483950
2           0 days 00:35:37.450518850
3           0 days 00:35:37.421797851
4           0 days 00:35:37.459544599

Endelige data etter fjerning av målinger post første PC-stopp:
       csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
21591       896  2583161268       2       2               56     53     46   
21592       896  3679641456       2       2               56     49     51   
21593       896  3680119680       2       2               56     49     59   
21594       896  4269473760       2       2               56     56     49   
21595       896  3682000736       2       2               56     38     48   
21596       896  4271833040       2       2               56     51     59   
21597       896  2588373796       2       2               56     53     49   
21598       896  259025485

In [6]:
import pandas as pd

# Define the desired start and end times
start_time = pd.to_datetime("2024-12-06 17:04:00")
end_time = pd.to_datetime("2024-12-06 17:19:00") # 15 minutes

# Ensure 'system_time' column is in datetime format 
data['system_time'] = pd.to_datetime(data['system_time'])

# Filter the data to only include rows within the 30-minute interval
data = data[(data['system_time'] >= start_time) & (data['system_time'] <= end_time)]

# Display the filtered data
print("Data filtered to 15 minutes:")
print(data[['receiver_pc', 'system_time']].head())
print(data[['receiver_pc', 'system_time']].tail())


Data filtered to 15 minutes:
      receiver_pc                   system_time
79049           2 2024-12-06 17:04:00.000614569
79050           1 2024-12-06 17:04:00.003457044
79051           4 2024-12-06 17:04:00.011225785
79052           4 2024-12-06 17:04:00.014619386
79053           3 2024-12-06 17:04:00.027300091
       receiver_pc                   system_time
294972           4 2024-12-06 17:18:59.968705035
294973           2 2024-12-06 17:18:59.972645868
294974           1 2024-12-06 17:18:59.977985645
294975           4 2024-12-06 17:18:59.984556086
294976           4 2024-12-06 17:18:59.989860885


# Legge til annotert data og fjerne signaler og tid som eksisterer før datasettet har offisielt har begynt

In [7]:
import pandas as pd

# Step 1: Load the annotated data from Excel
annotations = pd.read_excel('new dataset without obstacles/dataset_annotated_without_obstalces.xlsx')

# Step 2: Add the date to the 'Tid' column in annotations
# Assuming all entries are for the date 2024-12-06
annotations['time_annotated'] = pd.to_datetime("2024-12-06 " + annotations['time_annotated'].astype(str))

# Step 3: Define the end time for each annotation interval
annotations['End Time'] = annotations['time_annotated'].shift(-1)  # Set end time to the next start time
annotations = annotations.dropna()  # Drop the last row as it has no defined end time

# Step 4: Match each row in `data` to the correct interval in `annotations`
# Use merge_asof to merge the data and annotations based on closest preceding 'Tid'
data = pd.merge_asof(data.sort_values('system_time'), 
                     annotations[['time_annotated', 'End Time', 'movement']],
                     left_on='system_time', right_on='time_annotated', 
                     direction='backward')

# Step 5: Filter to keep only rows within the correct time intervals
data = data[(data['system_time'] >= data['time_annotated']) & (data['system_time'] < data['End Time'])]
data = data.drop(columns=['End Time', 'time_annotated'])

# The data DataFrame now includes the 'Bevegelse' column with the appropriate movement category
print(data.tail(20))


Empty DataFrame
Columns: [csi_size, ftm_clock, num_rx, num_tx, num_subcarriers, rssi1, rssi2, rate_format, channel_width, mcs, antenna_a, antenna_b, ldpc, ss, beamforming, subcarrier_0_rx0_tx0_amplitude, subcarrier_0_rx0_tx0_phase, subcarrier_0_rx0_tx1_amplitude, subcarrier_0_rx0_tx1_phase, subcarrier_0_rx1_tx0_amplitude, subcarrier_0_rx1_tx0_phase, subcarrier_0_rx1_tx1_amplitude, subcarrier_0_rx1_tx1_phase, subcarrier_1_rx0_tx0_amplitude, subcarrier_1_rx0_tx0_phase, subcarrier_1_rx0_tx1_amplitude, subcarrier_1_rx0_tx1_phase, subcarrier_1_rx1_tx0_amplitude, subcarrier_1_rx1_tx0_phase, subcarrier_1_rx1_tx1_amplitude, subcarrier_1_rx1_tx1_phase, subcarrier_2_rx0_tx0_amplitude, subcarrier_2_rx0_tx0_phase, subcarrier_2_rx0_tx1_amplitude, subcarrier_2_rx0_tx1_phase, subcarrier_2_rx1_tx0_amplitude, subcarrier_2_rx1_tx0_phase, subcarrier_2_rx1_tx1_amplitude, subcarrier_2_rx1_tx1_phase, subcarrier_3_rx0_tx0_amplitude, subcarrier_3_rx0_tx0_phase, subcarrier_3_rx0_tx1_amplitude, subcarrier_3_rx0

In [8]:
import os

# Angi filbanen der CSV-filen skal lagres
output_directory = "complete dataset csv with obstacles"
output_filename = "complete_15_min_with_obstacles.csv"

# Opprett mappen hvis den ikke eksisterer
os.makedirs(output_directory, exist_ok=True)

# Full filsti
output_filepath = os.path.join(output_directory, output_filename)

# Lagre DataFrame til CSV
try:
    data.to_csv(output_filepath, index=False)
    print(f"DataFrame lagret som CSV-fil i: {output_filepath}")
except Exception as e:
    print(f"En feil oppstod ved lagring av CSV-filen: {e}")


DataFrame lagret som CSV-fil i: complete dataset csv with obstacles\complete_15_min_with_obstacles.csv


In [9]:
data.head(20)

Unnamed: 0,csi_size,ftm_clock,num_rx,num_tx,num_subcarriers,rssi1,rssi2,rate_format,channel_width,mcs,...,subcarrier_55_rx0_tx1_amplitude,subcarrier_55_rx0_tx1_phase,subcarrier_55_rx1_tx0_amplitude,subcarrier_55_rx1_tx0_phase,subcarrier_55_rx1_tx1_amplitude,subcarrier_55_rx1_tx1_phase,receiver_pc,sender,system_time,movement


In [10]:
import pandas as pd
import numpy as np

# Angi filbanen til CSV-filen
file_path = "complete dataset csv without obstacles/complete_60_min_without_obstacles.csv"

# Last inn CSV-filen som en DataFrame
data = pd.read_csv(file_path)

In [11]:
data.head()
for column in data.columns:
    print(column)

csi_size
ftm_clock
num_rx
num_tx
num_subcarriers
rssi1
rssi2
rate_format
channel_width
mcs
antenna_a
antenna_b
ldpc
ss
beamforming
subcarrier_0_rx0_tx0_amplitude
subcarrier_0_rx0_tx0_phase
subcarrier_0_rx0_tx1_amplitude
subcarrier_0_rx0_tx1_phase
subcarrier_0_rx1_tx0_amplitude
subcarrier_0_rx1_tx0_phase
subcarrier_0_rx1_tx1_amplitude
subcarrier_0_rx1_tx1_phase
subcarrier_1_rx0_tx0_amplitude
subcarrier_1_rx0_tx0_phase
subcarrier_1_rx0_tx1_amplitude
subcarrier_1_rx0_tx1_phase
subcarrier_1_rx1_tx0_amplitude
subcarrier_1_rx1_tx0_phase
subcarrier_1_rx1_tx1_amplitude
subcarrier_1_rx1_tx1_phase
subcarrier_2_rx0_tx0_amplitude
subcarrier_2_rx0_tx0_phase
subcarrier_2_rx0_tx1_amplitude
subcarrier_2_rx0_tx1_phase
subcarrier_2_rx1_tx0_amplitude
subcarrier_2_rx1_tx0_phase
subcarrier_2_rx1_tx1_amplitude
subcarrier_2_rx1_tx1_phase
subcarrier_3_rx0_tx0_amplitude
subcarrier_3_rx0_tx0_phase
subcarrier_3_rx0_tx1_amplitude
subcarrier_3_rx0_tx1_phase
subcarrier_3_rx1_tx0_amplitude
subcarrier_3_rx1_tx0_phase

In [12]:
data['system_time'] = pd.to_datetime(data['system_time'])

# 2) Sorter data etter tid (hvis ikke allerede sortert)
data.sort_values(by='system_time', inplace=True)

# 3) Finn start-tid og lag relativ tidskolonne (sekunder fra start)
start_time = data['system_time'].min()
data['time_since_start'] = (data['system_time'] - start_time).dt.total_seconds()

# 4) Lag differansekolonne (sekunder mellom hver rad)
data['time_diff'] = data['time_since_start'].diff().fillna(0)

# 5) Fjern den opprinnelige system_time-kolonnen
data.drop(columns=['system_time'], inplace=True)

# Sjekk resultatet
print(data.head())

   csi_size   ftm_clock  num_rx  num_tx  num_subcarriers  rssi1  rssi2  \
0       896   675497340       2       2               56     58     58   
1       896  2743824420       2       2               56     52     53   
2       896  4215185232       2       2               56     57     52   
3       896  4216065872       2       2               56     40     51   
4       896  1320047896       2       2               56     59     60   

  rate_format  channel_width  mcs  ...  subcarrier_55_rx0_tx1_phase  \
0         VHT             20    0  ...                     0.049958   
1         VHT             20    0  ...                    -0.896055   
2         VHT             20    0  ...                     1.031894   
3         VHT             20    0  ...                    -0.367174   
4         VHT             20    0  ...                    -2.612003   

   subcarrier_55_rx1_tx0_amplitude  subcarrier_55_rx1_tx0_phase  \
0                       106.320271                    -2.8555

In [13]:
# -------------------------------------------------------------------
# 2) Finn alle amplitude- og fasekolonner
#    Vi leter etter mønsteret:  subcarrier_{i}_rxX_txY_amplitude  /  ..._phase
# -------------------------------------------------------------------
all_columns = data.columns.tolist()

amplitude_cols = [c for c in all_columns if "_amplitude" in c and "subcarrier_" in c]
phase_cols     = [c for c in all_columns if "_phase"     in c and "subcarrier_" in c]

# Eksempel: "subcarrier_10_rx0_tx1_amplitude" -> subcarrier index = 10, rx=0, tx=1
# Om du vil matche mer presist kan du bruke regex, men her er en enkel filter.
# Du kan også kun bruke amplitude_cols = [c for c in all_columns if re.match(..., c)].

# -------------------------------------------------------------------
# 3) Definer terskel for fjerning og data-struktur for logging
# -------------------------------------------------------------------
amplitude_threshold_frac = 0.001  # 0.1% av max amplitude i gruppen
removed_columns_info = {}  # dict for logging: { receiver_pc -> [liste av fjernede kolonner] }

# -------------------------------------------------------------------
# 4) Gå gjennom hver mottaker-PC (eller `(receiver_pc, sender)`).
#    Filtrer data og identifiser subcarriers med for lav amplitude.
#    Fjern amplitude + tilhørende fase for hele datasettet.
# -------------------------------------------------------------------
# Du kan justere om du vil gruppere på bare "receiver_pc"
# eller på ("receiver_pc","sender"). Nedenfor grupperer vi på bare "receiver_pc".
grouped = data.groupby("receiver_pc")

for rx_pc, df_group in grouped:
    # Finn global maks amplitude i denne PC-gruppen (over alle rader i gruppen).
    # NB: Du kan også filtrere på "sender" om du ønsker.
    # Her ser vi på alle amplitude-kolonner i df_group.
    
    # 1) Bygg en matrise for amplitude i df_group
    #    (formelt: rader = tid, kolonner = subcarrier-col).
    subcarrier_max_values = {}
    for col in amplitude_cols:
        if col in df_group.columns:
            # max amplitude i denne PC-gruppen for kolonnen 'col'
            col_max = df_group[col].max(skipna=True)
            subcarrier_max_values[col] = col_max
        else:
            subcarrier_max_values[col] = 0.0  # finnes ikke i denne PC-gruppen
    
    # 2) Finn global maks (over alle amplitude-kolonner i gruppen)
    global_max_ampl = max(subcarrier_max_values.values()) if subcarrier_max_values else 0
    
    if global_max_ampl <= 0:
        # Ingen gyldige amplituder, hopp over
        continue
    
    threshold_val = amplitude_threshold_frac * global_max_ampl
    
    # 3) Bestem hvilke amplitude-kolonner som må fjernes pga. lav max
    amplitude_cols_to_remove = []
    for col, cmax in subcarrier_max_values.items():
        if cmax < threshold_val:
            amplitude_cols_to_remove.append(col)
    
    # 4) Finn tilhørende fasekolonner
    #    For en amplitude-kolonne = "subcarrier_10_rxX_txY_amplitude",
    #    er fasekolonnen (om den finnes) = "subcarrier_10_rxX_txY_phase".
    phase_cols_to_remove = []
    for amp_col in amplitude_cols_to_remove:
        # Bytt ut '_amplitude' med '_phase'
        phase_col = amp_col.replace("_amplitude", "_phase")
        if phase_col in data.columns:
            phase_cols_to_remove.append(phase_col)
    
    # 5) Fjern disse kolonnene fra data (gjelder hele DataFrame, ikke bare i denne gruppen)
    # Samle alle kolonner å fjerne
    cols_to_remove = amplitude_cols_to_remove + phase_cols_to_remove
    
    # Filter bort de som faktisk finnes i data.columns
    cols_to_remove_final = [c for c in cols_to_remove if c in data.columns]
    
    if cols_to_remove_final:
        # Logg i dictionary
        if rx_pc not in removed_columns_info:
            removed_columns_info[rx_pc] = []
        removed_columns_info[rx_pc].extend(cols_to_remove_final)
        
        # Dropp dem fra 'data'
        data.drop(columns=cols_to_remove_final, inplace=True)

# -------------------------------------------------------------------
# 5) Print en enkel rapport over hva som ble fjernet per PC
# -------------------------------------------------------------------
print("FJERNE-RAPPORT (amplitude og fase), terskel = {}% av global max i hver PC-gruppe\n".format(
    amplitude_threshold_frac * 100
))

if not removed_columns_info:
    print("Ingen kolonner ble fjernet.")
else:
    for rx_pc, removed_cols in removed_columns_info.items():
        print(f"\nPC {rx_pc}: fjernet {len(removed_cols)} kolonner")
        print("  ->", removed_cols)

# -------------------------------------------------------------------
# Nå er 'data' oppdatert og alle “ubrukelige” amplitude/fase-kolonner er fjernet.
# Du kan lagre data om du vil:
# data.to_csv("cleaned_dataset.csv", index=False)
# -------------------------------------------------------------------

FJERNE-RAPPORT (amplitude og fase), terskel = 0.1% av global max i hver PC-gruppe


PC 1: fjernet 32 kolonner
  -> ['subcarrier_7_rx0_tx0_amplitude', 'subcarrier_7_rx0_tx1_amplitude', 'subcarrier_7_rx1_tx0_amplitude', 'subcarrier_7_rx1_tx1_amplitude', 'subcarrier_21_rx0_tx0_amplitude', 'subcarrier_21_rx0_tx1_amplitude', 'subcarrier_21_rx1_tx0_amplitude', 'subcarrier_21_rx1_tx1_amplitude', 'subcarrier_34_rx0_tx0_amplitude', 'subcarrier_34_rx0_tx1_amplitude', 'subcarrier_34_rx1_tx0_amplitude', 'subcarrier_34_rx1_tx1_amplitude', 'subcarrier_48_rx0_tx0_amplitude', 'subcarrier_48_rx0_tx1_amplitude', 'subcarrier_48_rx1_tx0_amplitude', 'subcarrier_48_rx1_tx1_amplitude', 'subcarrier_7_rx0_tx0_phase', 'subcarrier_7_rx0_tx1_phase', 'subcarrier_7_rx1_tx0_phase', 'subcarrier_7_rx1_tx1_phase', 'subcarrier_21_rx0_tx0_phase', 'subcarrier_21_rx0_tx1_phase', 'subcarrier_21_rx1_tx0_phase', 'subcarrier_21_rx1_tx1_phase', 'subcarrier_34_rx0_tx0_phase', 'subcarrier_34_rx0_tx1_phase', 'subcarrier_34_rx1_tx0

In [14]:
# Drop columns specified as non-essential for the model
columns_to_drop = ['csi_size', 'ftm_clock', 'num_rx', 'num_tx', 'num_subcarriers', 
                   'source_mac', 'rate_format', 'channel_width',
                   'mcs', 'antenna_a', 'antenna_b', 'ldpc', 'ss', 'beamforming','rssi1', 'rssi2']
data = data.drop(columns=columns_to_drop, errors='ignore')
for column in data.columns:
    print(column)

subcarrier_0_rx0_tx0_amplitude
subcarrier_0_rx0_tx0_phase
subcarrier_0_rx0_tx1_amplitude
subcarrier_0_rx0_tx1_phase
subcarrier_0_rx1_tx0_amplitude
subcarrier_0_rx1_tx0_phase
subcarrier_0_rx1_tx1_amplitude
subcarrier_0_rx1_tx1_phase
subcarrier_1_rx0_tx0_amplitude
subcarrier_1_rx0_tx0_phase
subcarrier_1_rx0_tx1_amplitude
subcarrier_1_rx0_tx1_phase
subcarrier_1_rx1_tx0_amplitude
subcarrier_1_rx1_tx0_phase
subcarrier_1_rx1_tx1_amplitude
subcarrier_1_rx1_tx1_phase
subcarrier_2_rx0_tx0_amplitude
subcarrier_2_rx0_tx0_phase
subcarrier_2_rx0_tx1_amplitude
subcarrier_2_rx0_tx1_phase
subcarrier_2_rx1_tx0_amplitude
subcarrier_2_rx1_tx0_phase
subcarrier_2_rx1_tx1_amplitude
subcarrier_2_rx1_tx1_phase
subcarrier_3_rx0_tx0_amplitude
subcarrier_3_rx0_tx0_phase
subcarrier_3_rx0_tx1_amplitude
subcarrier_3_rx0_tx1_phase
subcarrier_3_rx1_tx0_amplitude
subcarrier_3_rx1_tx0_phase
subcarrier_3_rx1_tx1_amplitude
subcarrier_3_rx1_tx1_phase
subcarrier_4_rx0_tx0_amplitude
subcarrier_4_rx0_tx0_phase
subcarrier_4_r

In [15]:
data.to_csv("CSV_Library/with_obstacles.csv", index=False)