#### 1. RSSI data extraction

In [16]:
import os
import pandas as pd
from pathlib import Path

# Resolve the project root as the parent of the current working directory.
# NOTE(review): this assumes the kernel is started from a direct child of the
# project root (e.g. the notebooks directory) — confirm before running elsewhere.
project_root = Path.cwd().parent

# Every input file lives in the project's "data" directory
data_dir = project_root / 'data'
data_rssi_path = data_dir / 'raw_data.csv'
data_ap_path = data_dir / 'AP_crd.csv'

# Load the raw measurements into a DataFrame
df_raw = pd.read_csv(data_rssi_path)

#### 2. Filtering the data relevant to the scenario in use

In [17]:
# Column filtering: keep the first 21 columns of the raw frame, then discard
# the two access points that are not used in this scenario.
cols_to_keep = df_raw.columns[:21]
df_filtered = df_raw.loc[:, cols_to_keep].drop(columns=['WAP103', 'WAP110'])

# Row filtering: LABEL values of the areas left out of the scenario
# (bathrooms, rooms 7, 8 and 9, and the hall).
bathrooms = [3313, 3315, 3221, 3223, 3225]
room_7 = [4911, 4912, 4914, 4931, 4932, 4934, 4951, 4952, 4954, 4971, 4972, 4974]
room_8 = [4211, 4213, 4215, 4217, 4231, 4233, 4235, 4237, 4251, 4253, 4255, 4257, 4261, 4263, 4265, 4267]
room_9 = [4421, 4423, 4425, 4441, 4443, 4445, 4461, 4463, 4465]
hall = [6319, 6329, 6349, 6359, 63111, 63211, 63411, 63511, 63113, 63213, 63115, 63215]

# Single flat list with every label to exclude
rooms_to_exclude = [*bathrooms, *room_7, *room_8, *room_9, *hall]

# Keep only the rows whose LABEL is not one of the excluded areas
is_excluded = df_filtered['LABEL'].isin(rooms_to_exclude)
df_filtered = df_filtered[~is_excluded]

# Report the shapes before and after filtering.
# Note: the second shape reflects both the column and the row filtering.
print(f"Original shape: {df_raw.shape}")
print(f"After column filtering: {df_filtered.shape}")

Original shape: (20004, 27)
After column filtering: (14614, 19)


#### 3. Row filtering based on the count of non-100 values across WAP columns

In [18]:
# Keep only the rows in which at least MIN_VALID_READINGS access points
# report a value different from the placeholder.
# NOTE(review): 100 is treated here as the "no reading" placeholder of the WAP
# columns — confirm against the dataset description.
PLACEHOLDER_RSSI = 100
MIN_VALID_READINGS = 3

# Access-point columns still present in the filtered frame
wap_columns = [col for col in df_filtered.columns if col.startswith('WAP')]

# Per-row count of WAP values different from the placeholder.
# Computed with vectorized operations and kept as a standalone Series, so no
# temporary column has to be written into (and later dropped from) the frame.
non_100_count = df_filtered[wap_columns].ne(PLACEHOLDER_RSSI).sum(axis=1)

# .copy() makes the result an independent frame, so later column assignments
# on df_filtered do not raise SettingWithCopyWarning.
df_filtered = df_filtered.loc[non_100_count >= MIN_VALID_READINGS].copy()

# Print the shape to verify
print(f"After non-100 filtering: {df_filtered.shape}")

# Store "DEVICE" and "LABEL" as categorical columns.
# Note: this dtype only exists in memory; a CSV round trip does not preserve it.
df_filtered = df_filtered.astype({'DEVICE': 'category', 'LABEL': 'category'})

After non-100 filtering: (12097, 19)


#### 4. Saving the DataFrame to CSV in the "data" folder

In [19]:
# Destination of the filtered dataset inside the project's "data" directory
output_path = project_root / 'data' / 'filtered_data.csv'

# Write the filtered rows without the DataFrame index
df_filtered.to_csv(output_path, index=False)

# Confirm the export by reporting the location and the on-disk size in MB
bytes_per_mb = 1024 * 1024
size_mb = output_path.stat().st_size / bytes_per_mb
print(f"Filtered data saved to: {output_path}")
print(f"File size: {size_mb:.2f} MB")

Filtered data saved to: /home/braulio/thesis/data/filtered_data.csv
File size: 0.92 MB
