# WiFi Signal Strength Analysis for Indoor Localization
This project analyzes WiFi RSSI data to determine if WiFi can be used for indoor positioning.

Load Libraries (Code Cell)

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import datetime

Load and Process WiFi Data

In [2]:
# Define folder path for WiFi scan files
folder_path = "../data"

# Ensure the results directory exists
results_folder = "../results"
if not os.path.exists(results_folder):
    os.makedirs(results_folder)

# Dictionary to store MAC → SSID mapping (since JSON format lacks SSID)
mac_to_ssid = {}

# Initialize an empty list to store processed data
wifi_data = []

# Function to process WiFi text files
def process_wifi_file(file_path, location_name):
    with open(file_path, 'r') as file:
        lines = file.readlines()

    scan_id = 0  # Differentiate 10 scans per location
    seen_macs = set()  # Store seen MAC addresses per scan to avoid duplicates

    for line in lines:
        # Extract MAC Address, RSSI, SSID from comma-separated format
        csv_match = re.search(r'(\d+),\d+,(-?\d+),.*?,.*?,.*?,.*?,([\dA-Fa-f:]+),(.+)', line)

        # Extract MAC Address and RSSI from JSON format (which lacks SSID)
        json_match = re.search(r'{"macAddress": "([\dA-Fa-f:]+)",\s*"signalStrength": (-?\d+),', line)

        # Extract timestamp if available
        time_match = re.search(r'(\d{2}:\d{2}:\d{2})', line)

        # If we find a valid comma-separated line with SSID
        if csv_match:
            mac_address = csv_match.group(3)
            rssi = int(csv_match.group(2))
            ssid = csv_match.group(4).strip()  # Extract SSID

            # Store MAC to SSID mapping (for JSON entries later)
            mac_to_ssid[mac_address] = ssid  

            # Ensure MAC is counted only once per scan
            if mac_address not in seen_macs:
                seen_macs.add(mac_address)
                timestamp = time_match.group(1) if time_match else datetime.datetime.now().strftime("%H:%M:%S")
                wifi_data.append([location_name, scan_id, mac_address, rssi, ssid, timestamp])

        # If we find a JSON-style entry (missing SSID), use stored mapping
        elif json_match:
            mac_address = json_match.group(1)
            rssi = int(json_match.group(2))
            ssid = mac_to_ssid.get(mac_address, "Unknown")  # Retrieve SSID from previous entries

            # Ensure MAC is counted only once per scan
            if mac_address not in seen_macs:
                seen_macs.add(mac_address)
                timestamp = time_match.group(1) if time_match else datetime.datetime.now().strftime("%H:%M:%S")
                wifi_data.append([location_name, scan_id, mac_address, rssi, ssid, timestamp])

        if "We should be done now" in line:
            scan_id += 1  # Move to next scan
            seen_macs.clear()  # Reset seen MAC addresses for the new scan

# Process all text files in the data folder
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        location_name = filename.replace(".txt", "")  # Extract location name
        process_wifi_file(os.path.join(folder_path, filename), location_name)

# Convert to Pandas DataFrame
df = pd.DataFrame(wifi_data, columns=["Location", "Scan_ID", "MAC_Address", "RSSI", "SSID", "Timestamp"])

# Save to CSV for easy access
csv_path = os.path.join(results_folder, "combined_wifi_data.csv")
df.to_csv(csv_path, index=False)

# Display first few rows
print("Processed Data Saved to:", csv_path)
df.head()

Processed Data Saved to: ../results/combined_wifi_data.csv


Unnamed: 0,Location,Scan_ID,MAC_Address,RSSI,SSID,Timestamp
0,GBlock_1F_Washroom,0,84:d4:7e:e6:24:c0,-56,airuc-guest,13:02:27
1,GBlock_1F_Washroom,0,84:d4:7e:e6:24:c1,-55,airuc-secure,13:02:27
2,GBlock_1F_Washroom,0,84:d4:7e:f2:fd:c0,-76,airuc-guest,13:02:27
3,GBlock_1F_Washroom,0,84:d4:7e:f2:fd:c1,-74,airuc-secure,13:02:27
4,GBlock_1F_Washroom,0,84:d4:7e:f2:fd:c2,-75,eduroam,13:02:27


Analyzing WiFi Signal Strength

 ## Answers to Lab Questions (A to I)
- **(A) Number of APs per location:** X APs detected per location.
- **(B) Unique APs:** X unique access points identified.
- **(C) Variability in RSSI:** Mean and standard deviation computed.
- **(D) Can MAC addresses distinguish locations?** Yes/No, based on analysis.
- **(E) Can RSSI levels distinguish locations?** Yes/No, explanation.
- **(F) Can you tell North/South?** Yes/No.
- **(G) Can you tell the floor?** Yes/No.
- **(H) Is WiFi suitable for indoor positioning?** Discussion.
- **(I) Identifying an unknown location:** Compared to dataset X, we identified location Y.


Plot WiFi Signal Strength on Floor Plan

In [3]:
# Count unique APs (MAC Addresses) per location
ap_counts = df.groupby("Location")["MAC_Address"].nunique().reset_index()

# Add 1 to ensure counting starts from 1 instead of 0
ap_counts["MAC_Address"] = ap_counts["MAC_Address"] + 1

# Rename columns for clarity
ap_counts.columns = ["Location", "Number_of_Access_Points"]

# Display the result
from IPython.display import display
display(ap_counts)


Unnamed: 0,Location,Number_of_Access_Points
0,EBlock_1F_ENE131,32
1,EBlock_3F_ENE329,17
2,GBlock_1F_DeanOffice,22
3,GBlock_1F_Washroom,42
4,GBlock_2F_ENC201,27
