# BLE Scanner Data Analysis

This notebook analyzes Bluetooth Low Energy (BLE) scan data collected by our custom scanner. It covers data loading, temporal patterns, per-device statistics, RSSI distributions, and device persistence.

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from utils import *

%matplotlib inline
plt.style.use('default')  # Using default matplotlib style\n",
sns.set_theme()  # Apply seaborn styling\n",
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['figure.dpi'] = 100
plt.rcParams['font.size'] = 12

## 1. Data Loading and Initial Processing

In [None]:
# Open the MongoDB collection (helper presumably provided by `utils`).
collection = connect_mongodb()

# Query window: the 24 hours ending at the moment this cell runs.
# NOTE(review): datetime.now() is naive local time — confirm this matches
# the timezone of the stored timestamps.
end_date = datetime.now()
start_date = end_date - timedelta(hours=24)
data = query_data_by_date(collection, start_date, end_date)

# Turn the queried buffers into the DataFrame used by every later cell.
df = process_buffer_data(data)
print(f"Loaded {len(df)} records from {len(data)} buffers")

## 2. Temporal Analysis

In [None]:
# Compute the temporal summary and print it as one "name: value" line per entry.
temporal_stats = get_temporal_analysis(df)
print(
    "\nTemporal Statistics:",
    *(f"{key}: {value}" for key, value in temporal_stats.items()),
    sep="\n",
)

# Visualise the temporal patterns of the same DataFrame.
plot_temporal_patterns(df)

## 3. Device Analysis

In [None]:
# Compute the per-device summary and print it as one "name: value" line per entry.
device_stats = get_device_analysis(df)
print(
    "\nDevice Statistics:",
    *(f"{key}: {value}" for key, value in device_stats.items()),
    sep="\n",
)

# Visualise the device patterns of the same DataFrame.
plot_device_patterns(df)

## 4. RSSI Analysis

In [None]:
# Box plot of the `rssi` column grouped by the `hour` column of df,
# drawn on an explicitly created figure/axes pair.
rssi_fig, rssi_ax = plt.subplots(figsize=(15, 6))
sns.boxplot(data=df, x='hour', y='rssi', ax=rssi_ax)
rssi_ax.set_title('RSSI Distribution by Hour')
rssi_ax.set_xlabel('Hour of Day')
rssi_ax.set_ylabel('RSSI (dBm)')

## 5. Device Persistence Analysis

In [None]:
# Calculate device persistence: one row per value of the `mac` column with
# the first/last timestamp, the number of rows seen, and the RSSI mean and
# standard deviation.
# Named aggregation produces flat, self-describing column names directly, so
# no positional renaming of a MultiIndex header is required.
device_persistence = df.groupby('mac').agg(
    first_seen=('timestamp', 'min'),
    last_seen=('timestamp', 'max'),
    appearances=('timestamp', 'count'),
    mean_rssi=('rssi', 'mean'),
    std_rssi=('rssi', 'std'),
).reset_index()

# Span between the first and last sighting of each device.
device_persistence['duration'] = (
    device_persistence['last_seen'] - device_persistence['first_seen']
)

# The plot's x-axis is labelled in hours, so express the duration in hours.
# With datetime timestamps the subtraction yields Timedelta values, which do
# not plot as hours on their own.
duration = device_persistence['duration']
if pd.api.types.is_timedelta64_dtype(duration):
    device_persistence['duration_hours'] = duration.dt.total_seconds() / 3600
else:
    # NOTE(review): `timestamp` is not datetime-like here, so its unit cannot
    # be determined from this notebook — the raw difference is plotted
    # unchanged; confirm it is in hours or convert it upstream.
    device_persistence['duration_hours'] = duration

# Plot persistence patterns: time span present vs. number of sightings.
plt.figure(figsize=(15, 6))
sns.scatterplot(data=device_persistence, x='duration_hours', y='appearances', alpha=0.5)
plt.title('Device Persistence vs Appearances')
plt.xlabel('Duration Present (hours)')
plt.ylabel('Number of Appearances')