In [17]:
import pandas as pd
import plotly.express as px

In [18]:
# Read the raw dataset from disk, then show summary statistics
# (count / mean / std / quartiles) for its numeric columns.
RAW_DATASET_CSV = '../data/raw/wireless_communication_dataset.csv'
df = pd.read_csv(RAW_DATASET_CSV)
df.describe()

Unnamed: 0,User Speed (m/s),Signal Strength (dBm),Battery Level (%),Distance from Base Station (m),Handover Events,Power Consumption (mW),Transmission Power (dBm),Throughput (Mbps),Latency (ms)
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,8.460698,-85.03816,54.9938,551.165042,0.878,563.53416,20.201844,23.543712,77.3859
std,8.36855,14.747225,26.042408,198.084621,1.209384,114.024543,6.708983,23.168469,33.297976
min,0.0,-120.0,10.0,10.0,0.0,229.65,5.0,1.0,5.0
25%,1.32,-95.2225,33.0,414.0,0.0,477.04,15.41,1.0,49.78
50%,2.74,-85.225,55.0,550.355,0.0,535.1,20.61,17.825,76.415
75%,12.9875,-74.82,78.0,687.2725,2.0,668.85,25.6125,39.26,100.685
max,27.77,-50.0,100.0,1119.09,4.0,843.58,30.0,110.45,168.07


In [19]:
# Inspect the dtype pandas inferred for each column of df.
df.dtypes

User Speed (m/s)                  float64
Signal Strength (dBm)             float64
Battery Level (%)                   int64
Network Congestion                 object
Distance from Base Station (m)    float64
Handover Events                     int64
Power Consumption (mW)            float64
Transmission Power (dBm)          float64
Throughput (Mbps)                 float64
Latency (ms)                      float64
RF Link Quality                    object
dtype: object

In [20]:
# Convert categorical columns to numerical using hash maps
rf_link_quality_hash_map = {'Poor': 0, 'Moderate': 1, 'Good': 2}
congestion_hash_map = {'Low': 0, 'Medium': 1, 'High': 2}


def _encode_with_hash_map(column, hash_map):
    """Return ``column`` with its labels replaced by the integer codes in ``hash_map``.

    Parameters
    ----------
    column : pandas.Series
        Column holding either the original labels (keys of ``hash_map``) or
        codes produced by an earlier run of this cell (values of ``hash_map``).
    hash_map : dict
        Label -> integer code.

    Returns
    -------
    pandas.Series
        int64 codes aligned with ``column``'s index.

    Raises
    ------
    ValueError
        If any entry (including a missing value) has no code in ``hash_map``.
    """
    # Re-running the cell must be a no-op: pushing already-encoded integers
    # through the label -> code dict would turn every value into NaN.
    if column.isin(list(hash_map.values())).all():
        return column.astype('int64')

    encoded = column.map(hash_map)
    unmapped = encoded.isna()
    if unmapped.any():
        # Fail loudly instead of leaving silent NaNs in the encoded column.
        raise ValueError(
            f"{column.name!r} has values with no code in the hash map: "
            f"{column[unmapped].unique().tolist()}"
        )
    return encoded.astype('int64')


# df is overwritten in place because the cells below read the encoded columns
# from df; the helper keeps that safe to execute more than once.
df['RF Link Quality'] = _encode_with_hash_map(df['RF Link Quality'], rf_link_quality_hash_map)
df['Network Congestion'] = _encode_with_hash_map(df['Network Congestion'], congestion_hash_map)

In [21]:
# Display the RF Link Quality column to check its values and dtype.
df['RF Link Quality']

0       0
1       0
2       0
3       0
4       1
       ..
4995    0
4996    1
4997    2
4998    1
4999    2
Name: RF Link Quality, Length: 5000, dtype: int64

In [22]:
# Pairwise correlation between the columns of df, rendered as an annotated
# heatmap and also saved as a standalone HTML report.
corr = df.corr()

fig = px.imshow(
    corr,
    title='Correlation Matrix',
    color_continuous_scale='RdBu_r',
    aspect="auto",
    text_auto=True,  # print each coefficient inside its cell
)
fig.show()
fig.write_html("../reports/correlation_matrix.html")

In [27]:
# Histogram of the RF Link Quality values, one colour per distinct value,
# with a gap between bars so they read as separate categories.
fig = px.histogram(
    df,
    x='RF Link Quality',
    color='RF Link Quality',
    nbins=4,
).update_layout(bargap=0.2)
fig.show()

In [24]:
# Latency summary restricted to rows whose RF Link Quality code is 0
# (the code this notebook assigns to 'Poor').
df.loc[df['RF Link Quality'].eq(0), 'Latency (ms)'].describe()

count    2473.000000
mean      100.688265
std        26.892531
min        18.800000
25%        83.170000
50%       101.160000
75%       121.690000
max       168.070000
Name: Latency (ms), dtype: float64

In [25]:
# Load the processed train.csv (presumably the training split produced
# elsewhere in the project -- confirm against the preprocessing step).
TRAIN_CSV = '../data/processed/train.csv'
train_df = pd.read_csv(TRAIN_CSV)

In [26]:
# Count how many rows of train_df fall under each RF Link Quality value.
train_df['RF Link Quality'].value_counts()

RF Link Quality
0    1978
1    1378
2     644
Name: count, dtype: int64