In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the raw wireless-communication dataset (path is relative to this notebook).
df = pd.read_csv('../data/raw/wireless_communication_dataset.csv')
# Summary statistics; describe() reports only the numeric columns by default.
df.describe()

Unnamed: 0,User Speed (m/s),User Direction (degrees),Handover Events,Distance from Base Station (m),Signal Strength (dBm),SNR (dB),BER,PDR (%),Throughput (Mbps),Latency (ms),Retransmission Count,Power Consumption (mW),Battery Level (%),Transmission Power (dBm)
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,14.90496,176.935365,1.9842,512.396017,-69.356608,17.327786,0.024978,75.003711,50.079416,50.77896,4.5902,548.908184,52.553166,15.07318
std,8.68901,102.821943,1.413206,284.558375,17.363416,7.117177,0.014432,14.52677,28.660555,28.510035,2.881167,259.080882,27.308025,8.57089
min,0.000349,0.019018,0.0,10.24993,-99.997113,5.002756,0.0001,50.029738,1.010366,1.038354,0.0,100.24311,5.070177,0.005138
25%,7.315884,88.972527,1.0,269.26112,-84.140191,11.15818,0.012382,62.571999,24.967591,26.329282,2.0,329.907583,28.848982,7.703841
50%,15.000259,174.949767,2.0,517.65359,-69.292303,17.420325,0.025108,74.935575,50.048642,51.373968,5.0,543.90323,53.066443,14.984982
75%,22.443026,264.013775,3.0,757.816264,-54.352533,23.269593,0.037443,87.864276,75.267023,75.210913,7.0,772.916947,76.578555,22.526188
max,29.99153,359.821868,4.0,999.925579,-40.005941,29.99239,0.049989,99.994528,99.995738,99.984289,9.0,999.72089,99.963795,29.996253


In [3]:
# Show each column's dtype to spot the non-numeric (object) columns that
# must be encoded before any numeric analysis.
df.dtypes

User Speed (m/s)                  float64
User Direction (degrees)          float64
Handover Events                     int64
Distance from Base Station (m)    float64
Signal Strength (dBm)             float64
SNR (dB)                          float64
BER                               float64
Modulation Scheme                  object
PDR (%)                           float64
Network Congestion                 object
Throughput (Mbps)                 float64
Latency (ms)                      float64
Retransmission Count                int64
Power Consumption (mW)            float64
Battery Level (%)                 float64
Transmission Power (dBm)          float64
RF Link Quality                    object
dtype: object

In [4]:
# Convert the categorical (object) columns to integer codes using hash maps.
rf_link_quality_hash_map = {'Poor': 0, 'Moderate': 1, 'Good': 2}
modulation_scheme_hash_map = {'BPSK': 0, 'QPSK': 1, '16-QAM': 2, '64-QAM': 3}
congestion_hash_map = {'Low': 0, 'Medium': 1, 'High': 2}

# Drop rows whose 'RF Link Quality' is the string '0': that value is not in the
# mapping above, so those rows cannot be encoded.
# .copy() detaches the filtered frame from the original so the column
# assignments below write to a real frame instead of a slice
# (avoids pandas' SettingWithCopyWarning).
df = df[df['RF Link Quality'] != '0'].copy()

for column, mapping in (
    ('RF Link Quality', rf_link_quality_hash_map),
    ('Modulation Scheme', modulation_scheme_hash_map),
    ('Network Congestion', congestion_hash_map),
):
    if pd.api.types.is_numeric_dtype(df[column]):
        # Column is already numeric, i.e. this cell was re-run on an encoded
        # frame. Verify the codes are valid and leave the column untouched so
        # the cell stays idempotent instead of mapping every value to NaN.
        if not df[column].isin(list(mapping.values())).all():
            raise ValueError(
                f"Column {column!r} is numeric but contains codes outside "
                f"{sorted(mapping.values())}"
            )
        continue

    encoded = df[column].map(mapping)
    # Series.map yields NaN for any label missing from the mapping; report the
    # offending labels explicitly rather than failing inside astype(int).
    unmapped = df.loc[encoded.isna(), column].unique().tolist()
    if unmapped:
        raise ValueError(f"Unmapped values in column {column!r}: {unmapped}")
    df[column] = encoded.astype(int)

In [5]:
# Pairwise correlation between all columns. This relies on the categorical
# columns having been converted to integers in the encoding cell.
corr = df.corr()

# Heatmap options: annotate each cell with its value and use a diverging
# red/blue palette.
heatmap_kwargs = dict(
    text_auto=True,
    aspect="auto",
    color_continuous_scale='RdBu_r',
    title='Correlation Matrix',
)
fig = px.imshow(corr, **heatmap_kwargs)

# Display inline, then save a standalone interactive copy for the reports folder.
fig.show()
fig.write_html("../reports/correlation_matrix.html")

In [6]:
# Class distribution of the encoded target (0 = Poor, 1 = Moderate, 2 = Good).
df['RF Link Quality'].value_counts()

RF Link Quality
0    3357
1     833
2     145
Name: count, dtype: int64

In [7]:
# Load the processed training split. It is produced outside this notebook;
# the preprocessing steps are not shown here.
train_df = pd.read_csv('../data/processed/train.csv')

In [8]:
# Class distribution of the target in the processed training split.
# NOTE(review): labels here appear as 0.0 / 0.5 / 1.0 rather than 0 / 1 / 2 --
# presumably rescaled to [0, 1] by the preprocessing step; confirm.
train_df['RF Link Quality'].value_counts()

RF Link Quality
0.0    2703
0.5     658
1.0     107
Name: count, dtype: int64

In [9]:
# Load the synthetically augmented training set and check its class balance.
# NOTE(review): the minority classes are presumably oversampled by the
# augmentation step, which is not shown in this notebook -- confirm.
train_augmented = pd.read_csv('../data/synthetic/train_augmented.csv')
train_augmented['RF Link Quality'].value_counts()

RF Link Quality
0.0    2703
0.5    2000
1.0    2000
Name: count, dtype: int64