In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [54]:
# Load the raw 5G quality-of-service dataset
df = pd.read_csv("../data/raw/Quality of Service 5G.csv")

# Show 5 randomly chosen rows to get a feel for the raw values
print(df.sample(5))

# Display basic information about the dataset.
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()
print("-" * 100)
print(df.describe())

          Timestamp   User_ID     Application_Type Signal_Strength Latency  \
47   9/3/2023 10:00   User_48  Background_Download         -94 dBm   45 ms   
393  9/3/2023 10:06  User_394            VoIP_Call         -56 dBm   52 ms   
7    9/3/2023 10:00    User_8      IoT_Temperature         -95 dBm  100 ms   
151  9/3/2023 10:02  User_152    Emergency_Service         -58 dBm    5 ms   
322  9/3/2023 10:05  User_323        Online_Gaming         -50 dBm   38 ms   

    Required_Bandwidth Allocated_Bandwidth Resource_Allocation  
47            640 Kbps            640 Kbps                 60%  
393             0 Kbps              0 Kbps                 90%  
7              10 Kbps             15 Kbps                 50%  
151           0.8 Mbps            1.0 Mbps                 70%  
322           5.4 Mbps            5.7 Mbps                 80%  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 8 columns):
 #   Column               Non-Null Co

In [55]:
# Remove the columns that are dropped as redundant for this analysis
df = df.drop(columns=["Timestamp", "User_ID"])

# Each of these text columns carries a fixed unit marker; remove the marker
# and cast what is left to float so the column becomes numeric.
unit_markers = {
    'Signal_Strength': ' dBm',
    'Latency': ' ms',
    'Resource_Allocation': '%',
}
for column_name, marker in unit_markers.items():
    stripped = df[column_name].str.replace(marker, '')
    df[column_name] = stripped.astype(float)

def parse_bandwidth(bandwidth_str):
    """Convert a bandwidth reading to a float expressed in Mbps.

    Parameters
    ----------
    bandwidth_str : str or number or None
        A value such as ``"5.4 Mbps"``, ``"640 Kbps"``, ``"2 Gbps"`` or a bare
        number like ``"12"``. The unit is matched case-insensitively and may
        be written with or without a space before it. Surrounding whitespace
        is ignored.

    Returns
    -------
    float
        The bandwidth in Mbps. Kbps values are divided by 1000, Gbps values
        are multiplied by 1000, and values without a recognised unit are
        returned as-is. Missing values (``None`` or NaN) yield NaN.

    Raises
    ------
    ValueError
        If the numeric part of the text cannot be parsed as a float.
    """
    if bandwidth_str is None:
        return float("nan")
    if not isinstance(bandwidth_str, str):
        # Already numeric (this includes NaN for missing cells): pass through.
        return float(bandwidth_str)

    text = bandwidth_str.strip()
    unit = text[-4:].lower()
    number = text[:-4]

    if unit == "mbps":
        return float(number)
    if unit == "kbps":
        # Divide (rather than multiply by 0.001) to keep results exact.
        return float(number) / 1000
    if unit == "gbps":
        return float(number) * 1000
    # No recognised unit: treat the whole text as a plain number.
    return float(text)
    
# Run parse_bandwidth over every value of both bandwidth columns
for bandwidth_column in ('Required_Bandwidth', 'Allocated_Bandwidth'):
    df[bandwidth_column] = df[bandwidth_column].apply(parse_bandwidth)

In [56]:
# Visualizations

# Bar-style histogram counting rows per application type,
# with one colour per application type.
fig = px.histogram(
    df,
    x='Application_Type',
    color='Application_Type',
    nbins=30,
    title='Distribution of Application Types',
).update_layout(
    bargap=0.2,
    xaxis_title='Application Type',
    yaxis_title='Count',
)
fig.show()

In [59]:
# Scatter of required against allocated bandwidth, coloured by application type
fig = px.scatter(
    df,
    x='Required_Bandwidth',
    y='Allocated_Bandwidth',
    color='Application_Type',
    title='Required vs Allocated Bandwidth',
)
axis_titles = dict(
    xaxis_title='Required Bandwidth (Mbps)',
    yaxis_title='Allocated Bandwidth (Mbps)',
)
fig.update_layout(**axis_titles)
# Rotate the x tick labels to vertical
fig.update_xaxes(tickangle=90)
fig.show()

In [58]:
# Pairwise correlation between the numeric columns only
# (non-numeric columns such as Application_Type are skipped).
corr_matrix = df.corr(numeric_only=True)

# Visualize the correlation matrix using Plotly.
# The colour range is pinned to [-1, 1] so the neutral midpoint of the
# diverging RdBu scale corresponds to zero correlation; without this the
# range is taken from the data and the midpoint lands on an arbitrary value.
fig = px.imshow(
    corr_matrix,
    text_auto=True,
    color_continuous_scale='RdBu',
    zmin=-1,
    zmax=1,
    title='Correlation Matrix'
)

fig.show()