In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Load the raw dataset (the path is resolved relative to the current working directory)
df = pd.read_csv("../data/raw/Quality of Service 5G.csv")

# 5 random samples from the dataset; the fixed seed keeps the preview
# identical every time the notebook is re-run
print(df.sample(5, random_state=42))

# Display basic information about the dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called bare: wrapping it in print() would append a stray "None" line.
df.info()
print("-" * 100)
# Summary statistics using the DataFrame.describe() defaults
print(df.describe())

In [None]:
# Summary statistics again, this time as the cell's last expression so the
# notebook shows the returned DataFrame as the cell output instead of printed text.
df.describe()

In [None]:
# Replace "_" with a space in every column name.
# regex=False states the intent (a literal match) explicitly, so the result does
# not depend on the pandas-version-specific default of `regex`.
df.columns = df.columns.str.replace('_', ' ', regex=False)

# Replace "_" with a space in the Application Type values (same literal match)
df['Application Type'] = df['Application Type'].str.replace('_', ' ', regex=False)

In [None]:
# Drop redundant columns.
# errors="ignore" keeps this statement re-runnable: once the columns are gone a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=["Timestamp", "User ID"], errors="ignore")

# Convert object columns to numerical by stripping the unit suffix from each value.
for column_name, unit_suffix in (
    ('Signal Strength', ' dBm'),
    ('Latency', ' ms'),
    ('Resource Allocation', '%'),
):
    # A column that is already numeric was converted by an earlier run of these
    # statements; the .str accessor would raise AttributeError on it, so skip it.
    if pd.api.types.is_numeric_dtype(df[column_name]):
        continue
    # regex=False: the suffix is a literal string, not a pattern.
    df[column_name] = (
        df[column_name].str.replace(unit_suffix, '', regex=False).astype(float)
    )

def parse_bandwidth(bandwidth_str):
    """Convert a bandwidth reading to a float expressed in Mbps.

    Args:
        bandwidth_str: Either a string such as "10 Mbps", "500 Kbps",
            "1 Gbps" or a bare number ("3"), or a value that is already
            numeric (including NaN). Unit matching is case-insensitive and
            the space before the unit is optional.

    Returns:
        The bandwidth as a float in Mbps. Kbps values are divided by 1000,
        Gbps values are multiplied by 1000, and strings without a recognised
        unit are parsed as-is. None yields NaN.

    Raises:
        ValueError: If the numeric part of a string cannot be parsed.
    """
    if bandwidth_str is None:
        return float("nan")

    # Numeric input (e.g. NaN for a missing cell, or a value converted by an
    # earlier run) has no unit to strip; the substring checks below would
    # raise TypeError on it.
    if not isinstance(bandwidth_str, str):
        return float(bandwidth_str)

    text = bandwidth_str.strip()
    unit = text[-4:].lower()
    magnitude = text[:-4]

    if unit == 'mbps':
        return float(magnitude)
    if unit == 'kbps':
        return float(magnitude) / 1000
    if unit == 'gbps':
        return float(magnitude) * 1000

    # No recognised unit: treat the whole string as a number.
    return float(text)
    
# Run every value of both bandwidth columns through parse_bandwidth,
# replacing each column with the parsed result.
for bandwidth_column in ('Required Bandwidth', 'Allocated Bandwidth'):
    df[bandwidth_column] = df[bandwidth_column].apply(parse_bandwidth)

In [None]:
# Pairwise correlation between the numeric columns (non-numeric columns are skipped)
corr_matrix = df.corr(numeric_only=True)

# Visualize the correlation matrix using Plotly.
# Correlation coefficients lie in [-1, 1]. Pinning the color range to those bounds
# puts the neutral midpoint of the diverging RdBu scale at 0, so positive and
# negative correlations always land on opposite sides of the scale regardless of
# the values actually present. ".2f" rounds the cell labels to two decimals.
fig = px.imshow(
    corr_matrix,
    text_auto='.2f',
    zmin=-1,
    zmax=1,
    color_continuous_scale='RdBu',
    title='Correlation Matrix'
)

fig.show()

In [None]:
# Visualizations start here.

# Count of rows per application type, one colored bar per type.
fig = px.histogram(
    df,
    x='Application Type',
    color='Application Type',
    nbins=30,
    title='Distribution of Application Types',
)
fig.update_layout(bargap=0.2, yaxis_title='Count').show()

In [None]:
# Scatter of allocated bandwidth against required bandwidth,
# with points colored by application type.
fig = (
    px.scatter(
        df,
        x='Required Bandwidth',
        y='Allocated Bandwidth',
        color='Application Type',
        title='Required vs Allocated Bandwidth',
    )
    .update_layout(
        xaxis_title='Required Bandwidth (Mbps)',
        yaxis_title='Allocated Bandwidth (Mbps)',
    )
    .update_xaxes(tickangle=90)  # vertical x tick labels
)
fig.show()

In [None]:
# Scatter of latency against signal strength, colored by application type.
fig = px.scatter(
    df,
    x='Signal Strength',
    y='Latency',
    color='Application Type',
    title='Signal Strength vs Latency',
)
fig.update_layout(
    yaxis_title='Latency (ms)',
    xaxis_title='Signal Strength (dBm)',
).show()

In [None]:
# Box plot of latency, one box per application type.
fig = (
    px.box(
        df,
        x='Application Type',
        y='Latency',
        color='Application Type',
        title='Latency Distribution by Application Type',
    )
    .update_layout(
        xaxis_title='Application Type',
        yaxis_title='Latency (ms)',
    )
    .update_xaxes(tickangle=45)  # slant the x tick labels
)
fig.show()

In [None]:
# Resource Allocation distribution (30 bins, single bar color)
fig = px.histogram(
    df,
    x='Resource Allocation',
    nbins=30,
    title='Resource Allocation Distribution',
    color_discrete_sequence=['indianred'],
)
fig.update_layout(
    xaxis_title='Resource Allocation (%)',
    yaxis_title='Count',
    bargap=0.2
)
# Render explicitly, as every other plotting cell in this notebook does, instead
# of relying on update_layout()'s return value being the cell's last expression.
fig.show()

In [None]:
# Box plot of resource allocation, one box per application type.
fig = px.box(
    df,
    x='Application Type',
    y='Resource Allocation',
    color='Application Type',
    title='Resource Allocation by Application Type',
)
fig.update_layout(
    xaxis_title='Application Type',
    yaxis_title='Resource Allocation (%)',
).update_xaxes(tickangle=45)  # slant the x tick labels
fig.show()

In [None]:
# Resource Allocation vs Latency, colored by application type.
fig = (
    px.scatter(
        df,
        x='Resource Allocation',
        y='Latency',
        color='Application Type',
        title='Resource Allocation vs Latency',
    )
    .update_layout(
        yaxis_title='Latency (ms)',
        xaxis_title='Resource Allocation (%)',
    )
)
fig.show()