In [None]:
import os

# Report the directory the kernel is currently running in.
starting_dir = os.getcwd()
print("Current Working Directory:", starting_dir)

# Switch the process working directory to the 'Big data' folder on the Desktop.
project_dir = 'C:/Users/User/Desktop/Big data'
os.chdir(project_dir)

# Echo the directory again to confirm the switch took effect.
print("New Working Directory:", os.getcwd())

In [None]:
import pandas as pd

# Absolute locations of the two Excel workbooks read by this notebook.
performance_xlsx = 'C:/Users/User/Desktop/Big data/4G_perfromance_data_atCBH.xlsx'
geolocation_xlsx = 'C:/Users/User/Desktop/Big data/geo_location_data.xlsx'

# Read the 4G performance workbook first, then the geolocation workbook.
performance_data = pd.read_excel(performance_xlsx)
geolocation_data = pd.read_excel(geolocation_xlsx)

# Preview the leading rows of each frame, performance first.
for loaded_frame in (performance_data, geolocation_data):
    print(loaded_frame.head())

In [None]:
# Report how many values are missing in each performance column.
# No imputation is applied in this cell; the counts are informational only.
print(performance_data.isnull().sum())

# Cast the coordinate columns to float with a vectorized astype() instead of
# calling float() on every element. Unparseable strings still raise
# ValueError; missing entries become NaN rather than raising TypeError.
# 'Longitue' is the spelling this notebook uses for the column, so it is kept.
for coordinate_column in ('Latitude', 'Longitue'):
    geolocation_data[coordinate_column] = geolocation_data[coordinate_column].astype(float)

# Summary statistics of both frames as a quick sanity check.
print(performance_data.describe())
print(geolocation_data.describe())

In [None]:
# KPI columns that are averaged for every site.
kpi_columns = [
    'RRC Connected User_Opt2',
    'DL PRB Utilization_ENG(%)',
    'DL_User Throughput_Eng(Mbit/s)',
    '4G RRC Rejection_ SOC',
    'RO.RAB Failures',
]

# Collapse the performance rows to one row per Site_ID holding each KPI's mean.
per_site_means = performance_data.groupby('Site_ID').agg(dict.fromkeys(kpi_columns, 'mean'))
aggregated_performance = per_site_means.reset_index()

# Attach the geolocation columns. The merge uses pandas' default inner join,
# so only Site_IDs present in both frames are kept.
merged_data = aggregated_performance.merge(geolocation_data, on='Site_ID')

# Preview the combined frame.
print(merged_data.head())

In [None]:
import numpy as np

# Screening thresholds. A site is shortlisted only when it shows low downlink
# user throughput together with high PRB utilization, many connected users and
# many rejections/failures, i.e. when every comparison below holds at once.
# All comparisons are strict (< or >).
THROUGHPUT_BELOW_MBPS = 3        # DL user throughput must be under this (Mbit/s)
PRB_UTILIZATION_ABOVE_PCT = 70   # DL PRB utilization must exceed this (%)
CONNECTED_USERS_ABOVE = 500      # RRC connected users must exceed this
RRC_REJECTIONS_ABOVE = 100       # 4G RRC rejections must exceed this
RAB_FAILURES_ABOVE = 100         # RAB failures must exceed this

# Boolean mask over merged_data, True where a site meets all five conditions.
criteria = (
    (merged_data['DL_User Throughput_Eng(Mbit/s)'] < THROUGHPUT_BELOW_MBPS) &
    (merged_data['DL PRB Utilization_ENG(%)'] > PRB_UTILIZATION_ABOVE_PCT) &
    (merged_data['RRC Connected User_Opt2'] > CONNECTED_USERS_ABOVE) &
    (merged_data['4G RRC Rejection_ SOC'] > RRC_REJECTIONS_ABOVE) &
    (merged_data['RO.RAB Failures'] > RAB_FAILURES_ABOVE)
)
optimal_sites = merged_data[criteria]

# Display the shortlisted sites.
print("Optimal sites for 5G deployment:")
print(optimal_sites)

In [None]:
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter the sites by coordinates. Colour encodes DL PRB utilization and
# marker size encodes DL user throughput.
fig_sites, ax_sites = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=merged_data,
    x='Longitue',
    y='Latitude',
    hue='DL PRB Utilization_ENG(%)',
    size='DL_User Throughput_Eng(Mbit/s)',
    palette='viridis',
    ax=ax_sites,
)
ax_sites.set_title('Geolocation and Performance of Sites')
ax_sites.set_xlabel('Longitude')
ax_sites.set_ylabel('Latitude')
# The legend seaborn draws automatically lists the hue and size variables under
# their own headings, so it is not rebuilt under a single throughput-only
# title, which would mislabel the utilization entries.
plt.show()

# Side-by-side histograms: throughput on the left, PRB utilization on the right.
fig_hist, (ax_throughput, ax_utilization) = plt.subplots(1, 2, figsize=(12, 6))
sns.histplot(merged_data['DL_User Throughput_Eng(Mbit/s)'], bins=20, color='blue', ax=ax_throughput)
ax_throughput.set_title('Throughput Distribution')

sns.histplot(merged_data['DL PRB Utilization_ENG(%)'], bins=20, color='red', ax=ax_utilization)
ax_utilization.set_title('DL PRB Utilization Distribution')
plt.show()

In [None]:
# Report the per-column count of missing values, performance frame first.
for frame_to_check in (performance_data, geolocation_data):
    print(frame_to_check.isnull().sum())

# Discard every row that contains at least one missing value.
performance_data = performance_data.dropna()
geolocation_data = geolocation_data.dropna()

# Join the performance rows with the geolocation rows on Site_ID and preview.
# Note: this rebinds merged_data to a join of the raw performance rows.
merged_data = performance_data.merge(geolocation_data, on='Site_ID')
print(merged_data.head())

In [None]:
pip install plotly

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# 1. Map Visualization for Site Locations
# One marker per row of geolocation_data, with the Site_ID shown on hover.
# Marker size is fixed rather than data-driven: latitude is a coordinate, not
# a magnitude, and plotly rejects negative values for marker size.
fig_map = px.scatter_geo(
    geolocation_data,
    lat='Latitude',
    lon='Longitue',
    hover_name='Site_ID',
    title='Site Locations'
)
fig_map.update_traces(marker=dict(size=8))
fig_map.show()

# 2. Bar Chart for DL User Throughput
# Average the KPI per site before plotting. px.bar draws one segment per input
# row and stacks segments that share an x value, so plotting the raw rows
# would show a per-site total whenever a site has more than one row.
throughput_per_site = (
    performance_data
    .groupby('Site_ID', as_index=False)['DL_User Throughput_Eng(Mbit/s)']
    .mean()
)
fig_bar = px.bar(
    throughput_per_site,
    x='Site_ID',
    y='DL_User Throughput_Eng(Mbit/s)',
    title='Downlink User Throughput per Site',
    labels={'DL_User Throughput_Eng(Mbit/s)': 'DL Throughput (Mbit/s)'}
)
fig_bar.show()

# 3. Line Chart of DL Throughput Against DL PRB Utilization
# Stored as `fig_line`, the name the dashboard cell further down reads, so the
# per-site bar chart held in `fig_bar` is not overwritten. Rows are sorted by
# utilization so the line runs left to right instead of zig-zagging.
fig_line = px.line(
    performance_data.sort_values('DL PRB Utilization_ENG(%)'),
    x='DL PRB Utilization_ENG(%)',
    y='DL_User Throughput_Eng(Mbit/s)',
    title='DL Throughput Over utlization',
    labels={'DL_User Throughput_Eng(Mbit/s)': 'DL Throughput (Mbit/s)'}
)
fig_line.show()

# 4. Heatmap for Correlation Between Metrics
import numpy as np

#import plotly.graph_objects as go

# .

# Keep only the numeric columns of merged_data.
numeric_data = merged_data.select_dtypes(include=['number'])

# Discard columns whose variance equals zero.
column_variance = numeric_data.var()
numeric_data = numeric_data.loc[:, column_variance != 0]

# Pairwise correlations between the remaining columns.
corr_matrix = numeric_data.corr()

# Render the matrix as a heatmap with the column names on both axes.
correlation_trace = go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='Viridis',
)
fig_heatmap = go.Figure(data=correlation_trace)

fig_heatmap.show()

In [None]:
pip install das

In [None]:
from dash import Dash, dcc, html

# Create the Dash application that hosts the dashboard.
app = Dash(__name__)

# Page layout: a heading followed by one graph component per figure.
# NOTE(review): fig_map, fig_bar, fig_line and fig_heatmap must already exist
# in the kernel from earlier cells; confirm each is defined before running.
app.layout = html.Div([
    html.H1("5G Network Performance Dashboard"),

    # Map
    dcc.Graph(id='map-graph', figure=fig_map),

    # Bar Chart
    dcc.Graph(id='bar-chart', figure=fig_bar),

    # Line Chart
    dcc.Graph(id='line-chart', figure=fig_line),

    # Heatmap
    dcc.Graph(id='heatmap', figure=fig_heatmap)
])

# Start the development server when the cell is executed as the main module.
if __name__ == '__main__':
    # Recent Dash releases expose `run` and no longer provide `run_server`;
    # fall back to `run_server` on older installations that lack `run`.
    if hasattr(app, 'run'):
        app.run(debug=True)
    else:
        app.run_server(debug=True)