<a href="https://colab.research.google.com/github/anshupandey/AIOps-EL/blob/main/part3_standard_code_with_mlflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predicting Quality of Service (QoS) Metrics for 5G Network Optimization

In [None]:
!pip install mlflow --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.3/27.3 MB[0m [31m57.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m575.1/575.1 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.2/203.2 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import mlflow
import mlflow.sklearn

In [None]:
!wget -q https://www.dropbox.com/scl/fi/0rncal8biwhhw5opxyxz6/Quality-of-Service-5G.xlsx?rlkey=gdoq5hw1fgc60khl3equbreyn&st=uzqgtrca&dl=0
!mv Quality-of-Service-5G.xlsx?rlkey=gdoq5hw1fgc60khl3equbreyn Quality-of-Service-5G.xlsx

In [None]:
# Read the 'in' worksheet of the downloaded workbook into a DataFrame
file_path = 'Quality-of-Service-5G.xlsx'
data = pd.read_excel(io=file_path, sheet_name='in')
# Show (rows, columns) as the cell output
data.shape

(400, 8)

In [None]:
# Preview the first five raw rows
data.head(n=5)

Unnamed: 0,Timestamp,User_ID,Application_Type,Signal_Strength,Latency,Required_Bandwidth,Allocated_Bandwidth,Resource_Allocation
0,2023-09-03 10:00:00,User_1,Video_Call,-75 dBm,30 ms,10 Mbps,15 Mbps,0.7
1,2023-09-03 10:00:00,User_2,Voice_Call,-80 dBm,20 ms,100 Kbps,120 Kbps,0.8
2,2023-09-03 10:00:00,User_3,Streaming,-85 dBm,40 ms,5 Mbps,6 Mbps,0.75
3,2023-09-03 10:00:00,User_4,Emergency_Service,-70 dBm,10 ms,1 Mbps,1.5 Mbps,0.9
4,2023-09-03 10:00:00,User_5,Online_Gaming,-78 dBm,25 ms,2 Mbps,3 Mbps,0.85


# Data preprocessing

In [None]:

# Convert columns to numeric where applicable
def _strip_unit(column, unit):
    """Drop the literal ``unit`` suffix from a string column and parse the rest as float."""
    return column.str.replace(unit, '', regex=False).astype(float)


def _bandwidth_to_mbps(column):
    """Parse '<number> Mbps' / '<number> Kbps' strings into floats expressed in Mbps.

    The column mixes both units (e.g. '10 Mbps' and '100 Kbps'). Removing the
    suffix alone would leave 100 Kbps looking ten times larger than 10 Mbps,
    so Kbps rows are rescaled to Mbps.
    """
    is_kbps = column.str.contains('Kbps', regex=False, na=False)
    magnitude = (column.str.replace(' Mbps', '', regex=False)
                       .str.replace(' Kbps', '', regex=False)
                       .astype(float))
    # 1 Mbps = 1000 Kbps; rows that are not in Kbps keep their value unchanged
    return magnitude.where(~is_kbps, magnitude / 1000.0)


data['Signal_Strength'] = _strip_unit(data['Signal_Strength'], ' dBm')
data['Latency'] = _strip_unit(data['Latency'], ' ms')
data['Required_Bandwidth'] = _bandwidth_to_mbps(data['Required_Bandwidth'])
data['Allocated_Bandwidth'] = _bandwidth_to_mbps(data['Allocated_Bandwidth'])
data.head()

Unnamed: 0,Timestamp,User_ID,Application_Type,Signal_Strength,Latency,Required_Bandwidth,Allocated_Bandwidth,Resource_Allocation
0,2023-09-03 10:00:00,User_1,Video_Call,-75.0,30.0,10.0,15.0,0.7
1,2023-09-03 10:00:00,User_2,Voice_Call,-80.0,20.0,100.0,120.0,0.8
2,2023-09-03 10:00:00,User_3,Streaming,-85.0,40.0,5.0,6.0,0.75
3,2023-09-03 10:00:00,User_4,Emergency_Service,-70.0,10.0,1.0,1.5,0.9
4,2023-09-03 10:00:00,User_5,Online_Gaming,-78.0,25.0,2.0,3.0,0.85


# Feature engineering

In [None]:
# Derived features: allocated/required bandwidth ratio and a coarse signal bucket
data['Bandwidth_Efficiency'] = data['Allocated_Bandwidth'].div(data['Required_Bandwidth'])

# Bucket edges in dBm; pd.cut's default intervals are closed on the right
signal_bin_edges = [-100, -85, -70, 0]
signal_bin_labels = ['Weak', 'Moderate', 'Strong']
data['Signal_Quality_Category'] = pd.cut(
    data['Signal_Strength'],
    bins=signal_bin_edges,
    labels=signal_bin_labels,
)
data.head()

Unnamed: 0,Timestamp,User_ID,Application_Type,Signal_Strength,Latency,Required_Bandwidth,Allocated_Bandwidth,Resource_Allocation,Bandwidth_Efficiency,Signal_Quality_Category
0,2023-09-03 10:00:00,User_1,Video_Call,-75.0,30.0,10.0,15.0,0.7,1.5,Moderate
1,2023-09-03 10:00:00,User_2,Voice_Call,-80.0,20.0,100.0,120.0,0.8,1.2,Moderate
2,2023-09-03 10:00:00,User_3,Streaming,-85.0,40.0,5.0,6.0,0.75,1.2,Weak
3,2023-09-03 10:00:00,User_4,Emergency_Service,-70.0,10.0,1.0,1.5,0.9,1.5,Moderate
4,2023-09-03 10:00:00,User_5,Online_Gaming,-78.0,25.0,2.0,3.0,0.85,1.5,Moderate


In [None]:
# Preview the first twenty rows including the engineered columns
data.head(n=20)

Unnamed: 0,Timestamp,User_ID,Application_Type,Signal_Strength,Latency,Required_Bandwidth,Allocated_Bandwidth,Resource_Allocation,Bandwidth_Efficiency,Signal_Quality_Category
0,2023-09-03 10:00:00,User_1,Video_Call,-75.0,30.0,10.0,15.0,0.7,1.5,Moderate
1,2023-09-03 10:00:00,User_2,Voice_Call,-80.0,20.0,100.0,120.0,0.8,1.2,Moderate
2,2023-09-03 10:00:00,User_3,Streaming,-85.0,40.0,5.0,6.0,0.75,1.2,Weak
3,2023-09-03 10:00:00,User_4,Emergency_Service,-70.0,10.0,1.0,1.5,0.9,1.5,Moderate
4,2023-09-03 10:00:00,User_5,Online_Gaming,-78.0,25.0,2.0,3.0,0.85,1.5,Moderate
5,2023-09-03 10:00:00,User_6,Background_Download,-90.0,50.0,500.0,550.0,0.7,1.1,Weak
6,2023-09-03 10:00:00,User_7,Web_Browsing,-88.0,30.0,1.0,1.0,0.6,1.0,Weak
7,2023-09-03 10:00:00,User_8,IoT_Temperature,-95.0,100.0,10.0,15.0,0.5,1.5,Weak
8,2023-09-03 10:00:00,User_9,Video_Streaming,-82.0,35.0,3.0,3.5,0.8,1.166667,Moderate
9,2023-09-03 10:00:00,User_10,File_Download,-75.0,45.0,2.0,2.0,0.7,1.0,Moderate


In [None]:
# Input columns fed to the models, and the two columns to be predicted
features = [
    'Application_Type',
    'Signal_Strength',
    'Required_Bandwidth',
    'Allocated_Bandwidth',
    'Signal_Quality_Category',
]
target_latency, target_efficiency = 'Latency', 'Bandwidth_Efficiency'

In [None]:
# Replace +/-inf with NaN so such rows can be removed together with missing ones
# (presumably the infinities come from the efficiency ratio when the required
# bandwidth is 0 -- verify against the data)
data = data.replace(to_replace=[np.inf, -np.inf], value=np.nan)
print(data.shape)
# Keep only the rows in which both prediction targets are present
required_targets = ['Latency', 'Bandwidth_Efficiency']
data = data.dropna(subset=required_targets)
print(data.shape)

(400, 10)
(393, 10)


In [None]:
# Pull the two target columns out of the cleaned frame
y_latency, y_efficiency = data[target_latency], data[target_efficiency]

In [None]:
# Hold out 20% of the rows for testing, once per target.
# Both calls use the same seed and the same feature frame.
X = data[features]
split_options = {'test_size': 0.2, 'random_state': 42}
(X_train_latency, X_test_latency,
 y_train_latency, y_test_latency) = train_test_split(X, y_latency, **split_options)
(X_train_efficiency, X_test_efficiency,
 y_train_efficiency, y_test_efficiency) = train_test_split(X, y_efficiency, **split_options)

In [None]:
# Column groups and the transformation applied to each group
categorical_features = ['Application_Type', 'Signal_Quality_Category']
numerical_features = ['Signal_Strength', 'Required_Bandwidth', 'Allocated_Bandwidth']

# Categorical columns: fill gaps with the most frequent value, then one-hot encode.
# Categories not seen during fitting are ignored at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Numerical columns: fill gaps with the column mean, then standardise
numerical_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numerical_transformer = Pipeline(steps=numerical_steps)

# Route each column group through its own transformer
column_routes = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)

In [None]:
# Model pipelines
from sklearn.base import clone

# Every pipeline receives its own unfitted copy of the preprocessing step.
# Pipeline.fit fits its steps in place, so a single shared `preprocessor`
# object would be refit by whichever pipeline was trained last.
models = {
    'Linear Regression': Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                         ('regressor', LinearRegression())]),
    'Random Forest': Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                      ('regressor', RandomForestRegressor(random_state=42))])
}

In [None]:
# set tracking server
import os

# The server address can be overridden through the MLFLOW_TRACKING_URI
# environment variable; without it the original hard-coded server is used.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://3.107.14.172:5000/"))
mlflow.set_experiment("QoS_Prediction")

2024/12/10 04:33:08 INFO mlflow.tracking.fluent: Experiment with name 'QoS_Prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/209821809856878572', creation_time=1733805188099, experiment_id='209821809856878572', last_update_time=1733805188099, lifecycle_stage='active', name='QoS_Prediction', tags={}>

In [None]:
# Model training and evaluation
def _regression_metrics(y_true, y_pred):
    """Return MAE, RMSE and R2 of ``y_pred`` measured against ``y_true`` as a dict."""
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred)
    }


results = {}
mlflow.sklearn.autolog()
for name, model in models.items():
    # One parent run per model type. Each target is then fit inside its own
    # nested child run, so what autologging records for the latency fit is not
    # mixed with (or replaced by) what it records for the efficiency fit.
    with mlflow.start_run(run_name=name+"Anshu"+str(np.random.randint(1000)),log_system_metrics=True) as run:
        with mlflow.start_run(run_name=f'{name} - Latency', nested=True):
            print(f'Training {name} for Latency Prediction...')
            model.fit(X_train_latency, y_train_latency)
            y_pred_latency = model.predict(X_test_latency)

        with mlflow.start_run(run_name=f'{name} - Efficiency', nested=True):
            print(f'Training {name} for Bandwidth Efficiency Prediction...')
            model.fit(X_train_efficiency, y_train_efficiency)
            y_pred_efficiency = model.predict(X_test_efficiency)

        # Store results (held-out test metrics for both targets)
        results[name] = {
            'Latency': _regression_metrics(y_test_latency, y_pred_latency),
            'Efficiency': _regression_metrics(y_test_efficiency, y_pred_efficiency)
        }

# Display results
results

2024/12/10 04:37:11 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Training Linear Regression for Latency Prediction...
Training Linear Regression for Bandwidth Efficiency Prediction...
🏃 View run Linear RegressionAnshu694 at: http://3.107.14.172:5000/#/experiments/209821809856878572/runs/539b9fbfa88a40f983f86dc9436f4438
🧪 View experiment at: http://3.107.14.172:5000/#/experiments/209821809856878572


2024/12/10 04:37:31 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/10 04:37:31 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2024/12/10 04:37:32 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Training Random Forest for Latency Prediction...
Training Random Forest for Bandwidth Efficiency Prediction...
🏃 View run Random ForestAnshu172 at: http://3.107.14.172:5000/#/experiments/209821809856878572/runs/a7cf201b4cde4d93913332df07b08093
🧪 View experiment at: http://3.107.14.172:5000/#/experiments/209821809856878572


2024/12/10 04:38:17 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/10 04:38:17 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


{'Linear Regression': {'Latency': {'MAE': 6.365094420185795,
   'RMSE': 7.946033854829205,
   'R2': 0.8076767901442934},
  'Efficiency': {'MAE': 0.1350199047317324,
   'RMSE': 0.21662304096277932,
   'R2': 0.00029450045440848616}},
 'Random Forest': {'Latency': {'MAE': 1.8966356238698008,
   'RMSE': 4.429782018774095,
   'R2': 0.9402283026952916},
  'Efficiency': {'MAE': 0.03321397254334228,
   'RMSE': 0.09458618553685297,
   'R2': 0.8094020574968452}}}

In [None]:
# Build the evaluation frame as a new DataFrame (test features + 'target').
# Plain assignment would make eval_data another name for X_test_latency, and
# adding the column would then also insert 'target' into the test features.
eval_data = X_test_latency.assign(target=y_test_latency)

In [None]:
# Switch the active MLflow experiment for the latency-only runs that follow
mlflow.set_experiment(experiment_name="Latency_Prediction")

2024/12/10 05:02:36 INFO mlflow.tracking.fluent: Experiment with name 'Latency_Prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/959328149099913042', creation_time=1733806956605, experiment_id='959328149099913042', last_update_time=1733806956605, lifecycle_stage='active', name='Latency_Prediction', tags={}>

In [None]:
# Model pipelines
from sklearn.base import clone

# Every pipeline receives its own unfitted copy of the preprocessing step.
# Pipeline.fit fits its steps in place, so a single shared `preprocessor`
# object would be refit by whichever pipeline was trained last.
models = {
    'Linear Regression': Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                         ('regressor', LinearRegression())]),
    'Random Forest': Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                      ('regressor', RandomForestRegressor(random_state=42,max_depth=12,min_samples_leaf=3))])
}

In [None]:
mlflow.sklearn.autolog()
# Evaluation result of every model, keyed by model name. `result` alone is
# overwritten on each iteration and would only retain the last model's output.
evaluation_results = {}
for name, model in models.items():
    run_name = f"Latency {name} Anshu"+str(np.random.randint(1000))
    with mlflow.start_run(run_name=run_name,log_system_metrics=True) as run:
        print(f'Training {name} for Latency Prediction...')
        model.fit(X_train_latency, y_train_latency)

        # Log the fitted pipeline under the run name and resolve its artifact URI
        mlflow.sklearn.log_model(model,run_name)
        model_uri = mlflow.get_artifact_uri(run_name)
        # Predict on the feature columns only, in case the test frame carries
        # extra columns (such as an evaluation target) added elsewhere.
        y_pred_latency = model.predict(X_test_latency[features])

        # Evaluate the logged model on the held-out frame; 'target' holds the true latency
        result = mlflow.evaluate(model_uri,eval_data,targets='target',model_type='regressor')
        evaluation_results[name] = result

2024/12/10 05:05:38 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Training Linear Regression for Latency Prediction...




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/12/10 05:05:55 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run Latency Linear Regression Anshu506 at: http://3.107.14.172:5000/#/experiments/959328149099913042/runs/16fbb0d6a0c64d02839ece333959f939
🧪 View experiment at: http://3.107.14.172:5000/#/experiments/959328149099913042


2024/12/10 05:05:56 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/10 05:05:57 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
2024/12/10 05:05:57 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Training Random Forest for Latency Prediction...




Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

2024/12/10 05:06:25 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


🏃 View run Latency Random Forest Anshu864 at: http://3.107.14.172:5000/#/experiments/959328149099913042/runs/1de9fbde5a7248be8ce2b70e8bba4dc7
🧪 View experiment at: http://3.107.14.172:5000/#/experiments/959328149099913042


2024/12/10 05:06:26 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/10 05:06:27 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!
