In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [None]:
# Read the raw 5G quality-of-service records from the 'in' worksheet of the
# Excel workbook into a DataFrame.
file_path = '/mnt/data/Quality of Service 5G.xlsx'
data = pd.read_excel(io=file_path, sheet_name='in')

In [None]:
# Data preprocessing
# Convert the unit-suffixed text columns to plain floats.
#
# Bandwidth values appear with either a ' Mbps' or a ' Kbps' suffix. Both are
# normalised to Mbps so the two bandwidth columns (and any ratio derived from
# them) are on a single scale; simply stripping the suffix would treat
# '500 Kbps' as 500 Mbps.
KBPS_PER_MBPS = 1000.0


def _strip_unit(column, unit):
    """Return `column` as floats with the literal `unit` suffix removed.

    Columns that are already numeric are passed through unchanged (cast to
    float) so this cell can be re-run without failing on the `.str` accessor.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column.astype(float)
    return column.str.replace(unit, '', regex=False).astype(float)


def _bandwidth_to_mbps(column):
    """Parse '<number> Mbps' / '<number> Kbps' strings into floats in Mbps.

    Missing entries stay NaN. Columns that are already numeric are assumed to
    have been converted by a previous run of this cell and are returned as-is.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column.astype(float)
    # Remember which rows were expressed in Kbps before the suffix is removed.
    is_kbps = column.str.contains(' Kbps', regex=False, na=False)
    magnitude = (
        column
        .str.replace(' Mbps', '', regex=False)
        .str.replace(' Kbps', '', regex=False)
        .astype(float)
    )
    # Keep Mbps rows untouched; rescale Kbps rows to Mbps.
    return magnitude.where(~is_kbps, magnitude / KBPS_PER_MBPS)


data['Signal_Strength'] = _strip_unit(data['Signal_Strength'], ' dBm')
data['Latency'] = _strip_unit(data['Latency'], ' ms')
data['Required_Bandwidth'] = _bandwidth_to_mbps(data['Required_Bandwidth'])
data['Allocated_Bandwidth'] = _bandwidth_to_mbps(data['Allocated_Bandwidth'])



In [None]:
# Feature engineering
# Ratio of allocated to required bandwidth. A zero requirement yields inf/NaN
# here; non-finite values are not handled in this cell.
data['Bandwidth_Efficiency'] = data['Allocated_Bandwidth'] / data['Required_Bandwidth']

# Bucket signal strength (dBm) into three ordered quality bands. pd.cut uses
# right-closed intervals, so the bands are (-inf, -85], (-85, -70], (-70, 0].
# The lowest edge is -inf rather than -100 so that readings at or below
# -100 dBm are labelled 'Weak' instead of falling outside every bin (NaN).
data['Signal_Quality_Category'] = pd.cut(
    data['Signal_Strength'],
    bins=[-np.inf, -85, -70, 0],
    labels=['Weak', 'Moderate', 'Strong'],
)

In [None]:
# Columns fed to the models, and the two columns to be predicted.
# NOTE(review): Bandwidth_Efficiency is computed in this notebook as
# Allocated_Bandwidth / Required_Bandwidth, and both of those columns are in
# `features` -- confirm that predicting it from its own inputs is intended.
features = [
    'Application_Type',
    'Signal_Strength',
    'Required_Bandwidth',
    'Allocated_Bandwidth',
    'Signal_Quality_Category',
]
target_latency, target_efficiency = 'Latency', 'Bandwidth_Efficiency'

In [None]:

# Turn +/-inf into NaN, then drop every row whose Latency or
# Bandwidth_Efficiency is missing so both targets are finite.
data = (
    data
    .replace([np.inf, -np.inf], np.nan)
    .dropna(subset=['Latency', 'Bandwidth_Efficiency'])
)

In [None]:

# Pull the two target columns out of the cleaned frame.
y_latency, y_efficiency = data[target_latency], data[target_efficiency]

In [None]:
# Hold out 20% of the rows for testing. Both targets are split against the
# same feature frame with the same options (including the seed).
X = data[features]
split_options = {'test_size': 0.2, 'random_state': 42}

(X_train_latency, X_test_latency,
 y_train_latency, y_test_latency) = train_test_split(X, y_latency, **split_options)

(X_train_efficiency, X_test_efficiency,
 y_train_efficiency, y_test_efficiency) = train_test_split(X, y_efficiency, **split_options)

In [None]:
# Column-wise preprocessing shared by every model.
numerical_features = ['Signal_Strength', 'Required_Bandwidth', 'Allocated_Bandwidth']
categorical_features = ['Application_Type', 'Signal_Quality_Category']

# Numeric columns: fill gaps with the column mean, then standardise.
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode; categories not seen during fit are ignored at transform time.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

# Route each group of columns to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

In [None]:
# One end-to-end pipeline (preprocessing + regressor) per candidate model.
# Every pipeline references the same `preprocessor` object.
models = {
    label: Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', regressor)])
    for label, regressor in (
        ('Linear Regression', LinearRegression()),
        ('Random Forest', RandomForestRegressor(random_state=42)),
    )
}

In [None]:
# Model training and evaluation
def _regression_metrics(y_true, y_pred):
    """Return MAE, RMSE and R2 for one set of predictions as a dict."""
    return {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R2': r2_score(y_true, y_pred),
    }


results = {}
for name, model in models.items():
    # Fit on the latency target and predict on its held-out rows.
    print(f'Training {name} for Latency Prediction...')
    y_pred_latency = model.fit(X_train_latency, y_train_latency).predict(X_test_latency)

    # The same pipeline object is then refit on the efficiency target, so
    # after this loop each entry in `models` holds the efficiency fit.
    print(f'Training {name} for Bandwidth Efficiency Prediction...')
    y_pred_efficiency = model.fit(X_train_efficiency, y_train_efficiency).predict(X_test_efficiency)

    # Store test-set scores for both targets under the model's name.
    results[name] = {
        'Latency': _regression_metrics(y_test_latency, y_pred_latency),
        'Efficiency': _regression_metrics(y_test_efficiency, y_pred_efficiency),
    }

# Display results
results