In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import numpy as np
import math

# Load the measurement dataset (replace the placeholder with your own file).
df = pd.read_csv("/content/'File_Name'.csv") #Ex:Data_4G.csv

# Features fed to the model. This list is only a suggested starting point:
# use feature importance to find the most relevant columns, but keep the
# ones that are physically essential (e.g. distance and frequency).
# Note: the "\n" in the first name is a real newline inside the column label.
selected_features = [
    "Distance Between Tx and Rx\n(m)",
    'Frequency',
    'Altitude',
    'Tinggi Antena',
    'Elevation Angle',
    'Azimuth offset angle',
    "Horizontal Distance of Rx from Boresight of Tx",
    "Vertical Distance of Rx from Boresight of Tx",
]
X_selected = df[selected_features]
y = df['Signal Level']

# 75 % / 25 % split; train_test_split shuffles the rows before splitting.
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.25, random_state=42)

# Train the model using the subset of selected features
model_selected = RandomForestRegressor(random_state=42)
model_selected.fit(X_train, y_train)

# Predictions for the training and the test partitions
model_predictions_train = model_selected.predict(X_train)
model_predictions_test = model_selected.predict(X_test)

# Predictions for every row, in the same order as df.
# Concatenating the train and test predictions would NOT give this order,
# because the split shuffles the rows; predicting on X_selected keeps each
# prediction aligned with its own row.
all_model_predictions = model_selected.predict(X_selected)

In [None]:
# Empirical baselines (Okumura-Hata and COST 231) plus the link-budget
# parameters reused by the 3GPP models further down.
c = 3e8  # Speed of light in m/s
frequency_MHz = 2100 # You can adjust the frequency, but it must remain a single constant value, e.g. 2300.
distance_m = df["Distance Between Tx and Rx\n(m)"]
height_transmitter_m = df["Tinggi Antena"] # "Tinggi Antena" = antenna height
height_receiver_m = 1  # Receiver height, e.g. a phone held about 1 meter above ground
transmit_daya = 43  # Transmitter power in dBm
gain_antena_tx = 10  # Antenna gain in dB
# The transmitter power and antenna gain used here are standard values
# commonly applied in this region. Adjust them to match the typical
# standards in your area.

distance_km = distance_m / 1000  # The Hata-style formulas below take the distance in km
log_height_tx = np.log10(height_transmitter_m)

# --- Okumura-Hata ---
# NOTE(review): the textbook free-space formula 20*log10(4*pi*d*f/c) expects f in Hz,
# while this term uses MHz and is then ADDED to the received level. It is kept
# exactly as written because every downstream curve depends on it; confirm
# that this "adjustment" is intentional.
fspl_okumura_dB = 20 * np.log10((4 * np.pi * distance_m * frequency_MHz) / c)
# Mobile-antenna correction term, evaluated at the receiver height.
ahre = (1.1 * np.log10(frequency_MHz) - 0.7) * height_receiver_m - (1.56 * np.log10(frequency_MHz) - 0.8)
# NOTE(review): Okumura-Hata is usually quoted for 150-1500 MHz; verify that using it at
# this frequency is acceptable for your comparison.
pathloss_okumura = 69.55 + 26.16 * np.log10(frequency_MHz) - 13.82 * log_height_tx - ahre + (44.9 - 6.55 * log_height_tx) * np.log10(distance_km)
level_sinyal_okumura = transmit_daya - pathloss_okumura + gain_antena_tx + fspl_okumura_dB

# --- COST 231 ---
frequency_MHz_cost231 = 2100  # Frequency in MHz used for COST 231
Cm = 0  # Urban area correction factor
fspl_cost231_dB = 20 * np.log10((4 * np.pi * distance_m * frequency_MHz_cost231) / c)
# Uses the COST 231 frequency (the original mistakenly read frequency_MHz here,
# which only worked while both constants happened to be equal).
ahre_cost231 = (1.1 * np.log10(frequency_MHz_cost231) - 0.7) * height_receiver_m - (1.56 * np.log10(frequency_MHz_cost231) - 0.8)
pathloss_cost_231 = 46.3 + 33.9 * np.log10(frequency_MHz_cost231) - 13.82 * log_height_tx + (44.9 - 6.55 * log_height_tx) * np.log10(distance_km) - ahre_cost231 + Cm

# Transmit power is in dBm and antenna gain in dB
level_signal_cost231 = transmit_daya - pathloss_cost_231 + gain_antena_tx + fspl_cost231_dB

# Backward-compatible alias: fspl_dB previously ended this cell holding the COST 231 value.
fspl_dB = fspl_cost231_dB

# --- 3GPP parameters ---
# Derived from the constants above so the three models cannot drift apart.
fc = frequency_MHz / 1000  # Carrier frequency in GHz
d = distance_m  # Distance in meters
h_UT = 0.8  # Height of the user terminal in meters
P_tx = transmit_daya  # Transmit power in dBm
G_tx = gain_antena_tx  # Transmit antenna gain in dB
G_rx = 0   # Receive antenna gain in dB (example value)

# Line-of-sight (LOS) path loss
def path_loss_los(fc, d):
    """Return the LOS path loss 28.0 + 22*log10(d) + 20*log10(fc).

    In this notebook the function is called with ``fc`` in GHz and ``d`` in
    meters. Scalars and NumPy/pandas arrays are both accepted because the
    computation only uses ``np.log10`` and arithmetic.
    """
    distance_term = 22 * np.log10(d)
    frequency_term = 20 * np.log10(fc)
    return 28.0 + distance_term + frequency_term

# Non-line-of-sight (NLOS) path loss
def path_loss_nlos(fc, d, eta):
    """Return the NLOS path loss 36.7*log10(d) + 22.7 + 26*log10(fc) + eta.

    ``eta`` is an additive term applied on top of the deterministic loss.
    In this notebook ``fc`` is given in GHz and ``d`` in meters. Scalars and
    NumPy/pandas arrays are both accepted.
    """
    distance_term = 36.7 * np.log10(d)
    frequency_term = 26 * np.log10(fc)
    return distance_term + 22.7 + frequency_term + eta
# Both the LOS and NLOS models are provided: use NLOS on its own, or compute both and compare them later.

# Shadowing term for the NLOS model: one zero-mean Gaussian draw (standard
# deviation 3) per row. A seeded generator makes the draw identical on every
# Restart & Run All.
eta = np.random.default_rng(42).normal(0, 3, len(df))

# Add the shadowing component to the DataFrame
df['Shadowing (eta)'] = eta

# Compute the path loss for every row at once (the helpers only use
# np.log10 and arithmetic, so they work directly on whole columns).
tx_rx_distance = df['Distance Between Tx and Rx\n(m)']
df['Path Loss LOS (dB)'] = path_loss_los(fc, tx_rx_distance)
# The third argument must be the shadowing term. The antenna height
# ('Tinggi Antena') was passed here before, which added the height in meters
# to the loss and left the shadowing column unused.
df['Path Loss NLOS (dB)'] = path_loss_nlos(fc, tx_rx_distance, df['Shadowing (eta)'])

# Received level = transmit power + antenna gains - path loss
df['Signal Level LOS (dBm)'] = P_tx + G_tx + G_rx - df['Path Loss LOS (dB)']
df['Signal Level NLOS (dBm)'] = P_tx + G_tx + G_rx - df['Path Loss NLOS (dB)']

In [None]:
# Build one comparison table in which every column follows df's row order.
# The Random Forest column is predicted on X_selected (all rows, original
# order). Concatenating the train and test predictions would be wrong here:
# train_test_split shuffles the rows, so that array would be paired with the
# wrong distances and measurements.
rf_predictions = pd.Series(model_selected.predict(X_selected), index=df.index)

df1 = pd.DataFrame({
    'Distance (m)': distance_m,
    'Random Forest Prediction': rf_predictions,
    'Okumura Hata': level_sinyal_okumura,
    'Dataset': df['Signal Level'].tolist(),
    'COST 231': level_signal_cost231,
    '3GPP LOS' : df['Signal Level LOS (dBm)'],
    '3GPP NLOS' : df['Signal Level NLOS (dBm)'],
    })
# Sort by distance so the line plots run left to right.
df_sorted = df1.sort_values(by='Distance (m)')

In [None]:
# Raw (unsmoothed) signal level versus distance: measurements and predictions
plt.figure(figsize=(10, 6))

# (column in df_sorted, line colour, legend label), drawn in this order
raw_curves = [
    ('Dataset', 'blue', 'Actual'),
    ('Random Forest Prediction', 'red', 'Machine Learning Predicted'),
    ('Okumura Hata', 'green', 'Okumura-Hata Predicted'),
    ('COST 231', 'purple', 'COST 231 Predicted'),
]
for column, line_color, legend_label in raw_curves:
    plt.plot(df_sorted['Distance (m)'], df_sorted[column], color=line_color, label=legend_label, alpha=0.5)

plt.xlabel('Distance Between Tx and Rx (m)')
plt.ylabel('Signal Level')
plt.title('Actual vs Predicted Signal Level')
plt.legend()
plt.show()

In [None]:
# Rolling-mean helper used to smooth each curve
def moving_average(data, window_size):
    """Return the rolling mean of ``data`` over ``window_size`` points.

    ``min_periods=1`` means the first entries are averaged over however many
    points are available, so the result contains no leading NaN values.
    """
    windows = data.rolling(window=window_size, min_periods=1)
    return windows.mean()

# Smooth every curve with the same window and store it as "<column> MA".
window_size = 10  # You can adjust the window size based on your needs

# Columns are processed in this order, so the new "MA" columns are appended in it too
columns_to_smooth = [
    'Random Forest Prediction',
    'Okumura Hata',
    'COST 231',
    'Dataset',
    '3GPP LOS',
    '3GPP NLOS',
]
for column in columns_to_smooth:
    df_sorted[column + ' MA'] = moving_average(df_sorted[column], window_size)

In [None]:
# Smoothed comparison: moving averages of the measurements and of each model
plt.figure(figsize=(10, 6))

# (moving-average column, line colour); the column name doubles as the legend label.
# Drawn in order: Random Forest, Okumura-Hata, COST 231, measured dataset, 3GPP NLOS.
smoothed_curves = [
    ('Random Forest Prediction MA', 'blue'),
    ('Okumura Hata MA', 'orange'),
    ('COST 231 MA', 'green'),
    ('Dataset MA', 'black'),
    ('3GPP NLOS MA', 'red'),
]
for column, line_color in smoothed_curves:
    plt.plot(df_sorted['Distance (m)'], df_sorted[column], label=column, color=line_color, alpha=0.5)

# Axis labels and title
plt.xlabel('Distance (m)')
plt.ylabel('Signal Level (dBm)')
plt.title('Comparison of Signal Level Predictions (Smoothed)')

# Legend and grid, then render
plt.legend()
plt.grid(True)
plt.show()