In [None]:
%matplotlib inline

import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error

In [None]:
#Set parameters to see all data
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 50)
pd.set_option('display.width', 1000)

In [None]:
# Read dataset

%%time
nb_file_path = 'NB_Prediction_Results.csv'

try:
    nb_pred_df = pd.read_csv(nb_file_path)
except pd.errors.ParserError as e:
    print(f'Error while parsing CSV file: {e}')



nb_pred_df.shape

CPU times: user 9.53 ms, sys: 2.72 ms, total: 12.2 ms
Wall time: 14 ms


(4897, 7)

In [None]:
# Read dataset

%%time
rf_file_path = 'RF_Prediction_Results.csv'

try:
    rf_pred_df = pd.read_csv(rf_file_path)
except pd.errors.ParserError as e:
    print(f'Error while parsing CSV file: {e}')



rf_pred_df.shape

CPU times: user 7.66 ms, sys: 851 µs, total: 8.51 ms
Wall time: 8.95 ms


(4897, 5)

In [None]:
rf_pred_df.columns

Index(['company', 'agent', 'arrivaldate', 'adr', 'Random_Forest_Prodecitons'], dtype='object')

In [None]:
# Merge the dataframes on common columns
results_df = pd.merge(nb_pred_df, rf_pred_df, on=['company', 'agent', 'arrivaldate', 'adr'])

# Display the merged dataframe
print(results_df.shape)


(19653, 8)


In [None]:
results_df.head()

Unnamed: 0,company,agent,arrivaldate,adr,Naive_Bias_Probability,Naive_Bias_Probability_Isotonic_Clb,Naive_Bias_Probability_Sigmoid_Clb,Random_Forest_Prodecitons
0,,240,2017-08-01,209.0,307,80,90,100
1,,240,2017-08-01,230.0,292,80,90,80
2,,240,2017-08-01,230.0,292,80,90,80
3,,240,2017-08-01,230.0,292,80,90,80
4,,240,2017-08-01,230.0,292,80,90,80


In [None]:
# Class for error(RMSE, MAE, MAPE) calculation
class ErrorMetricsCalculator:
    def __init__(self, y_true, y_pred):
        """
        Initialize the ErrorMetricsCalculator class.

        Parameters:
        - y_true: Actual values.
        - y_pred: Predicted values.
        """
        self.y_true = y_true
        self.y_pred = y_pred

    def calculate_rmse(self):
        """
        Calculate the Root Mean Squared Error (RMSE).
        """
        return np.sqrt(np.mean((self.y_true - self.y_pred) ** 2))

    def calculate_mae(self):
        """
        Calculate the Mean Absolute Error (MAE).
        """
        return np.mean(np.abs(self.y_true - self.y_pred))

    def calculate_mape(self):
        """
        Calculate the Mean Absolute Percentage Error (MAPE).
        """
        return np.mean(np.abs((self.y_true - self.y_pred) / self.y_true)) * 100

In [None]:
# Using the ErrorMetricsCalculator class to calculate error metrics
nb_calculator = ErrorMetricsCalculator(results_df['adr'], results_df['Naive_Bias_Probability'])

# Calculate RMSE
rmse_result_nb = nb_calculator.calculate_rmse()

# Calculate MAE
mae_result_nb = nb_calculator.calculate_mae()

# Calculate MAPE
mape_result_nb = nb_calculator.calculate_mape()

In [None]:
# Print the results
print("For Naive Bias RMSE:", rmse_result_nb)
print("For Naive Bias MAE:", mae_result_nb)
print("For Naive Bias MAPE:", mape_result_nb)

For Naive Bias RMSE: 108.89954378249345
For Naive Bias MAE: 93.6049285096423
For Naive Bias MAPE: 76.10069363818457


In [None]:
# Using the ErrorMetricsCalculator class to calculate error metrics
isotonic_calculator = ErrorMetricsCalculator(results_df['adr'], results_df['Naive_Bias_Probability_Isotonic_Clb'])

# Calculate RMSE
rmse_result_isotonic = isotonic_calculator.calculate_rmse()

# Calculate MAE
mae_result_isotonic = isotonic_calculator.calculate_mae()

# Calculate MAPE
mape_result_isotonic = isotonic_calculator.calculate_mape()

In [None]:
# Print the results
print("For isotonic naive bias RMSE:", rmse_result_isotonic)
print("For isotonic naive bias MAE:", mae_result_isotonic)
print("For isotonic naive bias MAPE:", mape_result_isotonic)

For isotonic naive bias RMSE: 65.69536875694143
For isotonic naive bias MAE: 42.12484302650995
For isotonic naive bias MAPE: 26.640287921885797


In [None]:
# Using the ErrorMetricsCalculator class to calculate error metrics
sigmoid_calculator = ErrorMetricsCalculator(results_df['adr'], results_df['Naive_Bias_Probability_Sigmoid_Clb'])

# Calculate RMSE
rmse_result_sigmoid = sigmoid_calculator.calculate_rmse()

# Calculate MAE
mae_result_sigmoid = sigmoid_calculator.calculate_mae()

# Calculate MAPE
mape_result_sigmoid = sigmoid_calculator.calculate_mape()

In [None]:
# Print the results
print("For sigmoid naive bias RMSE:", rmse_result_sigmoid)
print("For sigmoid naive bias MAE:", mae_result_sigmoid)
print("For sigmoid naive bias MAPE:", mape_result_sigmoid)

For sigmoid naive bias RMSE: 61.226181927188335
For sigmoid naive bias MAE: 40.67517223833511
For sigmoid naive bias MAPE: 25.622779394154886


In [None]:
# Using the ErrorMetricsCalculator class to calculate error metrics
rf_calculator = ErrorMetricsCalculator(results_df['adr'], results_df['Random_Forest_Prodecitons'])

# Calculate RMSE
rmse_result_rf = rf_calculator.calculate_rmse()

# Calculate MAE
mae_result_rf = rf_calculator.calculate_mae()

# Calculate MAPE
mape_result_rf = rf_calculator.calculate_mape()

In [None]:
# Print the results
print("For random forest RMSE:", rmse_result_rf)
print("For random forest MAE:", mae_result_rf)
print("For random forest MAPE:", mape_result_rf)

For random forest RMSE: 47.35357488934488
For random forest MAE: 27.220608558489797
For random forest MAPE: 17.605442618196946


In [None]:
# Method for MAPE calculation for different thresholds
def calculate_mape(column1, column2, threshold):
    """
    Calculate the Mean Absolute Percentage Error (MAPE) between two columns of a DataFrame
    and return the percentage of rows with MAPE below the specified threshold.

    Parameters:
    - column1: First column of the DataFrame.
    - column2: Second column of the DataFrame.
    - threshold: Threshold value for MAPE.

    Returns:
    - Percentage of rows with MAPE below the threshold.
    """
    # Calculate MAPE for each row
    mape_values = (abs(column1 - column2) / column1) * 100

    # Count the number of rows with MAPE below the threshold
    num_rows_below_threshold = (mape_values < threshold).sum()

    # Calculate the percentage of rows below the threshold
    percentage_below_threshold = (num_rows_below_threshold / len(column1)) * 100

    return percentage_below_threshold

# For Naive Bias

In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 10  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 10%: 18.450109398056277%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 25  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 25%: 21.34025339642803%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 50  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 50%: 27.822724266015364%


# Isotonic Naive Bias

In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 10  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Isotonic_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 10%: 31.969673841143848%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 25  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Isotonic_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 25%: 64.1734086399023%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 50  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Isotonic_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 50%: 79.69775606777591%


# Sigmiod Naive Bias

In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 10  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Sigmoid_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 10%: 26.759273393375054%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 25  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Sigmoid_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 25%: 57.14140334808935%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 50  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Naive_Bias_Probability_Sigmoid_Clb'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 50%: 84.17544395257721%


# Random Forest

In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 10  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Random_Forest_Prodecitons'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 10%: 55.543682898285255%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 25  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Random_Forest_Prodecitons'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 25%: 74.18205871877068%


In [None]:
# Call the function with your DataFrame columns and threshold
threshold = 50  # Example threshold value
percentage = calculate_mape(results_df['adr'], results_df['Random_Forest_Prodecitons'], threshold)

print(f"Percentage of rows with MAPE below {threshold}%: {percentage}%")

Percentage of rows with MAPE below 50%: 91.78751335673942%
