# Import Required Libraries
Import libraries such as NumPy, pandas, SciPy, and scikit-learn for calculations and dataset generation.

In [1]:
# Import necessary libraries
import numpy as np  # For numerical operations and random dataset generation
import pandas as pd  # For handling datasets
from scipy.stats import ks_2samp  # For Kolmogorov-Smirnov test
from sklearn.metrics import (  # For regression performance metrics
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
    max_error,
    r2_score
)

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


# Generate Random Datasets
Generate random datasets for true values (y_true) and predicted values (y_pred) using NumPy.

In [2]:
# Generate random datasets for true values (y_true) and predicted values (y_pred)
np.random.seed(42)  # Set seed for reproducibility

# Generate random true values (y_true) and predicted values (y_pred)
y_true = np.random.uniform(50, 150, 100)  # True values between 50 and 150
y_pred = y_true + np.random.normal(0, 10, 100)  # Predicted values with added noise

# Convert to pandas DataFrame for better visualization (optional)
data = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
data.head()  # Display the first few rows of the dataset

Unnamed: 0,y_true,y_pred
0,87.454012,88.324483
1,145.071431,142.081357
2,123.199394,124.117002
3,109.865848,89.990159
4,65.601864,63.405145


# Calculate MAPE (Mean Absolute Percentage Error)
Implement the formula for MAPE and calculate it using y_true and y_pred.

In [3]:
# Calculate MAPE (Mean Absolute Percentage Error)
def calculate_mape(y_true, y_pred):
    """
    Calculate Mean Absolute Percentage Error (MAPE).
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        float: MAPE value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Calculate MAPE using the generated datasets
mape = calculate_mape(y_true, y_pred)
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 8.30%


# Perform Kolmogorov-Smirnov Test
Use SciPy's ks_2samp function to perform the Kolmogorov-Smirnov test between y_true and y_pred.

In [5]:
# Perform Kolmogorov-Smirnov Test
# Use SciPy's ks_2samp function to perform the Kolmogorov-Smirnov test between y_true and y_pred
ks_statistic, p_value = ks_2samp(y_true, y_pred)

# Display the results
print(f"Kolmogorov-Smirnov Test Statistic: {ks_statistic:.4f}")
print(f"P-value: {p_value:.4f}")

#print if the null hypothesis is rejected or not
if p_value < 0.05:
    print("Reject the null hypothesis: The distributions are different.")
else:
    print("Fail to reject the null hypothesis: The distributions are the same.")

Kolmogorov-Smirnov Test Statistic: 0.0700
P-value: 0.9684
Fail to reject the null hypothesis: The distributions are the same.


# Calculate MAE (Mean Absolute Error)
Use scikit-learn's mean_absolute_error function to calculate MAE.

In [6]:
from sklearn.metrics import mean_absolute_error

# Calculate MAE (Mean Absolute Error)
mae = mean_absolute_error(y_true, y_pred)

# Display the result
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Absolute Error (MAE): 7.21


# Calculate Percent Error
Implement the formula for percent error and calculate it using y_true and y_pred.

In [7]:
# Calculate Percent Error
def calculate_percent_error(y_true, y_pred):
    """
    Calculate Percent Error.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        array-like: Percent error for each prediction.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return ((y_true - y_pred) / y_true) * 100

# Calculate Percent Error using the generated datasets
percent_error = calculate_percent_error(y_true, y_pred)

# Add Percent Error to the DataFrame for better visualization
data['percent_error'] = percent_error

# Display the first few rows with Percent Error
data.head()

Unnamed: 0,y_true,y_pred,percent_error
0,87.454012,88.324483,-0.995347
1,145.071431,142.081357,2.061104
2,123.199394,124.117002,-0.744815
3,109.865848,89.990159,18.090871
4,65.601864,63.405145,3.348562


# Calculate R² Coefficient of Determination
Use scikit-learn's r2_score function to calculate the R² coefficient of determination.

In [8]:
from sklearn.metrics import r2_score

# Calculate R² Coefficient of Determination
r2 = r2_score(y_true, y_pred)

# Display the result
print(f"R² Coefficient of Determination: {r2:.4f}")

R² Coefficient of Determination: 0.9058


# Calculate WAPE (Weighted Absolute Percentage Error)
Implement the formula for WAPE and calculate it using y_true and y_pred.

In [9]:
# Calculate WAPE (Weighted Absolute Percentage Error)
def calculate_wape(y_true, y_pred):
    """
    Calculate Weighted Absolute Percentage Error (WAPE).
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        float: WAPE value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.sum(np.abs(y_true - y_pred)) / np.sum(np.abs(y_true)) * 100

# Calculate WAPE using the generated datasets
wape = calculate_wape(y_true, y_pred)

# Display the result
print(f"Weighted Absolute Percentage Error (WAPE): {wape:.2f}%")

Weighted Absolute Percentage Error (WAPE): 7.43%


# Calculate Max Error
Use scikit-learn's max_error function to calculate the maximum error.

In [10]:
from sklearn.metrics import max_error

# Calculate Max Error
max_err = max_error(y_true, y_pred)

# Display the result
print(f"Max Error: {max_err:.2f}")

Max Error: 24.63


# Calculate MSE (Mean Squared Error)
Use scikit-learn's mean_squared_error function to calculate MSE.

In [11]:
# Calculate MSE (Mean Squared Error)
# Use scikit-learn's mean_squared_error function to calculate MSE
mse = mean_squared_error(y_true, y_pred)

# Display the result
print(f"Mean Squared Error (MSE): {mse:.2f}")

Mean Squared Error (MSE): 82.51


# Calculate MSLE (Mean Squared Logarithmic Error)
Use scikit-learn's mean_squared_log_error function to calculate MSLE.

In [12]:
# Calculate MSLE (Mean Squared Logarithmic Error)
# Use scikit-learn's mean_squared_log_error function to calculate MSLE
msle = mean_squared_log_error(y_true, y_pred)

# Display the result
print(f"Mean Squared Logarithmic Error (MSLE): {msle:.4f}")

Mean Squared Logarithmic Error (MSLE): 0.0113


# Calculate RMSE (Root Mean Squared Error)
Calculate RMSE by taking the square root of MSE.

In [13]:
# Calculate RMSE (Root Mean Squared Error)
def calculate_rmse(mse):
    """
    Calculate Root Mean Squared Error (RMSE) from Mean Squared Error (MSE).
    
    Parameters:
        mse (float): Mean Squared Error value.
    
    Returns:
        float: RMSE value.
    """
    return np.sqrt(mse)

# Calculate RMSE using the previously calculated MSE
rmse = calculate_rmse(mse)

# Display the result
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

Root Mean Squared Error (RMSE): 9.08


# Calculate RMSLE (Root Mean Squared Logarithmic Error)
Calculate RMSLE by taking the square root of MSLE.

In [14]:
# Calculate RMSLE (Root Mean Squared Logarithmic Error)
def calculate_rmsle(msle):
    """
    Calculate Root Mean Squared Logarithmic Error (RMSLE) from Mean Squared Logarithmic Error (MSLE).
    
    Parameters:
        msle (float): Mean Squared Logarithmic Error value.
    
    Returns:
        float: RMSLE value.
    """
    return np.sqrt(msle)

# Calculate RMSLE using the previously calculated MSLE
rmsle = calculate_rmsle(msle)

# Display the result
print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle:.4f}")

Root Mean Squared Logarithmic Error (RMSLE): 0.1065


## Mean Gamma Deviance

In [16]:
#Calculate mean gamma deviance  
def calculate_mean_gamma_deviance(y_true, y_pred):
    """
    Calculate Mean Gamma Deviance.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        float: Mean Gamma Deviance value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(2 * (np.log(y_pred / y_true) - (y_pred / y_true) + 1))

# Calculate Mean Gamma Deviance using the generated datasets
mean_gamma_deviance = calculate_mean_gamma_deviance(y_true, y_pred)

# Display the result
print(f"Mean Gamma Deviance: {mean_gamma_deviance:.4f}")



Mean Gamma Deviance: -0.0118


## Mean Poisson Deviance

In [17]:
#Calculate mean poisson deviance
def calculate_mean_poisson_deviance(y_true, y_pred):
    """
    Calculate Mean Poisson Deviance.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        float: Mean Poisson Deviance value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(2 * (y_pred - y_true * np.log(y_pred)))

# Calculate Mean Poisson Deviance using the generated datasets
mean_poisson_deviance = calculate_mean_poisson_deviance(y_true, y_pred)

# Display the result
print(f"Mean Poisson Deviance: {mean_poisson_deviance:.4f}")



Mean Poisson Deviance: -701.8994


## Calcuate the D2 Teeedie Score for regression scoring


In [18]:
#Calculate mean tweedie deviance
def calculate_mean_tweedie_deviance(y_true, y_pred, p=0):
    """
    Calculate Mean Tweedie Deviance.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
        p (int, float): Tweedie power parameter.
    
    Returns:
        float: Mean Tweedie Deviance value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(2 * (np.power(y_pred, 2-p) / ((1-p) * np.power(y_true, 1-p)) - y_pred * np.power(y_true, -p) + (1-p) * np.power(y_true, 1-p)))

# Calculate Mean Tweedie Deviance using the generated datasets
mean_tweedie_deviance = calculate_mean_tweedie_deviance(y_true, y_pred, p=0)

# Display the result
print(f"Mean Tweedie Deviance: {mean_tweedie_deviance:.4f}")

Mean Tweedie Deviance: 195.9256


## D2 Absolute Error

In [19]:
#Calculate D2 absoulte error
def calculate_d2_absolute_error(y_true, y_pred):
    """
    Calculate D² Absolute Error.
    
    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.
    
    Returns:
        float: D² Absolute Error value.
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs(np.log(y_pred + 1) - np.log(y_true + 1)))

# Calculate D² Absolute Error using the generated datasets
d2_absolute_error = calculate_d2_absolute_error(y_true, y_pred)

# Display the result

print(f"D² Absolute Error: {d2_absolute_error:.4f}")


D² Absolute Error: 0.0814


## Explained Variance Score for regression scoring

In [20]:
#Explained Variance Score for regression
from sklearn.metrics import explained_variance_score

# Calculate Explained Variance Score
evs = explained_variance_score(y_true, y_pred)

# Display the result
print(f"Explained Variance Score: {evs:.4f}")


Explained Variance Score: 0.9058
