# In [2]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

def qq_plot(test_df, v, unit, dict_col_regression, unit_regression):
    """Show a two-sample quantile-quantile plot of predicted vs. actual values.

    The sorted ``'Actual'`` values are plotted on the x-axis against the
    sorted ``'Predicted'`` values on the y-axis.  A dashed reference line is
    drawn through the (25th, 75th) percentile points of both samples and
    extended on each side, and the 0th/25th/50th/75th/100th percentile points
    are annotated.  The figure is displayed with ``plt.show()``; nothing is
    returned.

    Args:
        test_df: Object indexable by ``'Actual'`` and ``'Predicted'``, each
            yielding an equally long sequence of numeric values.
        v: Prefix of the title lookup key; ``v + 'Palas'`` is looked up in
            ``dict_col_regression``.
        unit: Key looked up in ``unit_regression`` for the plot title.
        dict_col_regression: Mapping that provides the title text for
            ``v + 'Palas'``.
        unit_regression: Mapping that provides the title suffix for ``unit``.

    Raises:
        ValueError: If no row has finite values in both columns.
    """
    actual = np.asarray(test_df['Actual'], dtype=float)
    predicted = np.asarray(test_df['Predicted'], dtype=float)

    # Drop rows where either value is NaN/inf: they cannot be ranked and
    # would otherwise propagate into the percentiles and axis limits.
    usable = np.isfinite(actual) & np.isfinite(predicted)
    if not usable.any():
        raise ValueError(
            "test_df has no rows with finite 'Actual' and 'Predicted' values"
        )

    # With equally sized samples, the i-th smallest actual value and the
    # i-th smallest predicted value are matching empirical quantiles.
    actual_sorted = np.sort(actual[usable])
    predicted_sorted = np.sort(predicted[usable])

    # Real percentiles of each sample (not the first five sorted points).
    percentile_labels = ["0th", "25th", "50th", "75th", "100th"]
    percentile_ranks = [0, 25, 50, 75, 100]
    actual_pct = np.percentile(actual_sorted, percentile_ranks)
    predicted_pct = np.percentile(predicted_sorted, percentile_ranks)

    # Create the QQ plot
    _, ax = plt.subplots()
    ax.scatter(actual_sorted, predicted_sorted, color='blue', marker='x', label='Data Points')

    # Reference line through the 25th and 75th percentile points, extended
    # beyond them by `extension_factor` inter-quartile ranges on each side.
    quantile_color = 'black'
    extension_factor = 2
    x25, x75 = actual_pct[1], actual_pct[3]
    y25, y75 = predicted_pct[1], predicted_pct[3]
    dx = extension_factor * (x75 - x25)
    dy = extension_factor * (y75 - y25)
    ax.plot(
        [x25 - dx, x75 + dx],
        [y25 - dy, y75 + dy],
        linestyle='--',
        color=quantile_color,
        label='25-75 Percentile Line',
    )

    # Annotate quantiles
    for txt, x, y in zip(percentile_labels, actual_pct, predicted_pct):
        ax.annotate(txt, (x, y), textcoords="offset points", xytext=(10, -10), color=quantile_color, ha='center', fontsize=8)

    # Limit the view to the data range plus a one-unit margin so the extended
    # reference line does not stretch the axes.
    ax.set_xlim(actual_sorted[0] - 1, actual_sorted[-1] + 1)
    ax.set_ylim(predicted_sorted[0] - 1, predicted_sorted[-1] + 1)

    # Show the plot
    ax.legend(loc='upper left')
    ax.set_title(f"Quantile-Quantile Plot for {dict_col_regression[v + 'Palas']} {unit_regression[unit]}")
    # Both axes show empirical sample quantiles, not theoretical ones.
    ax.set_xlabel('Quantiles (Actual)')
    ax.set_ylabel('Quantiles (Predicted)')
    plt.show()

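# Example usage: pass your own data and lookup tables. `test_df` must provide
# 'Actual' and 'Predicted' columns, `dict_col_regression` must contain the key
# v + 'Palas', and `unit_regression` must contain the key `unit`.
# qq_plot(test_df, v, unit, dict_col_regression, unit_regression)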
