This method takes the training data, the test data and a trained model as input and creates a data frame that captures the mean error (difference between predicted and actual values), the average percentage error (average of the percent error) and the standard deviation of these errors. 

It also plots histograms of the error and percentage error distributions. 

In [28]:
def _plot_error_histograms(errors_df, error_mean, error_std, p_error_mean, p_error_std):
    """Draw side-by-side histograms of the absolute and percentage errors.

    Each panel shows the distribution of one column of ``errors_df`` together
    with dashed reference lines at the supplied mean and at mean +/- 2 std.

    Args:
        errors_df: Data frame with ``error`` and ``p_error`` columns.
        error_mean: Mean used for the reference lines of the ``error`` panel.
        error_std: Standard deviation used for the ``error`` panel.
        p_error_mean: Mean used for the reference lines of the ``p_error`` panel.
        p_error_std: Standard deviation used for the ``p_error`` panel.
    """
    # The style has to be configured before the axes are created, otherwise
    # the face/grid colours are not applied to this figure.
    sns.set_style(
        style='darkgrid',
        rc={'axes.facecolor': '.9', 'grid.color': '.8'}
    )
    sns.set_palette(palette='deep')
    sns_c = sns.color_palette(palette='deep')

    _, axs = plt.subplots(1, 2, figsize=(10, 5))

    panels = [
        (axs[0], 'error', sns_c[0], error_mean, error_std,
         f'error mean = {error_mean: 0.2f}', 'Absolute'),
        (axs[1], 'p_error', sns_c[3], p_error_mean, p_error_std,
         f'p_error mean = {p_error_mean: 0.2%}', 'Percentage'),
    ]
    for ax, column, color, mean, std, label, title in panels:
        sns.histplot(data=errors_df[column], bins=100, color=color, kde=True, ax=ax)
        ax.axvline(x=mean, color=color, linestyle='--', label=label, alpha=0.8)
        ax.axvline(x=mean + 2 * std, color=color, linestyle='--', alpha=0.5)
        ax.axvline(x=mean - 2 * std, color=color, linestyle='--', alpha=0.5)
        ax.set(title=title)
        # Without a legend the label passed to axvline is never displayed.
        ax.legend()

    plt.suptitle('Linear Model Errors Distribution', y=0.94)

    # Adjust layout for better spacing, then render.
    plt.tight_layout()
    plt.show()


def plot_erro_dist(grid, X_train, X_test, y_train, y_test):
    """Report prediction-error statistics and plot the error distributions.

    Predictions of ``grid.best_estimator_`` are computed for the test and the
    train split. For every row the absolute error ``|predict - actual|`` and
    the percentage error ``error / actual`` are derived. The summary
    statistics are computed on the test rows only and printed; the histograms
    are drawn over the train and test rows together.

    Note that rows whose actual value is 0 produce an infinite (or NaN)
    percentage error, which propagates into the percentage statistics.

    Args:
        grid: Object exposing ``best_estimator_`` with a ``predict`` method
            (presumably a fitted scikit-learn search object -- confirm).
        X_train: Training features passed to ``predict``.
        X_test: Test features passed to ``predict``.
        y_train: Actual target values of the training split.
        y_test: Actual target values of the test split.

    Returns:
        Tuple ``(error_mean, p_error_mean, error_std, p_error_std)`` computed
        on the test rows.
    """
    best_nf = grid.best_estimator_
    X_df = pd.concat([
        pd.DataFrame({'predict': best_nf.predict(X_test), 'actual': y_test, 'tag': "test"}),
        pd.DataFrame({'predict': best_nf.predict(X_train), 'actual': y_train, 'tag': "train"}),
    ])
    X_df = X_df.assign(
        error=lambda x: (x['predict'] - x['actual']).abs(),
        p_error=lambda x: x['error'].div(x['actual']),
    )

    # Compute error metrics on the test rows (selected once).
    test_rows = X_df[X_df['tag'] == "test"]
    error_mean = test_rows['error'].mean()
    error_std = test_rows['error'].std()
    print(f"error mean = {error_mean: 0.2f}, error std = {error_std: 0.2f}")
    p_error_mean = test_rows['p_error'].mean()
    p_error_std = test_rows['p_error'].std()
    print(f"p_error mean = {p_error_mean: 0.2%}, p_error std = {p_error_std: 0.2%}")

    try:
        _plot_error_histograms(X_df, error_mean, error_std, p_error_mean, p_error_std)
    except Exception as e:
        # Plotting is best effort: a rendering failure must not prevent the
        # already computed metrics from being returned to the caller.
        print(f"An error occurred: {e}")  # Print the error message
        print(f"Exception type: {type(e).__name__}")  # Print the type of the exception

    return error_mean, p_error_mean, error_std, p_error_std
    

In [29]:
def plot_correlation_matrix(df):
    """Plot a heatmap of the pairwise correlations between the columns of ``df``.

    Args:
        df: Data frame whose numeric columns are correlated with each other.
            Non-numeric columns are left out of the correlation.
    """
    # Derive the matrix from the argument instead of relying on a
    # ``corr_matrix`` variable that happens to exist in the notebook.
    corr_matrix = df.select_dtypes(include='number').corr()

    plt.figure(figsize=(12, 10))  # Set the figure size
    sns.heatmap(corr_matrix, cmap='coolwarm', linewidths=0.5)

    # Add a title
    plt.title('Correlation Heatmap')

    # Show the plot
    plt.show()

This method draws two scatter plots of WiFi retries vs. activity and two line plots of the MOS score vs. 'activity' and vs. WiFi retries.

In [30]:
def plot_kpis(data):
    """Plot WiFi retries, activity and MOS relationships on a 2x2 grid.

    Panels:
        * top-left: scatter of ``wifi retries`` vs ``activity`` for rows with
          fewer than 200000 retries;
        * top-right: the same scatter, coloured by three equal-width retry
          bins labelled Low/Medium/High;
        * bottom-left: mean ``MOS`` per ``activity`` value as a line;
        * bottom-right: mean ``MOS`` per retry bin (10 equal-width bins over
          rows with fewer than 25000 retries) as a line.

    Args:
        data: Data frame with ``wifi retries``, ``activity`` and ``MOS``
            columns. It is not modified.
    """
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))

    # Both scatter plots use the same outlier-trimmed subset; select it once.
    capped = data.query('`wifi retries` < 200000')

    # NOTE(review): the title says 'WiFi Utilization' while the y-axis column
    # is 'activity' -- confirm the wording.
    sns.scatterplot(data=capped, x='wifi retries', y='activity', ax=axes[0, 0])
    axes[0, 0].set_title('Scatter: WiFi Retries vs WiFi Utilization')

    bins = pd.cut(capped['wifi retries'], bins=3, labels=['Low', 'Medium', 'High'])
    sns.scatterplot(data=capped, x='wifi retries', y='activity', hue=bins, ax=axes[0, 1])
    axes[0, 1].set_title('Scatter: WiFi Retries vs activity (with low-high bins)')

    grouped_activity = data.groupby('activity', as_index=False)['MOS'].mean()
    sns.lineplot(data=grouped_activity, x='activity', y='MOS', ax=axes[1, 0])
    axes[1, 0].set_title('Scatter: WiFi activity vs MOS Score')

    # ``assign`` builds a new frame, so the bin column is never written into a
    # slice of ``data`` (which would raise SettingWithCopyWarning).
    retry_labels = [str(i) for i in range(10)]
    filtered_data = data.query('`wifi retries` < 25000').assign(
        retries=lambda d: pd.cut(d['wifi retries'], bins=10, labels=retry_labels)
    )
    grouped_retry = filtered_data.groupby('retries', observed=True)['MOS'].mean().reset_index()
    sns.lineplot(data=grouped_retry, x='retries', y='MOS', marker='o', ax=axes[1, 1])
    axes[1, 1].set_title('Scatter: WiFi retries vs MOS Score')

    # Adjust layout for better spacing
    plt.tight_layout()

    # Show the plots
    plt.show()