
# Intuition for Publication Bias

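This notebook visualizes how publication bias distorts research findings: it compares the distribution of z-scores across all experiments with the distribution among published papers when results that fall short of the significance threshold are published less often. The figures are drawn with Plotly.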


In [None]:

import numpy as np
import plotly.graph_objects as go
import plotly.subplots as sp

def _integrate(y_values, x_values):
    """Return the trapezoidal-rule integral of ``y_values`` over ``x_values``.

    NumPy 2.0 added ``np.trapezoid`` and deprecated ``np.trapz``; use the new
    name when it exists and fall back to the old one on earlier releases.
    """
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return trapezoid(y_values, x_values)


def _bin_shares(pdf, x_values, edges):
    """Return the share of ``pdf`` mass in each half-open bin ``[lo, hi)``.

    The per-bin integrals are normalised so that the returned shares sum to 1.
    """
    masses = []
    for lower, upper in zip(edges[:-1], edges[1:]):
        in_bin = (x_values >= lower) & (x_values < upper)
        masses.append(_integrate(pdf[in_bin], x_values[in_bin]))
    masses = np.array(masses)
    return masses / np.sum(masses)


def plot_bias(non_significant_rate, effect_size):
    """Build a four-panel Plotly figure illustrating publication bias.

    Experiment outcomes are modelled as z-scores following a unit-variance
    normal density centred on ``effect_size``. Every outcome below the
    positive significance threshold (z < 1.96, which also covers significantly
    negative outcomes) is down-weighted by ``1 - non_significant_rate``; the
    result is renormalised to give the density of published papers.

    Args:
        non_significant_rate: Fraction in [0, 1] of below-threshold results
            that are removed from the published record.
        effect_size: Mean of the z-score distribution of real effects.

    Returns:
        The figure created by ``make_subplots``, with two density curves
        (real, published) and two bar charts of the share of results in each
        significance category.

    Raises:
        ValueError: If ``non_significant_rate`` is outside [0, 1].
    """
    if not 0 <= non_significant_rate <= 1:
        raise ValueError(
            f"non_significant_rate must be between 0 and 1, got {non_significant_rate!r}"
        )

    # One threshold drives both the publication filter and the significance
    # bins, so the density panels and the bar panels agree on what counts as
    # "significant".
    z_crit = 1.96
    x_min, x_max = -10, 10
    keep_fraction = 1 - non_significant_rate
    dark_blue = "#1f77b4"
    dark_orange = "#ff7f0e"

    x_values = np.linspace(x_min, x_max, 500)
    real_pdf = np.exp(-0.5 * (x_values - effect_size) ** 2) / np.sqrt(2 * np.pi)

    # Thin out everything below the threshold, then rescale to unit area.
    published_pdf = real_pdf.copy()
    published_pdf[x_values < z_crit] *= keep_fraction
    published_pdf /= _integrate(published_pdf, x_values)

    bins = [x_min, -z_crit, 0, z_crit, x_max]
    bar_labels = ["p<0.05, neg", "NS, neg", "NS, pos", "p<0.05, pos"]
    real_bar_values = _bin_shares(real_pdf, x_values, bins)
    published_bar_values = _bin_shares(published_pdf, x_values, bins)

    fig = sp.make_subplots(
        rows=4, cols=1,
        row_heights=[0.25, 0.25, 0.25, 0.25],
        vertical_spacing=0.1,
        subplot_titles=[
            "Distribution of Real Effects",
            "Distribution of Published Papers",
            "Significance of Experiments",
            "Significance of Published Papers",
        ],
    )

    fig.add_trace(go.Scatter(x=x_values, y=real_pdf, mode='lines', line=dict(color=dark_blue)), row=1, col=1)
    fig.add_trace(go.Scatter(x=x_values, y=published_pdf, mode='lines', line=dict(color=dark_orange)), row=2, col=1)
    fig.add_trace(go.Bar(x=bar_labels, y=real_bar_values, marker_color=dark_blue), row=3, col=1)
    fig.add_trace(go.Bar(x=bar_labels, y=published_bar_values, marker_color=dark_orange), row=4, col=1)

    fig.update_layout(
        autosize=True,
        height=900,
        title="Publication Bias Visualization",
        title_x=0.5,
        showlegend=False,
    )

    # Only the two density panels (rows 1 and 2) get explicit axis titles.
    for density_row in (1, 2):
        fig.update_xaxes(title_text="Z-score (mean/std)", row=density_row, col=1)
        fig.update_yaxes(title_text="Density", row=density_row, col=1)
    return fig

# Render an example figure: suppression rate 0.5, real effects centred on z = 1
plot_bias(non_significant_rate=0.5, effect_size=1).show()
