(correlation)=
# Regression


```{admonition} Important Readings
:class: seealso
- {cite}`freedman2007statistics`, Chapters 10, 11, 12
```

## Regression to Mediocrity


In [10]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, PointDrawTool, CustomJS, Div, HoverTool, Line
from bokeh.layouts import column
from IPython.display import HTML

# Render Bokeh output inline in the notebook and suppress the load banner
output_notebook(hide_banner=True)

# Starting scatter: five points lying exactly on the line y = x
data = dict(
    x=[1, 2, 3, 4, 5],
    y=[1, 2, 3, 4, 5],
)

# Helper function to calculate initial statistics
def calculate_initial_statistics(data):
    """Return least-squares line and summary statistics for paired data.

    Args:
        data: Mapping with keys ``'x'`` and ``'y'`` holding equal-length
            sequences of numbers.

    Returns:
        A tuple ``(slope, intercept, sdx, sdy, r)`` where ``sdx``/``sdy`` are
        population standard deviations (divisor ``n``) and ``r`` is the
        correlation coefficient. For empty input all five values are ``0``.
        When x has zero spread (for example a single point) the slope,
        intercept and ``r`` are undefined and returned as NaN; when only y
        has zero spread, ``r`` alone is NaN.

    Raises:
        ValueError: If ``data['x']`` and ``data['y']`` differ in length.
    """
    x = data['x']
    y = data['y']
    n = len(x)
    if len(y) != n:
        raise ValueError("data['x'] and data['y'] must have the same length")
    if n == 0:  # Nothing to average; keep the all-zero result callers expect
        return 0, 0, 0, 0, 0

    mean_x = sum(x) / n
    mean_y = sum(y) / n

    # Centred sums of squares/products: all three statistics derive from
    # these, and they avoid the cancellation of sum(x*x) - n*mean**2.
    sxx = sum((xi - mean_x) ** 2 for xi in x)
    syy = sum((yi - mean_y) ** 2 for yi in y)
    sxy = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))

    sdx = (sxx / n) ** 0.5
    sdy = (syy / n) ** 0.5

    undefined = float("nan")
    # A vertical stack of points has no least-squares slope.
    slope = sxy / sxx if sxx > 0 else undefined
    intercept = mean_y - slope * mean_x

    # Correlation needs spread in both variables.
    denom = sxx ** 0.5 * syy ** 0.5
    r = sxy / denom if denom > 0 else undefined
    return slope, intercept, sdx, sdy, r

# Fit the regression line and summary statistics for the starting data
slope, intercept, sdx, sdy, r = calculate_initial_statistics(data)

# Data source backing the editable scatter points
source = ColumnDataSource(data)

# The fitted line is drawn between the smallest and largest x in the data
x_min, x_max = min(data['x']), max(data['x'])
line_source = ColumnDataSource(data={
    'x': [x_min, x_max],
    'y': [slope * x_min + intercept, slope * x_max + intercept],
})

# Square figure on fixed 0-6 axes; only the save tool is enabled here
width = 450
height = width
p = figure(x_range=(0, 6), y_range=(0, 6), tools="save", height=height, width=width)

# Data points as translucent blue circles. scatter() (default marker: circle)
# replaces circle(size=...), which Bokeh 3.4+ deprecates.
c1 = p.scatter('x', 'y', source=source, size=20, color="deepskyblue", alpha=0.5)

# Regression line, visible from the start
line = p.line('x', 'y', source=line_source, line_width=3, color="black", alpha=0.7)

# Text panel showing the regression details for the initial data
div = Div(text=f"Slope: {slope:.4f}<br>Intercept: {intercept:.4f}<br>SD of X: {sdx:.4f}<br>SD of Y: {sdy:.4f}<br>Correlation Coefficient: {r:.4f}", width=400, height=100)

# Browser-side callback: recompute the fit from the current points, redraw the
# regression line and refresh the statistics panel. Quantities that are
# undefined for the current points (e.g. the slope of a single point) are
# shown as "N/A" and the line is hidden instead of being drawn with NaNs.
callback = CustomJS(args=dict(source=source, line_source=line_source, div=div), code="""
    const x = source.data['x'];
    const y = source.data['y'];
    const n = x.length;

    if (n === 0) {
        line_source.data = {'x': [], 'y': []};
        div.text = "Slope: N/A<br>Intercept: N/A<br>SD of X: N/A<br>SD of Y: N/A<br>Correlation Coefficient: N/A";
        return;
    }

    let sum_x = 0, sum_y = 0;
    for (let i = 0; i < n; i++) {
        sum_x += x[i];
        sum_y += y[i];
    }
    const mean_x = sum_x / n;
    const mean_y = sum_y / n;

    // Centred sums are never negative, so the square roots below cannot
    // turn rounding error into NaN (sum_xx / n - mean_x^2 could).
    let sxx = 0, syy = 0, sxy = 0;
    for (let i = 0; i < n; i++) {
        const dx = x[i] - mean_x;
        const dy = y[i] - mean_y;
        sxx += dx * dx;
        syy += dy * dy;
        sxy += dx * dy;
    }

    const sdx = Math.sqrt(sxx / n);
    const sdy = Math.sqrt(syy / n);

    // No spread in x: slope and intercept are undefined.
    const slope = sxx > 0 ? sxy / sxx : NaN;
    const intercept = mean_y - slope * mean_x;

    // Correlation needs spread in both variables.
    const denom = Math.sqrt(sxx) * Math.sqrt(syy);
    const r = denom > 0 ? sxy / denom : NaN;

    let reg_x = [], reg_y = [];
    if (Number.isFinite(slope)) {
        reg_x = [Math.min(...x), Math.max(...x)];
        reg_y = reg_x.map(v => slope * v + intercept);
    }
    line_source.data = {'x': reg_x, 'y': reg_y};
    line_source.change.emit();

    const fmt = v => Number.isFinite(v) ? v.toFixed(4) : "N/A";
    div.text = "Slope: " + fmt(slope) + "<br>Intercept: " + fmt(intercept) +
               "<br>SD of X: " + fmt(sdx) + "<br>SD of Y: " + fmt(sdy) +
               "<br>Correlation Coefficient: " + fmt(r);
""")

# Run the callback whenever the point data changes
source.js_on_change('data', callback)

# Interactive tools, both bound to the scatter renderer: a point-draw tool
# (adding points enabled) and a hover tooltip showing each point's coordinates
tool = PointDrawTool(renderers=[c1], add=True)
hover = HoverTool(
    renderers=[c1],
    tooltips=[("X", "@x"), ("Y", "@y")],
)
p.add_tools(tool, hover)

# Make the point-draw tool the active tap tool
p.toolbar.active_tap = tool

# CSS that centres notebook output, injected before the plot is shown
style = """
<style>
.output {
    display: flex;
    align-items: center;
    justify-content: center;
}
</style>
"""
display(HTML(style))

# Statistics panel stacked above the figure
show(column(div, p))
