In [33]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats


In [34]:
# Unit test for a highly correlated data set: fit y against x with
# scipy.stats.linregress and check slope, intercept and R-squared against
# reference values.

x = [0.2, 337.4, 118.2, 884.6, 10.1, 226.5, 666.3, 996.3, 448.6, 777.0,
     558.2, 0.4, 0.6, 775.5, 666.9, 338.0, 447.5, 11.6, 556.0, 228.1,
     995.8, 887.6, 120.2, 0.3, 0.3, 556.8, 339.1, 887.2, 999.0, 779.0,
     11.1, 118.3, 229.2, 669.1, 448.9, 0.5]

y = [0.1, 338.8, 118.1, 888.0, 9.2, 228.1, 668.5, 998.5, 449.1, 778.9,
     559.2, 0.3, 0.1, 778.1, 668.8, 339.3, 448.9, 10.8, 557.7, 228.3,
     998.0, 888.8, 119.6, 0.3, 0.6, 557.6, 339.3, 888.0, 998.5, 778.9,
     10.2, 117.6, 228.9, 668.4, 449.2, 0.2]

# Decimal places shown in the printed report; 10**-round_to is also the
# absolute tolerance used by the assertions at the end of the cell.
round_to = 12
tolerance = 10 ** -round_to

# Reference values at full precision (used for the comparisons).
reference = {
    'slope': 1.00211681802045,
    'intercept': -0.262323073774029,
    'rsquared': 0.999993745883712,
}

# Expected values, rounded for display only
exp_slope = round(reference['slope'], round_to)
exp_intercept = round(reference['intercept'], round_to)
exp_rsquared = round(reference['rsquared'], round_to)

print('----------------------------')
print('Expected results:')
print(f'   - Slope: {exp_slope}')
print(f'   - Intercept: {exp_intercept}')
print(f'   - R-squared: {exp_rsquared}')

print('----------------------------')

actual = stats.linregress(x, y)
# Result recorded when this cell was last run:
# LinregressResult(slope=1.0021168180204543, intercept=-0.26232307377398456, rvalue=0.9999968729369666, pvalue=4.654040852584279e-90, stderr=0.00042979684820064804)

# Rounded copies of the fitted values, for display only.
actual_slope = round(actual.slope, round_to)
actual_intercept = round(actual.intercept, round_to)
actual_rsquared = round(actual.rvalue**2, round_to)
print('----------------------------')
print('Actual results:')
print(f'   - Slope: {actual_slope}')
print(f'   - Intercept: {actual_intercept}')
print(f'   - R-squared: {actual_rsquared}')


print('----------------------------')

# Compare the unrounded values with an absolute tolerance instead of testing
# rounded floats with ==: two values that differ by far less than the
# tolerance can still round to different digits when they straddle a rounding
# boundary, which would fail an equality check spuriously.
observed = {
    'slope': actual.slope,
    'intercept': actual.intercept,
    'rsquared': actual.rvalue**2,
}
failure_messages = {
    'slope': 'Slope is different!',
    'intercept': 'Intercept is different!',
    'rsquared': 'r-squared is different!',
}
for quantity, target in reference.items():
    # assert_allclose raises AssertionError, like the bare asserts it replaces.
    np.testing.assert_allclose(observed[quantity], target,
                               rtol=0, atol=tolerance,
                               err_msg=failure_messages[quantity])

----------------------------
Expected results:
   - Slope: 1.00211681802
   - Intercept: -0.262323073774
   - R-squared: 0.999993745884
----------------------------
----------------------------
Actual results:
   - Slope: 1.00211681802
   - Intercept: -0.262323073774
   - R-squared: 0.999993745884
----------------------------


In [36]:
# Fit a straight line carrying a sinusoidal perturbation, handing linregress
# one stacked array of shape (2, N) instead of separate x and y arguments.
x = np.linspace(0, 100, 100)
trend = 0.2 * x + 10                       # underlying line: slope 0.2, offset 10
wobble = np.sin(np.linspace(0, 20, 100))   # sinusoidal noise term
y = trend + wobble
rows = np.vstack([x, y])                   # row 0 holds x, row 1 holds y

res = stats.linregress(rows)
res

# Result recorded when this cell was run:
# LinregressResult(slope=0.19644990055858422, intercept=10.211269918932343, rvalue=0.992791070127779, pvalue=4.145662086656158e-92, stderr=0.002395781449783862)

LinregressResult(slope=0.19644990055858422, intercept=10.211269918932343, rvalue=0.992791070127779, pvalue=4.145662086656158e-92, stderr=0.002395781449783862)

In [37]:
# When the regression slope is negative, the correlation coefficient r tends
# to -1 rather than +1. Rounding errors can sometimes push the computed r
# below -1, which leads to stderr being NaN. This cell exercises that case
# with tiny, perfectly anti-correlated inputs.
a, n = 1e-71, 100000
x = np.linspace(a, 2 * a, n)
y = np.linspace(2 * a, a, n)

# A single fit is sufficient; the result is kept for the checks and display.
res = stats.linregress(x, y)

# Check the properties described above so a regression fails loudly instead
# of relying on someone reading the displayed result.
assert res.rvalue >= -1, 'rvalue fell below -1!'
np.testing.assert_allclose(res.rvalue, -1, rtol=0, atol=1e-7,
                           err_msg='rvalue is not close to -1!')
assert not np.isnan(res.stderr), 'stderr is NaN!'
res

LinregressResult(slope=-1.0, intercept=2.9999999999999993e-71, rvalue=-1.0, pvalue=0.0, stderr=0.0)