# Understanding line fitting

First we import numpy and matplotlib as usual.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Apply the plot styling stored in a local style file.
# NOTE(review): the relative path presumably resolves against the notebook's
# working directory -- confirm the file is present there.
plt.style.use("astr19_matplotlib_style.txt")

Now, let's generate some random data points scattered about a linear trend line.

In [None]:
# Build a reproducible synthetic data set: a straight line plus Gaussian noise.

# fix the seed so every run draws the same noise realization
np.random.seed(19)

# true line parameters (slope, intercept) and the rms of the scatter
m, b, sigma = 2.0, 1.0, 2.0

# number of samples and their evenly spaced x positions on [0, 10]
npoints = 50
x = np.linspace(0, 10., num=npoints)

# noiseless trend, then one Gaussian deviate of width sigma per point
trend = m*x + b
noise = np.random.normal(scale=sigma, size=npoints)
y = trend + noise

# every point carries the same uncertainty, equal to the scatter rms
y_err = np.full(shape=npoints, fill_value=sigma)

In [None]:
# Show the synthetic data points together with their error bars.
f = plt.figure(figsize=(7, 7))
ax = f.add_subplot(111)
ax.errorbar(x, y, yerr=y_err, fmt='o')
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.show()

#### Method #1, polyfit()

In [None]:
# Weighted least-squares fit of a degree-1 polynomial (a straight line).
# polyfit returns the coefficients highest power first, i.e. [slope, intercept],
# so they can be unpacked directly. Wrapping the result in np.poly1d first is
# unnecessary and fragile: poly1d strips leading zero coefficients, so a fitted
# slope of exactly 0 would leave a single coefficient and break the unpacking.
m_fit, b_fit = np.polyfit(x, y, 1, w=1./y_err)  # weight each point by 1/sigma
print(m_fit,b_fit)

# evaluate the fitted line at the sample positions
y_fit = m_fit * x + b_fit

#### Plot result

In [None]:
# Overlay the polyfit line on the data points.
f = plt.figure(figsize=(7, 7))
ax = f.add_subplot(111)
ax.errorbar(x, y, yerr=y_err, fmt='o', label='data')
ax.plot(x, y_fit, label='fit')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend(loc=2, frameon=False)
plt.show()

#### Is this really the best fit??

In [None]:
# Closed-form least-squares solution for a straight line:
#   slope     = sum((x - <x>) * (y - <y>)) / sum((x - <x>)**2)
#   intercept = <y> - slope * <x>
x_mean = x.mean()
y_mean = y.mean()

# deviation of every sample from the corresponding mean
dx = x - x_mean
dy = y - y_mean

sum_numerator = (dx * dy).sum()
sum_denominator = (dx**2).sum()

m_calc = sum_numerator / sum_denominator
b_calc = y_mean - m_calc*x_mean
print(m_calc,b_calc)

# the calculated line evaluated at the sample positions
y_calc = m_calc*x + b_calc

In [None]:
# Compare the polyfit line with the hand-calculated one on top of the data.
f = plt.figure(figsize=(7, 7))
ax = f.add_subplot(111)
ax.errorbar(x, y, yerr=y_err, fmt='o', label='data')
ax.plot(x, y_fit, label='fit')
# markers rather than a line, so the calculated values stay visible on the fit
ax.plot(x, y_calc, 'o', label='calc')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend(loc=2, frameon=False)
plt.show()

#### Why is the last digit different?

In [None]:
# Floating-point rounding demo: the same terms of 1/3 are summed with
# different groupings to show that the order of operations can matter.

# a single term
print(1/3)
# three terms, added left to right
print(1/3+1/3+1/3)
# six terms, added strictly left to right (there are no parentheses)
print(1/3+1/3+1/3 + 1/3+1/3+1/3)
# the same six terms, but each group of three is summed first and the two
# partial sums are added afterwards; floating-point addition is not
# associative, so this may differ from the previous line in the last digit
print((1/3+1/3+1/3) + (1/3+1/3+1/3))