In [2]:
import numpy as np
import matplotlib.pyplot as plt 
from scipy.stats import norm
import scipy.stats as stats
from scipy.optimize import curve_fit

%matplotlib notebook


In [3]:
data = []
for line in open("peak.dat", 'r'):
    item = float(line.rstrip())
    data.append(item)

In [4]:
data = np.array(data)
np.max(data)

2.53686923

In [6]:
bins = 25
counts, binEdges = np.histogram(data, bins)
bincenters = 0.5*(binEdges[1:]+binEdges[:-1])
err = np.sqrt(counts)
#width = 0.05

plt.figure()
plt.hist(data, bins)
plt.errorbar(bincenters, counts, yerr = err)
plt.show()


<IPython.core.display.Javascript object>

In [8]:
# compute mean and std of distribution of energies, and their uncertainties
mean = np.mean(data)
std = np.std(data)

### "And their uncertainties??? What does that even mean"

# standard error of mean
SEM = std/np.sqrt(len(data))

print("Mean:", mean)
print("Standard Deviation:", std)
print('SEM:', SEM)

Mean: 2.203125234667
Standard Deviation: 0.10636126804020885
SEM: 0.0033634386183073344


In [9]:
fittedmean, fittedstd = norm.fit(data)
print("Mean:", fittedmean)
print("Standard Deviation:", fittedstd)

print('Result: same exact results as np.mean and np.std from histogram') 

print('Same mean?', mean == fittedmean)
print('Same standard deviation:', std == fittedstd)

Mean: 2.203125234667
Standard Deviation: 0.10636126804020885
Result: same exact results as np.mean and np.std from histogram
Same mean? True
Same standard deviation: True


In [10]:
def gaussian(x, peak, mu, sigma):
    # x = np.linspace(np.min(data),np.max(data), len(counts))
    return peak * np.exp( - (x - mu)**2 / (2 * sigma**2)) 

In [11]:
# Unbinned Fit Gaussian

mu = fittedmean
sigma = fittedstd
plt.figure()
plt.hist(data, bins)
plt.errorbar(bincenters, counts, yerr = err)
x = np.linspace(np.min(data), np.max(data), 1000)
peak = np.max(counts)
plt.plot(x, gaussian(x, peak, mu, sigma), linewidth=2, color='r')
plt.xlim(np.min(data), np.max(data))
plt.show()

# Result: Curve fits the data well

<IPython.core.display.Javascript object>

In [14]:
# Now, onto the binned least-squares fit Gaussian:

fitparams,hessian = curve_fit(gaussian, bincenters, counts)

In [15]:
### what does "their uncertainties" even refer to? This below is my assumption

paramserror = np.sqrt(np.diag(hessian))
print('Binned fit results:')
print('Error in peak value:', paramserror[0])
print('Mean error:', paramserror[1])
print('STD error:', paramserror[2])
print('For unbinned fit we have:')
print('Error in peak value:', np.sqrt(np.max(counts)))
print('Mean error:', SEM)
print('STD error:', SEM)

Binned fit results:
Error in peak value: 3.401158467861657
Mean error: 0.00422968327213465
STD error: 0.004233444122316887
For unbinned fit we have:
Error in peak value: 10.246950765959598
Mean error: 0.0033634386183073344
STD error: 0.0033634386183073344


In [16]:
fitparams

array([99.81304942,  2.20069633, -0.10751626])

In [17]:
# Binned fit curve vs unbinned fit curve

plt.figure()
plt.plot(x, gaussian(x, fitparams[0], fitparams[1], fitparams[2]), label = 'binned')
plt.plot(x, gaussian(x, peak, mu, sigma), linewidth=2, color='r', label = 'unbinned')
plt.xlim(np.min(data), np.max(data))
plt.legend()
plt.show()
print('Result: Binned fit slightly differs from the unbinned fit, but is largely the same curve')

<IPython.core.display.Javascript object>

Result: Binned fit slightly differs from the unbinned fit, but is largely the same curve


In [189]:
# Next part: 'Compare the histogram from 1 to the fitted curve, and compute a goodness of fit vlaue, such as chi squared'

plt.figure()
plt.hist(data, bins)
plt.errorbar(bincenters, counts, yerr = err, label='hist error bar', color='y')
plt.plot(x, gaussian(x, fitparams[0], fitparams[1], fitparams[2]), label = 'fitted curve', color='r')
plt.xlim(np.min(data), np.max(data))
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [18]:
comparisonvals = gaussian(bincenters, fitparams[0], fitparams[1], fitparams[2])
chisq, pval = stats.chisquare(counts, comparisonvals)

In [19]:
print('Chisq:', chisq)
print('P-value:', pval)
print('With the p-value being', pval,'that means there is a ~91% chance that the histogram data fits the generated Gaussian. Thus, this is a good fit.')

Chisq: 26.707486492634892
P-value: 0.318271509271051
With the p-value being 0.318271509271051 that means there is a ~91% chance that the histogram data fits the generated Gaussian. Thus, this is a good fit.
