# Lecture 7 
Please refer to the lecture slides to understand the context of these examples.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Visualize a Poisson likelihood function



In [None]:
# Scan the Poisson probability of a fixed observed count over a grid of
# expectation values, and plot it three ways: probability, log-probability,
# and negative log-probability shifted so that its minimum sits at zero.

# `poisson` is used below but was never imported anywhere in the notebook,
# so the cell failed with a NameError; import it here.
from scipy.stats import poisson

obs = 110
exp = np.linspace(90, 130, 401)     # expectation grid from 90 to 130 in steps of 0.1
prob = poisson.pmf(obs, exp)
logProb = poisson.logpmf(obs, exp)

plt.figure(0)
plt.plot(exp, prob, marker='o')
plt.ylabel('Probability')
plt.xlabel('Expectation')
plt.text(90, 0.035, f'Observed = {obs}')

plt.figure(1)
plt.plot(exp, logProb, marker='o')
plt.ylabel('log Poisson probability')
plt.xlabel('Expectation')
plt.text(90, -3.5, f'Observed = {obs}')


# Negative log-probability relative to its minimum over the grid, so the
# best-fit expectation has NLL == 0.
NLL = -logProb - np.min(-logProb)
plt.figure(2)
plt.plot(exp, NLL, marker='o')
plt.ylabel('negative log Poisson probability')
plt.xlabel('Expectation')
plt.plot([90, 140], [0, 0], ls='dashed', color='red')   # reference line at the minimum
plt.text(120, 2, f'Observed = {obs}')

In [None]:
# Read an interval off the shifted NLL curve using the 0.5 threshold; the
# distances from 110 to its edges are reported as the -/+ 1 sigma errors.

# argmax of a boolean array returns the index of its first True entry,
# i.e. the first grid point whose NLL is below 0.5.
pos = np.argmax(NLL < 0.5)
lower_edge = exp[pos]
print(lower_edge)
error_lo = 110 - lower_edge

print(pos)
# The search restarts at pos+1, so argmax is relative to pos+1; adding pos
# (rather than pos+1) selects the grid point just before the first one
# whose NLL exceeds 0.5.
upper_index = np.argmax(NLL[pos + 1:] > 0.5) + pos
upper_edge = exp[upper_index]
print(upper_edge)
error_hi = upper_edge - 110


print("+ 1 sigma error is %4.2f "% error_hi )
print("- 1 sigma error is %4.2f "% error_lo )


# Body weight vs height 

In [None]:
# Body weight vs height: generate a correlated toy sample.

rng = np.random.default_rng(1)      # fixed seed, so the sample is reproducible
h = rng.normal(loc=5.5, scale=0.5, size=200000)
# One Gaussian draw per entry of h, centred on that entry (sigma 0.95),
# followed by a linear rescaling.
w = rng.normal(loc=h, scale=0.95) * 30 - 20

# Two-column table: column 0 holds h, column 1 holds w.
hw = np.column_stack((h, w))
print(hw)


### Getting fractions

In [None]:

# Fractions of entries passing a cut on h, a cut on w, and both cuts at once.
passes_h = h > 6
passes_w = w > 200

fraction = passes_h.sum() / h.size
fraction2 = passes_w.sum() / w.size
# Logical AND of the two masks counts the entries that satisfy both cuts.
jointfraction = (passes_h & passes_w).sum() / h.size

# Third printed value is the product of the two single-cut fractions, shown
# next to the joint fraction for comparison.
print(fraction, fraction2, fraction*fraction2, jointfraction)


### Plotting

In [None]:
# Histogram h and w separately (figures 0 and 1), then jointly in 2D
# (figure 2). All three use density=True and the same binning per variable.
h_bins, h_range = 30, (4, 7)
w_bins, w_range = 20, (80, 220)

plt.figure(0)
plt.hist(h, bins=h_bins, range=h_range, density=True)
plt.xlabel('height [feet]')
plt.ylabel('fraction of entries')

plt.figure(1)
plt.hist(w, bins=w_bins, range=w_range, color='orange', density=True)
plt.xlabel('weight [lbs]')
plt.ylabel('fraction of entries')

plt.figure(2)
plt.hist2d(h, w, bins=[h_bins, w_bins], range=[h_range, w_range], density=True)
plt.xlabel('height [feet]')
plt.ylabel('weight [lbs]')
plt.colorbar(label='fraction of entries')


### Random shuffle

In [None]:
# Note what these two lines do?
# (Each call shuffles one array on its own; h is shuffled before w.)

rng.shuffle(h)
rng.shuffle(w)


# Same 2D histogram settings as used for the unshuffled sample, so the two
# pictures can be compared directly.
joint_hist_opts = dict(bins=[30, 20], range=[[4, 7], [80, 220]], density=True)
plt.hist2d(h, w, **joint_hist_opts)
plt.xlabel('height [feet]')
plt.ylabel('weight [lbs]')
plt.colorbar(label='fraction of entries')

# Single bin hypothesis testing

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Two ensembles of Poisson-distributed outcomes: mean 100 for sampleB and
# mean 140 for sampleSB. The generator is re-seeded, so results repeat.
rng = np.random.default_rng(32)
n_outcomes = 500000
sampleB = rng.poisson(lam=100, size=n_outcomes)
sampleSB = rng.poisson(lam=140, size=n_outcomes)

In [None]:

# Sketch of the single-bin setup: one vertical marker each for the observed
# count (120), the B expectation (100) and the S+B expectation (140).
marker_lines = [
    (120, {'label': 'Observation'}),
    (100, {'ls': 'dashed', 'label': 'Expected B'}),
    (140, {'ls': 'dotted', 'label': 'Expected S+B'}),
]
for outcome, line_opts in marker_lines:
    plt.plot([outcome, outcome], [1, 0], **line_opts)
plt.legend(loc='upper right')
plt.xlim(70, 180)

# Text captions at mid-height, placed next to the markers.
captions = [(121, 'Observed'), (141, 'Expected S+B'), (80, 'Expected B')]
for x_text, caption in captions:
    plt.text(x_text, 0.5, caption)

plt.xlabel("Outcome")
plt.ylabel('Entries')

In [None]:
# Mean, then standard deviation, of each ensemble (sampleB first, sampleSB second).
meanB, meanSB = sampleB.mean(), sampleSB.mean()
print(meanB, meanSB)
stdB, stdSB = sampleB.std(), sampleSB.std()
print(stdB, stdSB)

In [None]:
# Overlay the two outcome distributions (unit-width bins from 70 to 180,
# density-normalised) and mark the observation and the two expectations.
shared_hist_opts = dict(bins=110, range=(70, 180), density=True)
bincontent, binedges, others = plt.hist(
    sampleB, histtype="step", label="background-only", **shared_hist_opts)
bincontent2, binedges, others = plt.hist(
    sampleSB, label="signal-plus-background", **shared_hist_opts)

# Marker heights follow the tallest bin of the corresponding histogram; the
# observation marker is drawn at 90% of the sampleB peak.
peakB = np.max(bincontent)
peakSB = np.max(bincontent2)
plt.plot([120, 120], [peakB * 0.9, 0], label='Observation')
plt.plot([100, 100], [peakB, 0], ls='dashed')
plt.plot([140, 140], [peakSB, 0], ls='dotted')
plt.legend(loc='upper right')
plt.xlabel("Outcome")
plt.ylabel('Entries')

### Pseudo experiments

`sampleB` and `sampleSB` are two samples of outcomes generated from the `B-only` hypothesis and the `S+B` hypothesis, respectively.

In [None]:
# Fraction of pseudo-experiments at least as extreme as the observed count:
# at or above it in sampleB, at or below it in sampleSB.
n_observed = 120
fraction_greater_B = (sampleB >= n_observed).sum() / sampleB.size
fraction_less_SB = (sampleSB <= n_observed).sum() / sampleSB.size

print("In the P.E.s generated from B, probability to see 120 or more is %4.4f"% fraction_greater_B)
print("In the P.E.s generated from S+B, probability to see 120 or less is %4.4f"% fraction_less_SB)

**What is the standard deviation here?**

# p-value, significance conversion

In [None]:
# Convert a significance Z (in standard deviations) into a one-sided p-value,
# i.e. the upper-tail probability of a standard normal distribution.
from scipy.stats import norm

# norm.sf is the survival function (1 - cdf) evaluated directly. Forming
# `1 - norm.cdf(Z)` by hand subtracts two nearly equal numbers and loses
# digits in the far tail, which already shows at Z = 5.
significance = 2
pvalue = norm.sf(significance)
print('p-value for Z = 2  %4.10f'% pvalue)

pvalue = norm.sf(5)
print('p-value for Z = 5  %4.10f'% pvalue)


In [None]:
# Convert a one-sided p-value back into a significance Z.
pvalue = 0.1
# Inverse survival function: solves norm.sf(Z) == pvalue directly, without
# forming 1 - pvalue first (which rounds away small p-values).
Z = norm.isf(pvalue)
print(Z)

# Round trip Z = 5 -> p-value -> Z. Using sf/isf on both legs keeps the
# tail probability at full precision, so the result comes back as 5.
pvalue = norm.sf(5)
Z = norm.isf(pvalue)
print(Z)

# Log likelihood ratio and multibin pseudo experiments

# $$ -2\,\ln\frac{L_{s+b}}{L_{b}} $$

In [None]:
# Five-bin inputs: flat B and S expectations per bin, their sum, and two
# observed count vectors (Obs2 is Obs lowered by 15 in every bin).
n_bins = 5
B = 100 * np.ones(n_bins)
S = 20 * np.ones(n_bins)
SB = B + S
Obs = np.array([133, 121, 117, 119, 126])
Obs2 = Obs - 15

In [None]:
def loglikelihood(Obs, Exp):
    """Return ``Obs * log(Exp) - Exp``, evaluated element-wise.

    This is the Poisson log-probability of ``Obs`` given expectation ``Exp``
    with the ``log(Obs!)`` term left out. That term does not depend on
    ``Exp``, so it cancels in any difference of two calls that share the
    same ``Obs``.

    Parameters
    ----------
    Obs : scalar or array
        Observed count(s).
    Exp : scalar or array
        Expected count(s); combined with ``Obs`` by NumPy broadcasting.

    Returns
    -------
    Scalar or array with the broadcast shape of the inputs.
    """
    log_term = Obs * np.log(Exp)
    return log_term - Exp

In [None]:
# Observed test statistic for the first dataset `Obs`:
# -2 * (log L_{s+b} - log L_b), computed per bin and then summed over bins.
LLB = loglikelihood(Obs, B)      # per-bin log-likelihood under B
LLSB = loglikelihood(Obs, SB)    # per-bin log-likelihood under S+B
LLR = -2*(LLSB-LLB)
# Print the per-bin terms and the summed ratio. The original printed
# `LRB` / `LRSB`, names that are never defined, and raised a NameError.
print(LLB, LLSB, np.sum(LLR))
obsLLR = np.sum(LLR)

In [None]:
# Observed test statistic for the second dataset `Obs2`:
# -2 * (log L_{s+b} - log L_b), computed per bin and then summed over bins.
LLB = loglikelihood(Obs2, B)     # per-bin log-likelihood under B
LLSB = loglikelihood(Obs2, SB)   # per-bin log-likelihood under S+B
LLR = -2*(LLSB-LLB)
# Print the per-bin terms and the summed ratio. The original printed
# `LRB` / `LRSB`, names that are never defined, and raised a NameError.
print(LLB, LLSB, np.sum(LLR))
obsLLR2 = np.sum(LLR)

In [None]:
# Multi-bin pseudo-experiments: 2000 rows of 5 Poisson draws each, with the
# per-bin means taken from B and from SB.
# NOTE: this rebinds `sampleSB`, which the single-bin section also assigns.
toy_shape = (2000, 5)
sampleBonly = rng.poisson(lam=B, size=toy_shape)
sampleSB = rng.poisson(lam=SB, size=toy_shape)
print(sampleBonly.shape, sampleSB.shape)

In [None]:
# Test statistic -2 * (log L_{s+b} - log L_b) for every pseudo-experiment:
# per-bin terms are summed along axis 1, giving one value per row.

# Ensemble generated with mean B
sampleBLLB = loglikelihood(sampleBonly, B)
sampleBLLSB = loglikelihood(sampleBonly, SB)
sampleBLLR = (-2 * (sampleBLLSB - sampleBLLB)).sum(axis=1)

# Ensemble generated with mean SB
sampleSBLLB = loglikelihood(sampleSB, B)
sampleSBLLSB = loglikelihood(sampleSB, SB)
sampleSBLLR = (-2 * (sampleSBLLSB - sampleSBLLB)).sum(axis=1)

# Both ensembles are drawn with the same binning; vertical markers show the
# two observed values, scaled to the tallest bin of the first histogram.
llr_hist_opts = dict(bins=40, range=(-40, 40), histtype='step')
bincontents, binedges, others = plt.hist(sampleBLLR, label='B-only ensemble', **llr_hist_opts)
plt.hist(sampleSBLLR, label='S+B Ensemble', **llr_hist_opts)
tallest = np.max(bincontents)
plt.plot([obsLLR, obsLLR], [tallest, 0], color='red', label='Observed')
plt.plot([obsLLR2, obsLLR2], [tallest, 0], color='black', ls='dashed', label='Observed 2')
plt.legend()
plt.ylim(0, tallest * 1.625)    # extra headroom above the histograms
plt.xlabel('-2log($L_{s+b}/L_{b}$)')
plt.ylabel('Entries')


In [None]:
# Tail fractions relative to the second observed value (obsLLR2): entries
# strictly below it in the B ensemble, strictly above it in the S+B ensemble.
b_below_obs = sampleBLLR < obsLLR2
sb_above_obs = sampleSBLLR > obsLLR2
fractionB = b_below_obs.sum() / sampleBLLR.size
fractionSB = sb_above_obs.sum() / sampleSBLLR.size


print(fractionB, fractionSB)
print('Probability of S+B hypothesis fluctuating to data is %4.4f'% fractionSB )
print('Probability of B-only hypothesis fluctuating to data is %4.4f'% fractionB )

# Measurement and Error


In [None]:
# 10000 Poisson-distributed outcomes with mean 110.
PE = rng.poisson(lam=110, size=10000)

In [None]:
# Fraction of outcomes lying more than one standard deviation above the
# sample mean, then the fraction lying more than one below it.
stdPE = PE.std()
meanPE = PE.mean()

fractionPE = (PE > meanPE + stdPE).sum() / PE.size
print(fractionPE)

fractionPE2 = (PE < meanPE - stdPE).sum() / PE.size
print(fractionPE2)

In [None]:
# Histogram of the outcomes with vertical markers at mean + std, mean - std
# and the mean itself; each marker reaches up to the tallest bin.
bincount, binedge, other = plt.hist(PE, bins=40, range=(50, 160), histtype='step')
tallest_bin = np.max(bincount)
centre = PE.mean()

vertical_markers = [
    (centre + stdPE, dict(color='red', label='+ One sigma', ls='dotted')),
    (centre - stdPE, dict(color='green', ls='dashed', label='- One sigma')),
    (centre, dict(color='black', label='Mean')),
]
for x_pos, marker_opts in vertical_markers:
    plt.plot([x_pos, x_pos], [tallest_bin, 0], **marker_opts)
plt.xlabel('Outcome')
plt.ylabel('Entries')
plt.legend()