In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import math
import scipy.stats as sts
import scipy.optimize as opt

In [23]:
dataQ2 = pd.read_csv('sick.txt', header = 0)
print(dataQ2)

     ﻿sick    age  children  avgtemp_winter
0     1.67  57.47      3.04           54.10
1     0.71  26.77      1.20           36.54
2     1.39  41.85      2.31           32.38
3     1.37  51.27      2.46           52.94
4     1.45  44.22      2.72           45.90
5     0.62  44.67      1.20           68.60
6     1.06  27.92      1.84           29.42
7     1.29  44.87      2.40           50.49
8     1.05  53.68      1.65           55.12
9     1.18  39.63      1.96           37.30
10    1.21  44.73      1.98           41.21
11    0.63  41.05      1.04           56.73
12    0.67  30.44      1.08           41.38
13    0.64  37.77      0.94           47.97
14    0.86  30.46      1.39           34.33
15    1.25  46.54      2.10           44.26
16    0.88  51.92      1.53           65.57
17    0.12  15.46      0.00           33.46
18    1.53  59.02      2.67           55.67
19    1.36  45.63      2.35           42.02
20    0.73  42.65      1.03           48.89
21    0.15  28.09      0.00     

In [98]:
def normal_pdf(x, mean, sd):
    values=(1/(sd*np.sqrt(2*np.pi))*np.exp(-(x-mean)**2/(2*sd**2)))
    return values

n=len(dataQ2.sick1)

def log_like_q2 (dataQ2, b0, b1,b2,b3,sd):
    error=dataQ2['sick1']-(b0+b1*dataQ2['age']+b2*dataQ2['children']+b3*dataQ2['avgtemp_winter'])
    log_pdf_values=-n/2*np.log(2*np.pi)-n*np.log(sd)-1/(2*sd**2)*np.sum(error**2)
    log_like=log_pdf_values.sum()
    return log_like

def crit_Q2(params, *args):
    b0, b1, b2, b3, sd=params
    data_Q2=args[0]
    log_like_value=log_like_q2(dataQ2, b0, b1, b2, b3, sd)
    neg_log_like_value=-log_like_value
    return neg_log_like_value

dataQ2.columns=['sick1', 'age', 'children', 'avgtemp_winter']

In [99]:
b0=0
b1=0
b2=0
b3=0
sd=0.1

age = dataQ2.age
children = dataQ2.children
winter = dataQ2.avgtemp_winter
sick = dataQ2.sick1

init_parameters=np.array([b0,b1, b2, b3, sd])
bounds=((None, None), (None, None), (None, None), (None, None), (0.001, None))
results=opt.minimize(crit_Q2, init_parameters, args=((age, children, winter, sick)), method='L-BFGS-B', 
bounds=bounds)

print('The array of MLE optimized parameters is', results.x)
print('The log likelihood function is', -results.fun)

The array of MLE optimized parameters is [ 0.25164459  0.01293349  0.40050085 -0.00999171  0.00301755]
The log likelihood function is 876.865066401


In [103]:
#From Prof. Evans' Notes:
OffDiagNeg=np.eye(5)*2-1
vcv_mle=results.hess_inv.todense()*OffDiagNeg
stderr_mu_mle=np.sqrt(vcv_mle[0,0])
stderr_sig_mle=np.sqrt(vcv_mle[1,1])
print("The variance/covariance matrix is",
      vcv_mle)
print("The standard error for mean is", stderr_mu_mle)
print("The standard error for the sd is", stderr_sig_mle)
print(vcv_mle)

The variance/covariance matrix is [[  1.08658284e+02   1.59951411e+00   9.72932170e+00   7.51358476e-01
   -6.09381308e-01]
 [  1.59951411e+00   4.12712689e-02   2.36763352e-02  -5.54015639e-04
    3.22451359e-03]
 [  9.72932170e+00   2.36763352e-02   2.46726580e+00  -1.65044640e-01
    1.09911715e-01]
 [  7.51358476e-01  -5.54015639e-04  -1.65044640e-01   1.14774285e-02
    7.56101138e-03]
 [ -6.09381308e-01   3.22451359e-03   1.09911715e-01   7.56101138e-03
    5.34367470e-03]]
The standard error for mean is 10.4239284378
The standard error for the sd is 0.203153313795
[[  1.08658284e+02   1.59951411e+00   9.72932170e+00   7.51358476e-01
   -6.09381308e-01]
 [  1.59951411e+00   4.12712689e-02   2.36763352e-02  -5.54015639e-04
    3.22451359e-03]
 [  9.72932170e+00   2.36763352e-02   2.46726580e+00  -1.65044640e-01
    1.09911715e-01]
 [  7.51358476e-01  -5.54015639e-04  -1.65044640e-01   1.14774285e-02
    7.56101138e-03]
 [ -6.09381308e-01   3.22451359e-03   1.09911715e-01   7.56101

In [90]:
b0=1
sd=0.01
b1=0
b2=0
b3=0
null = log_like_q2(dataQ2, b0, b1, b2, b3, sd)
mle_calc = log_like_q2(dataQ2, 0.25201543, 0.01295189, 0.40030231, -0.01000917, 0.051814)
ratio = 2 * (mle_calc - null)
p = 1.0 - sts.chi2.cdf(ratio, 2)
print('The P-Value is = ', p, ',therefore we can reject the null hypothesis.')

The P-Value is =  0.0 ,therefore we can reject the null hypothesis.
