In [1]:
import numpy as np

In [2]:
class EM:
    """Expectation-maximisation for the scalar model ``Y = W * Z + e``.

    The update formulas below correspond to a standard-normal latent ``Z``
    and zero-mean Gaussian noise ``e`` with standard deviation ``sigma_e``.
    ``Z`` is observed for the ``N`` labelled pairs ``(Y_N, Z_N)`` and missing
    for the ``M`` unlabelled values ``Y_M``.

    Attributes:
        Y_M, Y_N, Z_N: the data as float ndarrays (``None`` when not given).
        Z_M: placeholder, always ``None``; never read by this class.
        M, N, S: number of unlabelled, labelled and total samples.
        sigma_e, W: current parameter estimates.
        EZ, EZZ: E[Z | Y_M] and E[Z**2 | Y_M] from the most recent E-step
            (``None`` until ``e_step`` has been called).
    """

    def __init__(self, Y_M, Y_N=None, Z_N=None, sigma_e_init=0, W_init=0):
        """Store the data and the starting values of the parameters.

        Args:
            Y_M: outputs whose latent ``Z`` is unobserved (array-like).
            Y_N: outputs whose latent ``Z`` is observed, or ``None``.
            Z_N: the observed latents matching ``Y_N``; ignored when
                ``Y_N`` is ``None``.
            sigma_e_init: starting noise standard deviation.
            W_init: starting weight.

        Raises:
            ValueError: if ``Y_N`` is given without a ``Z_N`` of equal shape.

        Note:
            The default starting point ``sigma_e_init = W_init = 0`` is
            degenerate: ``e_step`` cannot be evaluated there.
        """
        # Coerce to ndarrays so plain Python lists are handled elementwise
        # instead of failing (or being repeated) under ``W * Y_M``.
        self.Y_M = np.asarray(Y_M, dtype=float)
        self.Y_N = None if Y_N is None else np.asarray(Y_N, dtype=float)
        self.Z_N = None if Z_N is None else np.asarray(Z_N, dtype=float)
        self.Z_M = None

        if self.Y_N is not None:
            if self.Z_N is None:
                raise ValueError("Z_N must be provided whenever Y_N is provided")
            if self.Y_N.shape != self.Z_N.shape:
                raise ValueError("Y_N and Z_N must have the same shape")

        self.M = len(self.Y_M)
        self.N = len(self.Y_N) if self.Y_N is not None else 0
        self.S = self.M + self.N

        self.sigma_e = sigma_e_init
        self.W = W_init

        self.EZ = None
        self.EZZ = None

    def e_step(self):
        """Compute E[Z | Y_M] and E[Z**2 | Y_M] under the current parameters.

        Uses ``mean = W*Y / (W**2 + sigma_e**2)`` and
        ``var = sigma_e**2 / (W**2 + sigma_e**2)``, which is algebraically the
        same as dividing by ``W**2 / sigma_e**2 + 1`` but stays finite when
        ``sigma_e`` is zero or very small.

        Raises:
            ZeroDivisionError: if ``W`` and ``sigma_e`` are both zero.
        """
        noise_var = self.sigma_e ** 2
        denom = self.W ** 2 + noise_var
        if denom == 0:
            raise ZeroDivisionError(
                "e_step is undefined when W and sigma_e are both zero; "
                "pass non-zero initial values"
            )

        post_mean = self.W * self.Y_M / denom
        post_var = noise_var / denom

        self.EZ = post_mean
        self.EZZ = post_var + post_mean ** 2

    def m_step(self):
        """Re-estimate ``W`` and then ``sigma_e``; ``e_step`` must have run first."""
        # W must be updated first: the sigma_e update is evaluated at the new W.
        self.W = self.new_W()
        self.sigma_e = self.new_sigma_e()

    def new_sigma_e(self):
        """Return the updated noise standard deviation for the current ``W``.

        Adds the squared residuals of the labelled pairs to the expected
        squared residuals of the unlabelled outputs and divides by ``S``.
        """
        labelled = 0
        if self.Y_N is not None:
            labelled = np.sum((self.Y_N - self.W * self.Z_N) ** 2)
        sum_y2 = np.sum(self.Y_M ** 2)
        cross = -2 * self.W * np.sum(self.Y_M * self.EZ)
        quad = self.W ** 2 * np.sum(self.EZZ)

        total = labelled + sum_y2 + cross + quad
        # The exact value is non-negative; rounding can leave it a hair below
        # zero when the fit is perfect, which would turn the sqrt into NaN.
        return np.sqrt(max(total, 0.0) / self.S)

    def new_W(self):
        """Return the updated weight from observed and expected moments."""
        numerator = np.sum(self.Y_M * self.EZ)
        denominator = np.sum(self.EZZ)
        if self.Y_N is not None:
            numerator = numerator + np.sum(self.Y_N * self.Z_N)
            denominator = denominator + np.sum(self.Z_N ** 2)
        return numerator / denominator

        

In [3]:
def test(sigma_e, W, ratio_obs, size=10000, n_iter=100, seed=None):
    """Simulate ``Y = W*Z + e`` and run EM with ``Z`` hidden for part of the data.

    Prints the complete-data estimate (every ``Z`` observed) and then the
    EM estimate ``(sigma_e, W)`` after each iteration. EM is started at the
    true ``sigma_e`` and ``W``.

    Args:
        sigma_e: true noise standard deviation used to draw ``e``.
        W: true weight used to generate ``Y``.
        ratio_obs: fraction of samples whose ``Z`` is given to EM.
        size: total number of simulated samples.
        n_iter: number of EM iterations to run.
        seed: if given, draw from a private ``RandomState(seed)`` so the run
            is reproducible; otherwise use the global ``np.random`` stream.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    num_obs = int(size * ratio_obs)

    Z = rng.normal(0, 1, size)
    e = rng.normal(0, sigma_e, size)
    Y = W * Z + e

    # Labelled part: Z is observed alongside Y.
    Y_N = Y[:num_obs]
    Z_N = Z[:num_obs]
    # Unlabelled part: only Y is available to EM.
    Y_M = Y[num_obs:]

    # Complete-data maximum-likelihood solution. The noise estimate must use
    # the fitted weight C_W, not the true W used to generate the data.
    C_W = np.sum(Y * Z) / np.sum(Z ** 2)
    C_sigma_e = np.sqrt(np.sum((Y - C_W * Z) ** 2) / size)

    print(f'Complete Data solution: (sig_e,W): {C_sigma_e, C_W}')

    EM1 = EM(Y_M, Y_N, Z_N, sigma_e, W)
    for _ in range(n_iter):
        EM1.e_step()
        EM1.m_step()
        print(EM1.sigma_e, EM1.W)

# sigma_e = 0.05, W = 5

In [4]:
# True noise std and weight for the simulation; test() also starts EM from these values.
sigma_e = 0.05
W = 5


In [5]:
# Z is given to EM for 1% of the samples; the remaining Z values are hidden.
ratio_obs = 0.01
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.049800311173332744, 5.0008373032128235)
0.050011708714820545 4.999967048239031
0.050023275242874164 4.999934335653224
0.05003470138901913 4.99990186050355
0.05004598893410577 4.9998696210636195
0.050057139634704664 4.999837615619596
0.0500681552237636 4.999805842470099
0.050079037411407415 4.999774299926118
0.05008978788439879 4.999742986310926
0.0501004083069635 4.999711899959985
0.050110900320974185 4.999681039220867
0.05012126554645187 4.999650402453155
0.05013150558192035 4.999619988028364
0.0501416220043525 4.999589794329855
0.050151616369608576 4.999559819752748
0.05016149021310485 4.999530062703833
0.050171245049552046 4.999500521601496
0.05018088237379478 4.999471194875624
0.050190403660721236 4.999442080967527
0.05019981036563535 4.999413178329859
0.050209103924540824 4.999384485426528
0.05021828575459732 4.999356000732622
0.05022735725390894 4.999327722734324
0.050236319802499464 4.9992996499288305
0.050245174761895404 4.999271780824279
0

In [6]:
# Z is given to EM for 25% of the samples; the remaining Z values are hidden.
ratio_obs = 0.25
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.04999051510437729, 4.999352673249836)
0.05017249003757822 4.999793874971126
0.05030092224810515 4.9996384053333705
0.050396722001285185 4.999521138714795
0.050468277388977585 4.9994326854803415
0.050521778523334115 4.999365964729709
0.05056181146140423 4.999315636222714
0.050591784018353805 4.999277672254511
0.05061423421826917 4.999249034928594
0.05063105554131434 4.999227432837986
0.050643662446061645 4.999211137591838
0.05065311259526273 4.999198845454472
0.05066019744629143 4.999189572996808
0.050665509586445404 4.999182578394722
0.05066949289262227 4.9991773020682455
0.05067247995685661 4.999173321907209
0.05067472004799292 4.9991703194982495
0.05067640002011338 4.999168054649616
0.050677659960782655 4.999166346174802
0.05067860490650329 4.999165057397158
0.050679313619456705 4.999164085215586
0.0506798451630367 4.9991633518564225
0.050680243831010593 4.999162798651559
0.05068054284174839 4.99916238134504
0.05068076710824131 4.999162066552634


In [7]:
# Z is given to EM for 50% of the samples; the remaining Z values are hidden.
ratio_obs = 0.5
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.05068849000247689, 5.000395492397793)
0.050418993327546596 5.000060631757414
0.05062716931267237 5.000091784512783
0.05073093359259649 5.000107789155842
0.05078273635513527 5.00011601073361
0.05080861848588089 5.000120233798498
0.050821555021701686 5.0001224028313676
0.050828022300604875 5.000123516797465
0.050831255766245115 5.000124088864023
0.05083287249332379 5.000124382622883
0.05083368087479557 5.000124533459286
0.050834085079904695 5.000124610904221
0.05083428719112289 5.0001246506647705
0.05083438825149031 5.000124671076746
0.05083443878421797 5.000124681555068
0.05083446405187976 5.000124686933717
0.05083447668641444 5.000124689694487
0.050834483004031275 5.000124691111465
0.05083448616297537 5.000124691838698
0.050834487742547535 5.000124692211915
0.05083448853241232 5.000124692403441
0.050834488927351866 5.000124692501722
0.05083448912482879 5.000124692552154
0.05083448922357441 5.000124692578028
0.05083448927295438 5.000124692591306
0.0

In [8]:
# Z is given to EM for 75% of the samples; the remaining Z values are hidden.
ratio_obs = 0.75
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.04981891253308768, 5.00052882315938)
0.0498499256884094 5.0002411412000916
0.04981214747616263 5.000299794320253
0.04980268526990459 5.000314059446613
0.0498003181733009 5.0003175288421104
0.049799726188963986 5.000318372628985
0.0497995781517274 5.000318577846428
0.04979954113300791 5.000318627757721
0.049799531876113905 5.000318639896824
0.049799529561314844 5.000318642849242
0.04979952898249087 5.000318643567322
0.0497995288377447 5.000318643741973
0.04979952880156181 5.000318643784451
0.0497995287925033 5.000318643794783
0.049799528790238676 5.0003186437972955
0.04979952878966887 5.0003186437979075
0.049799528789537376 5.000318643798057
0.049799528789493536 5.000318643798092
0.049799528789486236 5.000318643798101
0.04979952878950084 5.000318643798103
0.049799528789486236 5.000318643798104
0.04979952878950084 5.000318643798104
0.049799528789493536 5.000318643798104
0.049799528789493536 5.000318643798104
0.049799528789493536 5.000318643798104
0.0

In [9]:
# Z is given to EM for 99% of the samples; the remaining Z values are hidden.
ratio_obs = 0.99
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.0505965500101309, 5.000195025250316)
0.05056097912518655 5.000105566940028
0.05056655931750231 5.000106979603593
0.05056661515954341 5.0001069983968796
0.05056661571853339 5.000106998645826
0.05056661572412975 5.0001069986491125
0.05056661572418596 5.000106998649156
0.05056661572418506 5.000106998649157
0.05056661572418686 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.05056661572418596 5.000106998649157
0.050566615

# sigma_e = 0.03, W = 7

In [10]:
# True noise std and weight for the simulation; test() also starts EM from these values.
sigma_e = 0.03
W = 7


In [11]:
# Z is given to EM for 1% of the samples; the remaining Z values are hidden.
ratio_obs = 0.01
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.029952600873601663, 6.9995649473872)
0.03002007872181386 7.000010768385109
0.030039939192568883 7.000021424734401
0.030059583974235454 7.0000319702114595
0.030079015594192186 7.000042405967817
0.030098236546002362 7.000052733143081
0.0301172492902783 7.000062952865053
0.03013605625428143 7.000073066249861
0.030154659834027854 7.000083074402065
0.03017306239339072 7.000092978414785
0.03019126626580355 7.000102779369815
0.03020927375325491 7.000112478337745
0.030227087127785244 7.000122076378076
0.03024470863153024 7.000131574539327
0.030262140478105937 7.000140973859153
0.030279384851485514 7.000150275364462
0.030296443908814245 7.000159480071513
0.030313319777930166 7.00016858898604
0.03033001455949491 7.000177603103353
0.030346530327387176 7.0001865234084475
0.030362869128899357 7.000195350876108
0.030379032985025554 7.000204086471016
0.03039502389112804 7.000212731147854
0.030410843816928098 7.000221285851411
0.030426494706971716 7.00022975151667

In [12]:
# Z is given to EM for 25% of the samples; the remaining Z values are hidden.
ratio_obs = 0.25
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.03004391728067781, 7.000277926295368)
0.030050666866706335 6.999829234275098
0.03008802776982906 6.999703730675545
0.030115707565927713 6.999611490203591
0.030136286296947135 6.999543695719924
0.030151624670454385 6.999493867774331
0.030163078351501067 6.999457244587917
0.03017164272142574 6.999430326582641
0.030178052882880498 6.999410541742343
0.030182854065310526 6.999395999724767
0.030186451953526896 6.999385311172883
0.030189149105206526 6.999377454929859
0.03019117154875805 6.999371680453543
0.030192688348643082 6.999367436097299
0.030193826073837735 6.9993643163997215
0.03019467954148404 6.999362023344313
0.030195319813616256 6.999360337886242
0.03019580016788725 6.999359099024936
0.030196160556933308 6.999358188422624
0.03019643094627673 6.999357519099029
0.030196633814113496 6.999357027121967
0.030196786022527376 6.999356665499901
0.03019690022218729 6.999356399692937
0.03019698590463483 6.999356204313356
0.030197050190550436 6.99935606070

In [13]:
# Z is given to EM for 50% of the samples; the remaining Z values are hidden.
ratio_obs = 0.5
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.03015144383041012, 7.000044701630436)
0.03016604374466958 7.000194686907662
0.030248251510891267 7.000292520970032
0.030289155746165274 7.0003416823497835
0.030309559466826373 7.000366385258843
0.030319749975528245 7.0003787979780805
0.03032484277530607 7.000385035081447
0.030327388757897963 7.0003881690694145
0.0303286617445443 7.000389743816032
0.03032929828716192 7.000390535083524
0.030329616596114474 7.000390932673474
0.03032977577291968 7.00039113245116
0.03032985537314766 7.000391232833687
0.030329895179396563 7.000391283272962
0.030329915085787966 7.000391308617194
0.030329925040610047 7.000391321351903
0.03032993001888348 7.000391327750703
0.03032993250840372 7.000391330965898
0.030329933753499613 7.000391332581432
0.030329934375831636 7.000391333393187
0.030329934687117588 7.000391333801066
0.030329934842760566 7.0003913340060135
0.03032993492067801 7.0003913341089925
0.030329934959492797 7.000391334160736
0.030329934979116093 7.0003913341

In [14]:
# Z is given to EM for 75% of the samples; the remaining Z values are hidden.
ratio_obs = 0.75
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.029859301210831798, 6.999998407639509)
0.029744731018552194 6.999885285625549
0.029680507669107653 6.999856949866566
0.02966442696560506 6.999849950443339
0.029660405411208943 6.999848221463955
0.0296593999798302 6.999847794378313
0.029659148629919443 6.9998476888820536
0.02965908579551887 6.999847662823167
0.029659070087768812 6.999847656386347
0.02965906616108145 6.999847654796397
0.029659065179384998 6.999847654403668
0.029659064934089673 6.999847654306663
0.029659064872686113 6.999847654282702
0.02965906485732909 6.999847654276785
0.029659064853551163 6.99984765427532
0.02965906485256988 6.999847654274961
0.02965906485237363 6.9998476542748715
0.0296590648522019 6.999847654274849
0.029659064852250967 6.999847654274844
0.0296590648522755 6.999847654274841
0.0296590648522019 6.999847654274842
0.029659064852250967 6.999847654274842
0.029659064852226434 6.999847654274842
0.02965906485230003 6.999847654274842
0.029659064852226434 6.999847654274842
0

In [15]:
# Z is given to EM for 99% of the samples; the remaining Z values are hidden.
ratio_obs = 0.99
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.029884888074148765, 6.999648011992243)
0.0298633497600368 6.999634506813212
0.029861958095281593 6.999630933918819
0.02986194417848837 6.999630898988238
0.029861944039341087 6.999630898646738
0.02986194403794465 6.9996308986433995
0.02986194403793399 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.999630898643367
0.029861944037932468 6.9996308

# sigma_e = 0.09, W = 4


In [16]:
# True noise std and weight for the simulation; test() also starts EM from these values.
sigma_e = 0.09
W = 4


In [17]:
# Z is given to EM for 1% of the samples; the remaining Z values are hidden.
ratio_obs = 0.01
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.09060258933796365, 4.000334471421876)
0.09004336191057037 3.9999715170389343
0.09008627169698326 3.999943293079617
0.0901287343786927 3.999915325843596
0.09017075491316458 3.9998876130710754
0.09021233819687974 3.999860152520784
0.09025348906616605 3.9998329419698497
0.09029421229798881 3.9998059792136793
0.09033451261097385 3.999779262065828
0.09037439466598643 3.9997527883578767
0.09041386306724945 3.999726555939308
0.09045292236284083 3.999700562677381
0.09049157704569817 3.9996748064570102
0.09052983155429327 3.9996492851806384
0.09056769027349186 3.999623996768116
0.09060515753516456 3.9995989391565816
0.09064223761904754 3.9995741103003315
0.09067893475341916 3.999549508170705
0.09071525311581775 3.999525130755963
0.09075119683375246 3.9995009760611646
0.09078676998535927 3.999477042108046
0.09082197660006663 3.9994533269349053
0.09085682065944682 3.9994298285964764
0.09089130609754843 3.9994065451638168
0.09092543680184759 3.999383474724184


In [18]:
# Z is given to EM for 25% of the samples; the remaining Z values are hidden.
ratio_obs = 0.25
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.08934159315485436, 4.000904260344352)
0.08995689384794224 4.000542679055118
0.08992244635434063 4.000952605526149
0.08989541194697985 4.001262218186705
0.08987445987459022 4.00149604464052
0.08985836597527035 4.001672624051073
0.08984608342412953 4.001805965443587
0.0898367539687376 4.001906652514504
0.08982969244832069 4.001982679928598
0.08982436153502281 4.002040085961706
0.08982034500749096 4.002083430854802
0.0898173232745261 4.002116158377572
0.08981505249746023 4.002140869042182
0.08981334750082717 4.002159526513918
0.08981206814454097 4.002173613528724
0.08981110864434529 4.00218424965586
0.08981038930377372 4.002192280234883
0.08980985016809641 4.002198343538405
0.08980944618382142 4.0022029214890535
0.08980914352340945 4.002206377956205
0.08980891680430932 4.002208987673281
0.08980874698988613 4.0022109580723315
0.08980861980817129 4.002212445770397
0.08980852456244373 4.002213569017513
0.08980845323733441 4.0022144170954705
0.08980839982

In [19]:
# Z is given to EM for 50% of the samples; the remaining Z values are hidden.
ratio_obs = 0.5
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.09104958180538845, 3.9999302473966196)
0.09037028391643896 4.0001472382232155
0.09055475323964117 4.000220547456654
0.090646817032208 4.00025704503972
0.0906928043704796 4.000275214929263
0.09071578591604362 4.000284260394516
0.09072727315780571 4.000288763413715
0.09073301564520533 4.000291005089779
0.0907358864803434 4.0002921210250895
0.09073732173335872 4.000292676549003
0.09073803928763693 4.000292953093038
0.0907383980312152 4.0002930907581025
0.09073857738691238 4.000293159288166
0.09073866705688206 4.00029319340247
0.09073871188800481 4.00029321038451
0.0907387343016214 4.000293218838099
0.0907387455074814 4.000293223046241
0.09073875110994181 4.000293225141018
0.0907387539109193 4.000293226183774
0.09073875531128373 4.000293226702845
0.09073875601143386 4.0002932269612295
0.09073875636146081 4.000293227089849
0.09073875653649031 4.000293227153873
0.09073875662398903 4.000293227185742
0.09073875666770631 4.000293227201607
0.0907387566895810

In [20]:
# Z is given to EM for 75% of the samples; the remaining Z values are hidden.
ratio_obs = 0.75
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.08926573641827294, 4.0007638534703425)
0.08940213094190004 4.000943856230536
0.0892504839240553 4.001178446847302
0.08921243642685246 4.001236728684352
0.08920291625467375 4.00125120675047
0.08920053572958705 4.001254803221806
0.08919994057647765 4.001255696609194
0.08919979178883498 4.001255918532181
0.0891997545924504 4.001255973659206
0.08919974529351804 4.001255987353091
0.08919974296881028 4.001255990754736
0.08919974238765781 4.001255991599725
0.08919974224236255 4.001255991809625
0.08919974220605607 4.001255991861766
0.08919974219697742 4.001255991874717
0.08919974219470571 4.001255991877935
0.08919974219412656 4.001255991878734
0.0891997421939879 4.001255991878932
0.08919974219395935 4.001255991878981
0.08919974219393896 4.001255991878994
0.0891997421939471 4.001255991878997
0.0891997421939308 4.001255991878998
0.08919974219392672 4.001255991878998
0.0891997421939308 4.001255991878998
0.08919974219392672 4.001255991878998
0.0891997421939308

In [21]:
# Z is given to EM for 99% of the samples; the remaining Z values are hidden.
ratio_obs = 0.99
test(sigma_e, W, ratio_obs)

Complete Data solution: (sig_e,W): (0.08999264672029125, 3.999577074146076)
0.0899426929872669 3.999611437483035
0.08994208579789942 3.9996066162402792
0.08994207943690245 3.9996065564191934
0.08994207936972878 3.9996065556770795
0.08994207936901336 3.999606555667874
0.08994207936900489 3.9996065556677607
0.08994207936900514 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.08994207936900527 3.999606555667759
0.08994207936900502 3.999606555667759
0.0899420