In [1]:
import random
import math
import linear_algebra as la
from gradient_descent import gradient_step, minibatches

## Data and Explicit MLE

In [2]:
# Simulation settings, named once so the number of draws can never drift
# away from the divisor used by the estimators below.
N_SAMPLES = 1000
TRUE_MU, TRUE_SIGMA = 1, 2
SEED = 0  # arbitrary value; fixes the sample so the explicit MLE is reproducible

# A dedicated generator makes the draw repeatable without reseeding the
# module-level `random` state that other code may rely on.
rng = random.Random(SEED)
X = [rng.gauss(TRUE_MU, TRUE_SIGMA) for _ in range(N_SAMPLES)]

# explicit MLE: the sample mean, and the 1/n (not 1/(n-1)) standard deviation.
hat_mu = sum(X) / len(X)
hat_sigma = math.sqrt(sum((x - hat_mu) ** 2 for x in X) / len(X))

## Log-likelihood Function and its Gradient

In [3]:
def log_likelihood(X: la.Vector,mu: float, sigma: float) -> float:
    """Gaussian log-likelihood of the sample X, up to an additive constant.

    Evaluates  -1/2 * sum(((x - mu) / sigma)**2) - n * ln(sigma)  with
    n = len(X).  The parameter-free term -n/2 * ln(2*pi) is left out, so
    the value is only meaningful for comparing (mu, sigma) pairs on the
    same sample.

    Args:
        X: the observations.
        mu: candidate mean.
        sigma: candidate standard deviation; must be positive.

    Returns:
        The log-likelihood (minus the constant term) as a float.

    Raises:
        ZeroDivisionError or ValueError: if sigma is not positive (raised by
            the division or by math.log respectively).
    """
    n = len(X)
    squared_z_total = sum(((x - mu) / sigma) ** 2 for x in X)
    # math.log is the natural logarithm; the math module has no `ln`.
    return -.5 * squared_z_total - n * math.log(sigma)

def grad_log_likelihood(X: la.Vector,mu: float, sigma: float) -> la.Vector:
    """Gradient of the Gaussian log-likelihood with respect to (mu, sigma).

    Returns a two-element list:
        [ sum((x - mu) / sigma**2),
          sum(((x - mu) / sigma)**2) / sigma - n / sigma ]
    where n = len(X) and the sums run over the observations in X.
    """
    count = len(X)
    variance = sigma ** 2
    residuals = [x - mu for x in X]

    # Partial derivative in mu: residuals scaled by the variance.
    d_mu = sum(r / variance for r in residuals)
    # Partial derivative in sigma: squared standardized residuals over sigma,
    # minus the n / sigma contribution of the -n * ln(sigma) term.
    d_sigma = sum((r / sigma) ** 2 for r in residuals) / sigma - count / sigma

    return [d_mu, d_sigma]

## Maximizing the Log-Likelihood Function

In [4]:
# Full-batch fit: each update uses the gradient over the whole sample X.
# theta holds the current [mu, sigma] estimate, starting from [0, 1].
theta, learning_rate = [0, 1], .0001

for epoch in range(1000):
    grad = grad_log_likelihood(X, *theta)
    # gradient_step comes from the gradient_descent module (not shown here);
    # presumably it moves theta by learning_rate * grad, i.e. uphill on the
    # log-likelihood for a positive rate -- confirm against that module.
    theta = gradient_step(theta, grad, learning_rate)
    print(epoch, theta)

# Closed-form estimates, printed for comparison with the final theta.
print('\n\nMLE:', [hat_mu, hat_sigma])

0 [0.0942653681224285, 1.3624900607991421]
1 [0.1399666089434011, 1.4652730481075278]
2 [0.17735262134823027, 1.5362711943455287]
3 [0.20977885740030977, 1.5903795841739548]
4 [0.23875417591914067, 1.6337376335745835]
5 [0.2651263560516418, 1.6695739410380623]
6 [0.28943246835418707, 1.6998253649723756]
7 [0.3120399230183181, 1.7257563420472375]
8 [0.33321399989085865, 1.7482429003771376]
9 [0.3531540929930829, 1.7679194746465763]
10 [0.3720148250948748, 1.7852616184261967]
11 [0.3899191357318093, 1.8006357435958835]
12 [0.4069667996403704, 1.814330607772253]
13 [0.4232401942892053, 1.8265780938949017]
14 [0.4388083344342734, 1.8375674036935568]
15 [0.4537297730031489, 1.8474550378298202]
16 [0.4680547363498283, 1.856371989603218]
17 [0.48182672812672395, 1.8644290426390588]
18 [0.49508375551692074, 1.8717207461686114]
19 [0.5078592814648227, 1.8783284477936628]
20 [0.5201829744254483, 1.8843226415215397]
21 [0.5320813060238346, 1.8897648098047939]
22 [0.5435780327929429, 1.89470888591

  ## Mini-Batch Estimation  

In [5]:
# Mini-batch fit: theta is updated once per batch of 10 observations, so
# one epoch performs one update for every batch that minibatches yields.
# theta holds the current [mu, sigma] estimate, starting from [0, 1].
theta, learning_rate = [0, 1], .0001

for epoch in range(1000):
    for batch in minibatches(X, batch_size=10):
        grad = grad_log_likelihood(batch, *theta)
        # gradient_step comes from the gradient_descent module (not shown
        # here); presumably it moves theta by learning_rate * grad, i.e.
        # uphill for a positive rate -- confirm against that module.
        theta = gradient_step(theta, grad, learning_rate)
    # Report once per epoch, after all of that epoch's batches.
    print(epoch, theta)

# Closed-form estimates, printed for comparison with the final theta.
print('\n\nMLE:', [hat_mu, hat_sigma])

0 [0.07104894842919582, 1.2328649351675844]
1 [0.12124676261484035, 1.3593872537030667]
2 [0.16189170432587044, 1.4466069717046954]
3 [0.19672854533298686, 1.5121913593561176]
4 [0.22760164475136582, 1.5644444590066575]
5 [0.2554796917649464, 1.6074033752656607]
6 [0.280994518760317, 1.643436688503812]
7 [0.30455940825163325, 1.6741331656553808]
8 [0.32654613612869166, 1.7007319227318909]
9 [0.3472080092459414, 1.7240518470003858]
10 [0.36665479222360575, 1.7445255150558499]
11 [0.38504986975291006, 1.7626693302059804]
12 [0.40258311616803477, 1.7789836359550868]
13 [0.41924632980459736, 1.7935100450092418]
14 [0.43513310725816906, 1.8065329335564082]
15 [0.45033494425335596, 1.8182760131545501]
16 [0.4649487605456461, 1.8289843506432006]
17 [0.47897187863095986, 1.8386789699481152]
18 [0.4924010923674314, 1.847361318420962]
19 [0.5053806542925617, 1.8554396284194556]
20 [0.5178705146275284, 1.8627122863632264]
21 [0.5299132241024337, 1.8694220663916687]
22 [0.5414812519966261, 1.87536

519 [0.9426876258641333, 1.9332110091217467]
520 [0.9426919781281263, 1.9332018345592221]
521 [0.9426838422812528, 1.9332046070168574]
522 [0.9426778946592677, 1.9331855198893488]
523 [0.9426772016623185, 1.9331819101571341]
524 [0.9426823181821911, 1.9331516389494667]
525 [0.9426900738088266, 1.9331248842082562]
526 [0.9426686961182197, 1.933150257752281]
527 [0.942662109898103, 1.933085740228385]
528 [0.9426616892807703, 1.9330674227200002]
529 [0.9426508765559465, 1.9331194351700915]
530 [0.9426423732626501, 1.9331666214401682]
531 [0.9426367512581699, 1.9331521687849504]
532 [0.9426481219019057, 1.9331648837223288]
533 [0.9426702988121489, 1.9331741259297495]
534 [0.9426766060546037, 1.9331337379374884]
535 [0.9426666075046338, 1.9330955233557108]
536 [0.9426653472486901, 1.9330767853318134]
537 [0.9426756093546492, 1.9330519964912696]
538 [0.9426852233133896, 1.9330470434547575]
539 [0.9427008772045591, 1.9330422652876402]
540 [0.9426994659176844, 1.9330310352667757]
541 [0.942699

## Stochastic-Gradient Estimation  

In [6]:
# Stochastic fit: batch_size=1, so every update is driven by the gradient
# of a single-observation batch.
# theta holds the current [mu, sigma] estimate, starting from [0, 1].
theta, learning_rate = [0, 1], .0001

for epoch in range(1000):
    for batch in minibatches(X, batch_size=1):
        grad = grad_log_likelihood(batch, *theta)
        # gradient_step comes from the gradient_descent module (not shown
        # here); presumably it moves theta by learning_rate * grad, i.e.
        # uphill for a positive rate -- confirm against that module.
        theta = gradient_step(theta, grad, learning_rate)
    # Report once per epoch, after all of that epoch's batches.
    print(epoch, theta)

# Closed-form estimates, printed for comparison with the final theta.
print('\n\nMLE:', [hat_mu, hat_sigma])

0 [0.07143753205790519, 1.2327525213837662]
1 [0.12136951579653052, 1.3590940494723818]
2 [0.16209450310141346, 1.446295727455265]
3 [0.19692624817524, 1.5120719274961165]
4 [0.22777623397898236, 1.5643271687040652]
5 [0.25560125902613223, 1.6071713951800939]
6 [0.28107498988288604, 1.6431361824637944]
7 [0.3047037545757205, 1.6740245913935898]
8 [0.32672438988486957, 1.7007082513498493]
9 [0.3474147328369876, 1.7241268576504272]
10 [0.36686719920898025, 1.7446160711284533]
11 [0.38525397962969005, 1.7627203042413975]
12 [0.40275922281625837, 1.778939810829481]
13 [0.41942250213370436, 1.7934941736292562]
14 [0.43533764091730465, 1.8065879721393876]
15 [0.4505524012445735, 1.8183623891859013]
16 [0.4651465029989738, 1.8290106121695775]
17 [0.4791254085872838, 1.8386527375247885]
18 [0.492560369443273, 1.8474191107267588]
19 [0.5055158833288942, 1.855431222212188]
20 [0.5179872024199738, 1.8626841929066167]
21 [0.529989972957904, 1.8692645895061764]
22 [0.5416311481871319, 1.87533739595

206 [0.939660753779533, 1.9331794199563994]
207 [0.9397346708576135, 1.9332261703599072]
208 [0.9398216750466826, 1.9331664080153799]
209 [0.939906021622986, 1.9331561463037554]
210 [0.9399962400703317, 1.9331662688347988]
211 [0.9400670675019865, 1.9330771877214536]
212 [0.9401205213910926, 1.9330543109609084]
213 [0.9401718733052841, 1.9330054507072791]
214 [0.9402377934468122, 1.9330017794292838]
215 [0.9403335591080731, 1.9330641081190485]
216 [0.9403900614594486, 1.9331055705691793]
217 [0.9404187822939383, 1.9330999636809396]
218 [0.9404929489467261, 1.9331390907622785]
219 [0.9405530476536627, 1.9331437304579224]
220 [0.9405997535056233, 1.9331201049943945]
221 [0.9406208288053404, 1.9330771840585599]
222 [0.9406750597826444, 1.9331175140447405]
223 [0.9407462973825861, 1.9331041902739028]
224 [0.94078232487995, 1.9331123484874635]
225 [0.9408171047219881, 1.9330929952510263]
226 [0.9408797420893374, 1.9331070677109785]
227 [0.9409195552372401, 1.9331247734175534]
228 [0.9409573

452 [0.9426409185722444, 1.932961665703155]
453 [0.9426387454712134, 1.9329970136652885]
454 [0.9426742245834006, 1.9329909286017457]
455 [0.9426847676985355, 1.9330555750523468]
456 [0.9426881610586932, 1.9330229307369082]
457 [0.9426833453287184, 1.9330475697055332]
458 [0.9426777778237593, 1.9330210556046092]
459 [0.9426680130116204, 1.9330399406736645]
460 [0.9426573303969459, 1.9330434353778971]
461 [0.9426507532307737, 1.9329757875812776]
462 [0.9426446855010584, 1.9329687497323336]
463 [0.9426366014450295, 1.9329757581336544]
464 [0.9426014085310519, 1.9329932555943559]
465 [0.9426088487336658, 1.933000452697082]
466 [0.9426121675845189, 1.9330103615481233]
467 [0.9426161391311524, 1.9329866270874225]
468 [0.9426296383491816, 1.9329789097317052]
469 [0.9426259965321424, 1.9329682471413177]
470 [0.9426238432055749, 1.9329989652174995]
471 [0.9426405454112498, 1.9330569093680043]
472 [0.942656784321461, 1.9330339861715673]
473 [0.942640474392432, 1.9330587556953385]
474 [0.9426436

683 [0.942648730787886, 1.9330372090504697]
684 [0.9426466499707246, 1.9330391128683482]
685 [0.9426346730370511, 1.9330268023431365]
686 [0.9426391936396451, 1.9330295899410528]
687 [0.942627963576318, 1.9330131607242513]
688 [0.9426293611722334, 1.9330330330279937]
689 [0.9426311363015379, 1.9330090373724238]
690 [0.9426327234863752, 1.9330439984744197]
691 [0.942636552625777, 1.9330418164967427]
692 [0.9426320328215637, 1.9330140102423086]
693 [0.9426331149669471, 1.9330176741114686]
694 [0.9426457852765402, 1.9330765491344037]
695 [0.9426649058681617, 1.9330970233101623]
696 [0.9426535534775426, 1.9330718183233926]
697 [0.9426470406620213, 1.9330793997926123]
698 [0.9426438826362433, 1.9330721358864007]
699 [0.9426380337450052, 1.9330337551671095]
700 [0.9426381649507266, 1.9330090625879648]
701 [0.9426341435845763, 1.9329943405673105]
702 [0.9426466787851691, 1.9330304832707959]
703 [0.9426315556013819, 1.933062403096971]
704 [0.942638465623754, 1.9331298007816244]
705 [0.94264744

924 [0.9426112873540538, 1.9330571737439994]
925 [0.9426110045273342, 1.9331049396224715]
926 [0.9426313607165688, 1.9331175234365126]
927 [0.9426446310748281, 1.9331016158290981]
928 [0.9426614746549272, 1.933098090856142]
929 [0.9426670673442341, 1.9330991100033939]
930 [0.9426776668555489, 1.93305987901071]
931 [0.9426983430755724, 1.9331030556342195]
932 [0.9427067109666483, 1.9331043790605993]
933 [0.9427080812383277, 1.9331215111700686]
934 [0.9427192509119803, 1.9330865233075953]
935 [0.9427114617240444, 1.9330665467452872]
936 [0.9427007968663758, 1.9330623404207803]
937 [0.9426843220055627, 1.9330635705663306]
938 [0.942696024678242, 1.9330507399029617]
939 [0.942691092475298, 1.933086131026145]
940 [0.9426958503243579, 1.9330726018701916]
941 [0.9427023760895995, 1.9330162360478598]
942 [0.9426887525056703, 1.9329832058360927]
943 [0.9427028085238446, 1.9329333169118514]
944 [0.9427112295878389, 1.9329549791893192]
945 [0.9426969273792943, 1.9328632602024722]
946 [0.942691320