%reload_ext autoreload
%autoreload 2
import sys
sys.path.append("../")

In [160]:
from IMLearn.learners import UnivariateGaussian, MultivariateGaussian
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

In [41]:
def univariate_gaussian_estimation():
    """Run the univariate Gaussian experiments (Q1-Q3) and display their plots.

    Draws 1000 samples from a normal distribution with expectation ``mu`` and
    variance ``var``, fits a ``UnivariateGaussian`` to them, prints the
    estimated ``(mu_, var_)`` tuple, and shows two scatter plots:

    * the absolute error of the estimated expectation for growing sample
      prefixes (10, 20, ..., 1000 samples);
    * the PDF values the fitted estimator assigns to each drawn sample.

    Returns nothing; the results are printed and shown as figures.
    """
    # Q1:
    mu, var = 10, 1
    # np.random.normal expects the standard deviation as `scale`, not the
    # variance, so convert explicitly (identical only while var == 1).
    samples = np.random.normal(loc=mu, scale=np.sqrt(var), size=1000)

    estimator = UnivariateGaussian().fit(samples)
    print((estimator.mu_, estimator.var_))

    # Q2:
    # Single source of truth for the x axis and for the prefixes being fitted.
    sample_sizes = list(range(10, 1010, 10))
    expectations = [abs(mu - UnivariateGaussian().fit(samples[:n]).mu_) for n in sample_sizes]
    fig_1 = px.scatter(x=sample_sizes, y=expectations,
                       labels={
                           "x": "sample size",
                           "y": "absolute distance"})

    _fig_1_config(fig_1)
    fig_1.show()

    # Q3:
    # I'm expecting to see a normal (Gaussian) distribution around 10, with a variance of 1
    pdfs = estimator.pdf(samples)
    # Pair every sample with its PDF value; rows are ordered by PDF value.
    X = np.column_stack((samples, pdfs))
    X = X[np.argsort(X[:, 1])]

    fig_2 = px.scatter(x=X[:, 0], y=X[:, 1],
                       labels={
                           "x": "sample",
                           "y": "empirical PDF"})
    _fig_2_config(fig_2)
    fig_2.show()

In [42]:
def _fig_1_config(fig):
    fig.update_layout(
        title={
            "text": "Error of Estimated Value of Expectation, as a Function of Sample Size",
            "x": 0.5, 
            "y": 0.95},
        template="simple_white",
        font_color="black",
        title_font_family="Helvetica",
        title_font_color="black",
    )
    fig.update_traces(
        marker=dict(size=5, symbol="diamond", color="black"),
        selector=dict(mode="markers"),
    )
    fig.update_xaxes(
        title="Sample Size",
        title_font_family="Times New Roman", tickmode = "linear", tick0 = 0, dtick = 50)
    fig.update_yaxes(
        title="Absolute distance between est. and true value of expectation",
        title_font_family="Times New Roman")

def _fig_2_config(fig):
    fig.update_layout(
        title={
            "text": "Empirical PDF Under the Fitted Model",
            "x": 0.5, 
            "y": 0.95},
        template="simple_white",
        font_color="black",
        title_font_family="Helvetica",
        title_font_color="black")
    fig.update_traces(
        marker=dict(size=5, symbol="diamond", color="black"),
        selector=dict(mode="markers"))
    fig.update_xaxes(
        title="x (sample)",
        title_font_family="Times New Roman")
    fig.update_yaxes(
        title="Empirical PDF",
        title_font_family="Times New Roman")

In [250]:
def multivariate_gaussian_estimation():
    """Run the multivariate Gaussian experiments (Q4-Q6).

    Draws 1000 samples from a 4-dimensional normal distribution with
    expectation ``[0, 0, 4, 0]`` and the covariance matrix defined below, fits
    a ``MultivariateGaussian`` and prints its estimated expectation and
    covariance. It then evaluates the log-likelihood of the samples for
    expectations of the form ``[f1, 0, f3, 0]`` over a 200x200 grid on
    [-10, 10], shows the result as a heatmap, and prints the grid point with
    the highest log-likelihood.

    Returns nothing; the results are printed and shown as a figure.
    """
    # Q4:
    cov = np.array(
        [[1, 0.2, 0, 0.5],
         [0.2, 2, 0, 0],
         [0, 0, 1, 0],
         [0.5, 0, 0, 1]])
    samples = np.random.multivariate_normal(mean=[0, 0, 4, 0], cov=cov, size=1000)
    estimator = MultivariateGaussian().fit(samples)
    print("Estimated expectation: \n", estimator.mu_, "\n")
    print("Estimated covariance matrix: \n", estimator.cov_, "\n")
    # TODO: estimator.pdf(samples) was reported as raising an error; left disabled.

    # Q5:
    f_values = np.linspace(-10, 10, 200)
    # m[i, j] holds the log-likelihood for f1 = f_values[i], f3 = f_values[j].
    m = np.zeros((f_values.size, f_values.size))
    for i, f1 in enumerate(f_values):
        for j, f3 in enumerate(f_values):
            m[i, j] = estimator.log_likelihood([f1, 0, f3, 0], cov, samples)

    # go.Heatmap draws the rows of `z` along the y axis and its columns along
    # the x axis. Since f1 indexes the rows of `m`, transpose it so that f1 is
    # really on the x axis and f3 on the y axis, matching the axis titles.
    heatmap = go.Figure(go.Heatmap(
        x=f_values,
        y=f_values,
        z=m.T),
        layout={
            "template": "simple_white",
            "font_color": "black",
            "title_font_family": "Times New Roman",
            "title_font_color": "black",
            "title": "Empirical Log-Likelihood of Multivariate Gaussian Estimation as a Function of Expectation features 1, 3",
            "xaxis_title": "f1",
            "yaxis_title": "f3",
        },
    )
    heatmap.show()

    # Q6:
    # Indexing uses the untransposed grid: row -> f1, column -> f3.
    argmax_row, argmax_col = np.unravel_index(m.argmax(), m.shape)
    print("The model that achieved maximal log-likelihood value (upon examining entries 1, 3 of the Expectation): ")
    print("[{:.3f}, {:.3f}] achieved a value of {:.3f}".format(f_values[argmax_row], f_values[argmax_col], m.max()))


In [251]:
if __name__ == "__main__":
    # Seed NumPy's global generator before any sampling so both experiments
    # draw the same samples on every run.
    np.random.seed(173)
    # Run the experiments in order; they share the generator state seeded above.
    for experiment in (univariate_gaussian_estimation, multivariate_gaussian_estimation):
        experiment()

(10.000348480377385, 0.9485592794250123)


Estimated expectation: 
 [0.02191194 0.02506859 4.01073323 0.04053529] 

Estimated covariance matrix: 
 [[ 1.03058125  0.15752748  0.03548712  0.51062277]
 [ 0.15752748  2.13244001 -0.03130581 -0.01259556]
 [ 0.03548712 -0.03130581  0.98683487 -0.04115146]
 [ 0.51062277 -0.01259556 -0.04115146  0.9685384 ]] 



The model that achieved maximal log-likelihood value (upon examining entries 1, 3 of the Expectation): 
[-0.050, 3.970] achieved a value of -5891.375
