In [25]:
import numpy as np
np.random.seed(2)
import copy

In [4]:
import signal
def signal_handler(signum, frame):
    raise Exception("Timed out!")
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(2*60)   # seconds

0

In [5]:
def sol_data2gaussian(S):
    mu = np.sum(S)/len(S)
    sigma2 = np.sum((S-mu)**2)/len(S)
    sigma = np.sqrt(sigma2)
    
    return mu,sigma

In [6]:
def data2gaussian():
    S = np.random.normal(10.5, .2, 50)
    mu1, sigma1 = my_data2gaussian(S)
    
    mu,sigma = my_data2gaussian(S)
    assert np.isclose(mu, mu1, atol=.001), mu
    assert np.isclose(sigma, sigma1, atol=.001), sigma
    return

In [7]:
def my_data2gaussian(S):
    '''
    Return optimal parameters - (mu,sigma)
    Inputs:
        S: np array of shape (Ns,). These are samples of a random variable.
    Outputs:
        mu: float
        sigma: float
    '''

    ### WRITE YOUR CODE HERE - 5 MARKS

    mu = np.sum(S)/len(S)
    sigma = 0
    for x in S:
      sigma = sigma + (x - mu)**2
    sigma = sigma/len(S)
    sigma = np.sqrt(sigma)

    return mu, sigma

In [8]:
data2gaussian()

In [9]:
def sol_data2laplacian(S):
   
    mu = np.median(S)
    b = np.sum(np.abs(S-mu))/len(S)
    return mu, b

In [10]:
def my_data2laplacian(S):
    '''
    Return optimal parameters - (mu,b). See https://en.wikipedia.org/wiki/Laplace_distribution
    Inputs:
        S: np array of shape (Ns,). These are samples of a random variable.
    Outputs:
        mu: float
        b: float
    '''
    mu = np.median(S)
    b = 0
    for x in S:
      b = b + abs(x - mu)
    b = b/len(S)

    ### WRITE YOUR CODE HERE - 5 MARKS

    return mu, b

In [11]:
def data2laplacian():
    S = np.random.normal(10.5, .2, 50)
    mu1, b1 = my_data2laplacian(S)
    
    mu,b = my_data2laplacian(S)
    
    assert np.isclose(mu, mu1, atol=.001), mu
    assert np.isclose(b, b1, atol=.001), b
    return

In [12]:
data2laplacian()

In [13]:
def sol_data2uniform(S):
    a = np.min(S)
    b = np.max(S)
    return a, b

In [14]:
def my_data2uniform(S):
    '''
    Return optimal parameters - (a,b)
    Inputs:
        S: np array of shape (Ns,). These are samples of a random variable.
    Outputs:
        a: float
        b: float
    '''
    b = np.max(S)
    a = b - (np.max(S) - np.min(S))
    # a = b - 2*(np.sum(S))/len(S)

    ### WRITE YOUR CODE HERE - 5 MARKS

    return a, b

In [15]:
def data2uniform():
    S = np.random.normal(10.5, .2, 50)
    a1,b1 = my_data2uniform(S)
    
    a,b = my_data2uniform(S)
    
    assert np.isclose(a, a1, atol=.001), a
    assert np.isclose(b, b1, atol=.001), b
    return
    

In [16]:
data2uniform()

In [17]:
def data2model():
    S = np.random.normal(10.5, .2, 500)
    modelName1 = my_data2model(S)
    
    modelName = "gaussian"
    assert modelName == modelName1, modelName

    S = np.random.laplace(2, 1, 500)
    modelName1 =  my_data2model(S)
    
    modelName = "laplacian"
    assert modelName == modelName1, modelName
    
    S = np.random.uniform(0, 1, 500)
    modelName1 =  my_data2model(S)
    
    modelName = "uniform"
    assert modelName == modelName1, modelName
    return

In [18]:
def sol_sampleGMM(pi, mu, sigma, Ns=1):
   
    K=len(pi)
    S=[]
    for i in range(Ns):
        z = np.random.choice(np.arange(K), p=pi)
        sample  =np.random.normal(mu[z],sigma[z])
        S.append(sample)
    
    S=np.array(S)
    
    
    return S

In [19]:
def my_sampleGMM(pi, mu, sigma, Ns=1):
    '''
    Inputs:
        pi: np.array of shape (K,), p(z_k)
        mu: np.array of shape (K,), mu of kth gaussian
        sigma: np.array of shape (K,), sigma of kth gaussian
        Ns: int, number of samples
    Outputs:
        S: np.array of shape (Ns,), samples from the GMM
    '''

    ### WRITE YOUR CODE HERE - 10 MARKS

    S = []
    for i in range(Ns):
      k = np.random.choice(np.arange(0,len(pi)), p=pi)
      x = np.random.normal(mu[k],sigma[k],1)
      S.append(x[0])
    
    S = np.array(S)




    return S

In [20]:
def sampleGMM():
    
       
    pi = [0,1]
    mu = [-1.1, 1.3]
    sigma = [1.5, 0.4]
    Ns = 500
    S1 = my_sampleGMM(pi, mu, sigma, Ns)
    mu1 = np.mean(S1)
    sigma1 = np.std(S1)
        
    assert np.isclose(mu[1], mu1, atol=.1), mu
    assert np.isclose(sigma[1], sigma1, atol=.1), sigma
    return
    
    
    
    

In [21]:
sampleGMM()

In [28]:
np.random.seed(0)

def multi_gaussian(x, mu, sigma):
  x = np.array(x)
  ans = -(0.5)* np.dot(np.dot((x - mu), np.linalg.inv(sigma)) , (x-mu).T)
  ans = np.exp(ans)
  k = mu.shape[0]
  det = np.linalg.det(sigma)
  if det<=0:
    det = 10**(-2)
  ans = ans/np.sqrt((2*np.pi)**(k)*det)

  return ans

def my_data2GMM(S, K):
    '''
    Return optimal parameters - (pi,mu,sigma)
    Inputs:
        S: np array of shape (Ns,Na). These are samples of a random variable. Na can be 1, 2 or 3
    Outputs:
        pi: np array of shape (K,)
        mu: np array of shape (K,Na)
        sigma: np array of shape (K,Na,Na)
    '''

    ### WRITE YOUR CODE HERE - 15 MARKS

    
    # initialization
    pi = np.array([1/K]*K)
    Ns = S.shape[0]
    dim = S.shape[1]
    sigma = np.random.random((K,dim,dim))
    mu = np.random.random((K,dim))

    r = np.zeros((Ns,dim))
    for i in range(Ns):
      S_i = np.expand_dims(S[i],axis = 0)
      s = 0
      for k in range(K):
        mu_k = np.expand_dims(mu[k],axis = 0)
        s = s + pi[k]*multi_gaussian(S_i, mu_k, sigma[k])
      for k in range(K):
        r[i][k] = pi[k]*multi_gaussian(S_i, mu_k, sigma[k])/s

    
    
    eps = 10**(-3)
    ct = 0


    while 1:
      ct+=1
      # print(ct)
      mu_old = copy.deepcopy(mu)
      sigma_old = copy.deepcopy(sigma)
      pi_old = copy.deepcopy(pi)

      #pi
      for i in range(K):
        c = 0
        for j in range(Ns):
          c = c + r[j][i]
        pi[i] = c/Ns

      #mu
      for i in range(K):
        x = np.zeros((dim))
        c = 0
        for j in range(Ns):
          c = c + r[j][i]
          x = x + r[j][i]*S[j]
        if c!=0:
          mu[i] = x/c
      
      #sigma
      for i in range(K):
        x = np.zeros((dim,dim))
        c = 0
        mu_k = np.expand_dims(mu[i],axis = 0)
        for j in range(Ns):
          S_j = np.expand_dims(S[j],axis = 0)
          c = c + r[j][i]
          x = x + r[j][i]*np.matmul((S_j - mu_k).T, (S_j - mu_k))
        if c!=0:
          sigma[i] = x/c
      
      # try:
      #   for k in range(K):
      #     inv = np.linalg.inv(sigma[k])
      # except:
      #   print(sigma[k])
      #   print("here")
      #   break
      
      #r
      for i in range(Ns):
        S_i = np.expand_dims(S[i],axis = 0)
        s = 0
        for k in range(K):
          mu_k = np.expand_dims(mu[k],axis = 0)
          s = s + pi[k]*multi_gaussian(S_i, mu_k, sigma[k])
        for k in range(K):
          r[i][k] = pi[k]*multi_gaussian(S_i, mu_k, sigma[k])/s

      #convergence
      change = np.linalg.norm(mu - mu_old) + np.linalg.norm(sigma - sigma_old) + np.linalg.norm(pi - pi_old)
      if change < eps:
        break

    return pi, mu, sigma

In [31]:
def data2GMM():
    pi = [0.5,0.5]
    mu = [-10.1, 10.3]
    sigma = [0.5, 0.4]
    Ns = 500
    S = my_sampleGMM(pi, mu, sigma, Ns)
    S = S.reshape(-1,1)
    c = 0
    
    for _ in range(2):
        try:
            pi1,mu1,sigma1 = my_data2GMM(S,2)
        except Exception as msg:
            print(msg)
        mu1 = np.sort(mu1.reshape(-1))
        print(mu)
        print(mu1)
        if np.all(np.isclose(mu, mu1, atol=.1)):
            c = c+1
            break
        print("another try:", c)
            
    assert(c>0)
        
    return

In [30]:
data2GMM()

IndexError: index 1 is out of bounds for axis 0 with size 1