# Data
by Ilya Zharikov

In [2]:
import numpy as np

In [22]:
def sampling_operator(nsamp, dims, seed=1):
    """
    Sampling operator (uniform distribution, without replacement)
    Input:
    nsamp : number of samples; positions are drawn without replacement,
            so nsamp must not exceed np.prod(dims)
    dims : size of the matrix
    seed : set random seed (default 1)
    Output:
    array of indices (size nsamp x 2 for a matrix); row k holds the
    coordinates of the k-th sampled entry
    """
    # A private RandomState seeded with `seed` produces the same draws as
    # seeding the global generator would, but leaves np.random's global
    # state untouched for the caller.
    rng = np.random.RandomState(seed)
    flat_idx = rng.choice(np.prod(dims), nsamp, replace=False)

    # Turn flat positions into per-axis coordinates, one row per sample.
    return np.vstack(np.unravel_index(flat_idx, dims)).T

In [30]:
# Demo: draw 3 distinct positions of a 3x4 matrix with seed 1.
sampling_operator(nsamp=3, dims=(3, 4), seed=1)

array([[0, 2],
       [0, 3],
       [1, 0]], dtype=int64)

---

In [31]:
def get_matrix(rank, dims, noise='n', sigma=1e-3, seed=1):
    """
    Get synthetic matrices of desired rank
    Input:
    rank : rank of the matrix (inner dimension of the two random factors)
    dims : size of the matrix, (number of rows, number of columns)
    noise : 'y' - with noise or
            'n' - without noise (default 'n')
            Noise from N(mean=0, std=sigma).
    sigma : standard deviation of the noise (default 1e-3); the
            standard-normal noise is multiplied by sigma
    seed : set random seed (default 1)
    Output:
    M : dims[0] x dims[1] matrix built as the product of two Gaussian
        factors (so its rank is at most `rank` before noise is added)
    """
    # A private RandomState gives the same stream as seeding the global
    # generator with `seed`, without clobbering the caller's RNG state.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: left factor, right factor,
    # then (optionally) the noise.
    left = rng.randn(dims[0], rank)
    right = rng.randn(dims[1], rank)
    M = left.dot(right.T)

    if noise == 'y':
        M += sigma * rng.randn(*M.shape)

    return M

In [32]:
# Demo: noise-free 3x4 matrix built from rank-2 factors (default seed).
get_matrix(rank=2, dims=(3, 4))

array([[ 3.2998501 ,  0.6707836 ,  3.63527254, -0.28876918],
       [-0.10480917,  0.09905915,  1.43822223,  0.58236993],
       [ 3.26192055,  0.85003444,  6.00681293,  0.60489365]])

In [33]:
# Demo: same matrix as above with Gaussian noise added (default sigma).
get_matrix(rank=2, dims=(3, 4), noise='y')

array([[ 3.30098387,  0.66968371,  3.63510011, -0.28964703],
       [-0.10476695,  0.09964196,  1.43712161,  0.58351466],
       [ 3.26282214,  0.85053693,  6.00771378,  0.60420992]])

---

In [34]:
def get_sampling_matrix(M, omega):
    """
    Get sampling matrix
    Input:
    M : array like (2-D)
    omega : array of sampling indices, one (row, column) pair per row
    Output:
    data: float array with the shape of M that holds M's entries at the
          positions listed in omega and zeros everywhere else
    """
    M = np.asarray(M)
    omega = np.asarray(omega)

    data = np.zeros(M.shape)
    # No samples requested: nothing to copy.
    if omega.shape[0] == 0:
        return data

    # A single integer-array assignment replaces the per-sample Python loop.
    rows, cols = omega[:, 0], omega[:, 1]
    data[rows, cols] = M[rows, cols]

    return data

In [35]:
# Demo: keep three sampled entries of a 3x4 matrix; every other entry is zero.
omega = sampling_operator(nsamp=3, dims=(3, 4), seed=1)
M = get_matrix(rank=2, dims=(3, 4))
get_sampling_matrix(M=M, omega=omega)

array([[ 0.        ,  0.        ,  3.63527254, -0.28876918],
       [-0.10480917,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

---

In [36]:
def get_sampling_vector(M, omega):
    """
    Get sampling vector
    Input:
    M : array like (2-D)
    omega : array of sampling indices, one (row, column) pair per row
    Output:
    data: float vector whose k-th element is the entry of M at the
          position given by the k-th row of omega
    """
    M = np.asarray(M)
    omega = np.asarray(omega)

    # No samples requested: return an empty float vector.
    if omega.shape[0] == 0:
        return np.zeros(0)

    # Gather all sampled entries at once; the cast keeps the float64
    # result type regardless of M's dtype.
    return M[omega[:, 0], omega[:, 1]].astype(np.float64)

In [37]:
# Demo: the sampled entries as a flat vector (uses the M and omega assigned in an earlier cell).
get_sampling_vector(M=M, omega=omega)

array([ 3.63527254, -0.28876918, -0.10480917])

---

In [41]:
def get_data(data_type, rank, dims, noise='n', sigma=1e-3, seed=1):
    """
    Get data
    Input:
    data_type : type of desired data; only 'synthetic' is supported
    If data_type='synthetic' :
        rank : rank of the matrix
        dims : size of the matrix
        noise : 'y' - with noise or
                'n' - without noise (default 'n')
                Noise from N(mean=0, std=sigma).
        sigma : standard deviation of the noise (default 1e-3)
        seed : set random seed (default 1)
    Output:
    M : matrix
    Raises:
    ValueError : if data_type is not 'synthetic'
    """
    # Reject unknown types instead of silently returning synthetic data.
    if data_type != 'synthetic':
        raise ValueError(
            "Unsupported data_type %r; only 'synthetic' is available"
            % (data_type,)
        )

    return get_matrix(rank, dims, noise, sigma, seed)

In [44]:
# Demo: request a noise-free synthetic 3x4 matrix built from rank-2 factors.
get_data(data_type='synthetic', rank=2, dims=(3, 4))

array([[ 3.2998501 ,  0.6707836 ,  3.63527254, -0.28876918],
       [-0.10480917,  0.09905915,  1.43822223,  0.58236993],
       [ 3.26192055,  0.85003444,  6.00681293,  0.60489365]])