# STA 663 Final Project
## Biclustering via Sparse Singular Value Decomposition

In [11]:
import numpy as np
import math
from numba import jit
import seaborn as sns

In [13]:
# function to implement thresholdig rule
@jit
def thresh(z, delta):
    return np.sign(z)*(np.abs(z) >= delta)*(np.abs(z) - delta)

In [14]:
@jit
def ssvd(X, gamu = 2, gamv = 2, merr = 10**(-4), niter = 100):
    # get the first triplet
    U, S, V = np.linalg.svd(X, full_matrices = True)
    u0 = U[0]
    v0 = V[0]
    # dimension of X
    n = X.shape[0]
    d = X.shape[1]
    ud = 1
    vd = 1
    iteration = 0
    SST = np.sum(X**2)
    
    while ((ud > merr) or (vd > merr)):
        print(iteration)
        iteration = iteration + 1
        ### update v ###
        z = X.T @ u0
        # inversed weight
        winv = np.abs(z)**gamv
        # sigma squared 
        sigsq = np.abs(SST - np.sum(z**2)) / (n*d - d)
        
        cand = z * winv;
        delt = np.sort(np.append(0, np.abs(cand)))
        delt_uniq = np.unique(delt)
        Bv = np.ones(len(delt_uniq) - 1)*math.inf
        
        index = np.where(winv > 10**(-8))
        cand1 = cand[index]
        winv1 = winv[index]
        for i in range(len(Bv)):
            temp2 = thresh(cand1, delt_uniq[i])
            temp2 = temp2/winv1
            temp3 = np.zeros(d)
            temp3[index] = temp2
            Bv[i] = np.sum((X - u0[:, None] @ temp3[None, :])**2)/sigsq + np.sum(temp2!=0)*np.log(n*d)
        # choose the smallest BIC 
        Iv = np.argmin(Bv)
        th = delt_uniq[Iv]
        temp2 = thresh(cand1, th)
        temp2 = temp2/winv1
        v1 = np.zeros(d)
        v1[index] = temp2
        v1 = v1/np.linalg.norm(v1)
        
        ### update u ###
        z = X @ v1
        # inversed weight
        winu = np.abs(z)**gamu
        # sigma squared 
        sigsq = np.abs(SST - np.sum(z**2)) / (n*d - n)
        
        cand = z * winu;
        delt = np.sort(np.append(0, np.abs(cand)))
        delt_uniq = np.unique(delt)
        Bu = np.ones(len(delt_uniq) - 1)*math.inf
        
        index = np.where(winu > 10**(-8))
        cand1 = cand[index]
        winu1 = winu[index]
        for i in range(len(Bu)):
            temp2 = thresh(cand1, delt_uniq[i])
            temp2 = temp2/winu1
            temp3 = np.zeros(n)
            temp3[index] = temp2
            Bu[i] = np.sum((X - temp3[:,None] @ v1[None, :])**2)/sigsq + np.sum(temp2!=0)*np.log(n*d)
        # choose the smallest BIC 
        Iu = np.argmin(Bu)
        th = delt_uniq[Iu]
        temp2 = thresh(cand1, th)
        temp2 = temp2/winu1
        u1 = np.zeros(n)
        u1[index] = temp2
        u1 = u1/np.linalg.norm(u1)
        
        # difference in old and new 
        ud = np.linalg.norm(u0-u1)
        vd = np.linalg.norm(v0-v1)
        
        if(iteration > niter):
            break;
        # update u0 and u1
        u0 = u1
        v0 = v1
    s = u1[None,:] @ X @ v1[:, None]
    return u1, v1, s, iter

In [4]:
data = np.loadtxt("data.txt", dtype = "float", delimiter = " ")
data = data.T

In [15]:
res = ssvd(data)

0
1
2
3
4
5
6


KeyboardInterrupt: 

In [None]:
layer = s * u1[:, None] * v1[None, :]