In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import numpy as np
import copy
np.set_printoptions(suppress=True)

data = load_iris()
x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.33, random_state=42)

def generalized_least_squares(x,y,W0,L,g,n,d,k,thres):
  def is_close(a,b,thres):
    s = 0
    for i in range(len(a)):
      for j in range(len(a[i])):
        s += abs(a[i][j] - b[i][j])
    #print(s)
    return s < thres 

  #compute sigma hat inverse
  sigma_hat = np.zeros([d,d]) #dxd
  for i in range(n):
    sigma_hat = np.add(sigma_hat,np.matmul(np.transpose([x[:,i]]),np.array([x[:,i]])))
  sigma_hat_inv = np.linalg.inv(1/n * np.around(sigma_hat,5)) #dxd

  #print(sigma_hat_inv)
    
  #init W
  prev_W = W0
  W = W0

  t = 0
  while (t == 0 or not is_close(W,prev_W,thres)): #while not converge
    t += 1
    
    prev_W = copy.deepcopy(W)

    #compute y_hat = kxn
    y_hat = []
    
    for i in range(n):
      y_hat.append(np.array(g(np.matmul(W,np.array(x[:,i])))))
    y_hat = np.transpose(np.array(y_hat))

    #compute E_hat = kxd
    E_hat = np.zeros([k,d])
    for i in range(n):
      E_hat = np.add(E_hat, np.matmul(np.transpose([y_hat[:,i] - y[:,i]]),np.array([x[:,i]])))
    E_hat = E_hat / n


    #compute W = kxd
    W = np.add(np.transpose(prev_W), -1/L * np.matmul(sigma_hat_inv,np.transpose(E_hat)))
    W = np.transpose(W)
    
    #print(t)
  return W

def stagewise_regression(x,y,GEN,L,g,n,d,k,thres,p=2,T=20):
  i_sets = []
  Ws = []
    
  y_hat = np.zeros([k,n]) #kxn

  for t in range(T):
    #sys.stdout.write(str(t))
    #sys.stdout.flush()#print(t)
    print(t)
    x_tilt = [] #pxn
    i_set = sorted(GEN(x,p))
    i_sets.append(i_set)

    for i in i_set:
      x_tilt.append(x[i])
    x_tilt = np.array(x_tilt)

    W = generalized_least_squares(x_tilt,np.add(y,-y_hat),np.zeros([k,p]),L,g,n,p,k,thres) #kxp
    Ws.append(W)
    
    new_y_hat = []
    for i in range(n):
      new_y_hat.append(y_hat[:,i] + np.matmul(W,x_tilt[:,i]))
    y_hat = np.transpose(new_y_hat) #kxn
   
    count = 0
    for i in range(n):
      if(np.argmax(y_hat[:,i]) != np.nonzero(y[:,i])[0][0]):
        count += 1
    print(count)
    
  return (i_sets, Ws)

n = 100
d = 3
k = 3
p = 2
T = 100
thres = 0.01
pca = PCA(n_components=d)
x = np.transpose(pca.fit_transform(x_train))
#x = np.transpose(x_train[:n])
y = np.zeros([k,n])
for i in range(n):
  y[y_train[i]][i] = 1

W0 = np.zeros([k,d])
L = 1

def g(u):
  #return u
  for i in range(len(u)):
    if (u[i] < 10):
      u[i] = np.exp(u[i])/(1+np.exp(u[i]))
    else:
      u[i] = 1 
  return u

def GEN(X,p): #return the idx of selected X or 'j' of X_j
  arr = [i for i in range(len(X))]
  np.random.shuffle(arr)
  return arr[:p] #return selection of p rows
  #return #some other methods
    
def test(W,g):
  n = 50
  x = np.transpose(x_test[:n])
  y = np.zeros([k,n])
  for i in range(n):
    y[y_test[i]][i] = 1

  y_hat = []
  for i in range(n):
    y_hat.append(np.array(g(np.matmul(W,np.array(x[:,i])))))
  y_hat = np.transpose(np.array(y_hat))

  count = 0
  for i in range(n):
    if(np.argmax(y_hat[:,i]) != np.nonzero(y[:,i])[0][0]):
      count += 1

  return str((count / n) * 100) + '% fail'

def test_yhat(i_sets, Ws):
  n = 50
  #x = np.transpose(x_test[:n])
  x = np.transpose(pca.transform(x_test))

  y_hat = np.zeros([k,n]) #kxn
  for t in range(T):
    x_tilt = [] #pxn
    i_set = i_sets[t]
    for i in i_set:
      x_tilt.append(x[i])
    x_tilt = np.array(x_tilt)

    W = Ws[t]
    
    new_y_hat = []
    for i in range(n):
      new_y_hat.append(y_hat[:,i] + np.matmul(W,x_tilt[:,i]))
    y_hat = np.transpose(new_y_hat) #kxn
    
    
    count = 0
    for i in range(n):
      if(np.argmax(y_hat[:,i]) != np.nonzero(y[:,i])[0][0]):
        count += 1
          
  return "total " + str( (count / n) * 100) + '% fail'

result = stagewise_regression(x,y,GEN,L,g,n,d,k,thres,p,T)
print(test_yhat(result[0], result[1])) #% with logit g, 66% with lin g
#print(test(generalized_least_squares(x,y,W0,L,g,n,d,k,thres),g)) #% with logit g, 16% with lin g

0
19
1


KeyboardInterrupt: 

In [50]:
def calibrated_least_squares(x,y,W0,thres): 
  def clip(v, s=1):
    #reference: https://gist.github.com/daien/1272551/edd95a6154106f8e28209a1c7964623ef8397246
    n, = v.shape  # will raise ValueError if v is not 1-D
    # check if we are already on the simplex
    if v.sum() == s and np.alltrue(v >= 0):
        # best projection: itself!
        return v
    # get the array of cumulative sums of a sorted (decreasing) copy of v
    u = np.sort(v)[::-1]
    cssv = np.cumsum(u)
    # get the number of > 0 components of the optimal solution
    rho = np.nonzero(u * np.arange(1, n+1) > (cssv - s))[0][-1]
    # compute the Lagrange multiplier associated to the simplex constraint
    theta = float(cssv[rho] - s) / rho
    # compute the projection by thresholding v using theta
    w = (v - theta).clip(min=0)
    return w

  def is_close(a,b,thres):
    s = 0
    for i in range(len(a)):
      for j in range(len(a[i])):
        s += abs(a[i][j] - b[i][j])
    #print(s)
    return s < thres 

  y_hat = []
  for i in range(n):
    y_hat.append(np.matmul(W0,np.transpose([x[:,i]]))[:,0])
  y_hat = np.transpose(y_hat)

  prev_W = W0
  W = W0
  t = 0
  while t == 0 or not is_close(W,prev_W,thres):
    t += 1
    prev_W = copy.deepcopy(W)

    W = np.matmul(np.linalg.inv(np.matmul(np.transpose(x),x)),np.transpose(x))
    W = np.matmul(y,W)
    
    y_tilt = []
    for i in range(n):
      y_tilt.append(np.add(np.transpose([y_hat[:,i]]),np.matmul(W,np.transpose([x[:,i]])))[:,0])
    y_tilt = np.transpose(y_tilt)


    u1 = []
    u2 = []
    u3 = []
    for i in range(n):
        a = np.argmax(np.absolute(y_tilt[:,i]))
        u1.append(a)
        u2.append(a**2)
        u3.append(a**3)
    u = np.array([u1,u2,u3])
    
    
    
    W = np.matmul(np.linalg.inv(np.matmul(np.transpose(u),u)),np.transpose(u))
    W_tilt = np.matmul(y,W_tilt)

    y_hat = []
    for i in range(n):
      y_hat.append(clip(np.matmul(W_tilt,G(y_tilt[:,i]))[:,0]))
    y_hat = np.transpose(y_hat)

  return W

calibrated_least_squares(x,y,W0,0.01)