In [1]:
import numpy as np
import pandas as pd
from timeit import default_timer as timer
from sklearn import linear_model
import sklearn
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.linear_model import Ridge
from sklearn.linear_model import Ridge

In [56]:
'''
Orthogonal Matching Pursuit ALGORITHM
'''
def run_omp_real(X,y,k):
    start = timer()

    n, p = X.shape    
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=k)
    omp.fit(X, y)
    beta = omp.coef_
    
    ypred = omp.predict(X)    
    squared_error_sum = np.sum(np.square(ypred-y))
    
    end = timer()
    Time = end-start
    
    RMSE = np.sqrt(squared_error_sum*1.0/n)
    
    SS_tot = np.sum(np.square(y-np.mean(y)))
    Rsquared = 1-squared_error_sum*1.0/SS_tot

    print(RMSE,DR,Rsquared,Time,'OMP')    
    return(RMSE,DR,Rsquared,Time,squared_error_sum*1.0/n)

In [3]:
'''
LASSO ALGORITHM
'''
def run_Lasso(X,y,true_beta,k,CURRENT_RANDOM_SEED=1):
    start = timer()
    n, p = X.shape
    
    lasso_model = linear_model.Lasso(alpha = 1.0,random_state=CURRENT_RANDOM_SEED)
    lasso_model.fit(X,y)
    beta = lasso_model.coef_
    ypred = lasso_model.predict(X)
    
    squared_error_sum = np.sum(np.square(ypred-y))
    
    end = timer()
    Time = end-start
    
    RMSE = np.sqrt(squared_error_sum*1.0/n)
    DR = (np.sum((np.multiply(true_beta,beta)!=0)*1.0))*1.0/k
    
    SS_tot = np.sum(np.square(y-np.mean(y)))
    Rsquared = 1-squared_error_sum*1.0/SS_tot
    
    print(RMSE,DR,Rsquared,Time,'LASSO')    
    return()

In [4]:
'''
Truncation function for TSGD Algorithm
'''
def truncate(beta,k):
    p = beta.shape[0]
    sorted_indices = np.abs(beta).argsort()[::-1].tolist()
    dummy = np.zeros(p)
    for el in sorted_indices[:k]:
        dummy[el] = 1.0
    return(np.multiply(beta,dummy))

In [5]:
'''
TSGD ALGORITHM
'''
def run_TSGD(X,y,true_beta,k,min_offline_errors):
    start = timer()

    n, p = X.shape
    ns = NS
    ns = [nn-1 for nn in ns]
    n_idx = 0
    eta = np.log(ns[n_idx])*1.0/ns[n_idx]

    squared_error_sum = 0
    true_support_sum = 0

    beta = np.random.rand(p)-0.5
    for i in range(n):
        prev_beta = beta
        y_pred = X[i,:].dot(prev_beta)
        
        loss_i = (y_pred-y[i,])**2
        current_detection_rate = (np.sum((np.multiply(true_beta,prev_beta)!=0)*1.0))*1.0/k
        squared_error_sum += loss_i
        true_support_sum += current_detection_rate
        
        beta = prev_beta + eta*(y[i]-np.dot(X[i,:],prev_beta))*X[i,:]
        beta = truncate(beta,k)

        if i in ns:
            end = timer()
            Time = end-start
            
            RMSE = np.sqrt(squared_error_sum*1.0/i)
            DR = true_support_sum*1.0/i

            SS_tot = np.sum(np.square(y[:i+1]-np.mean(y[:i+1])))
            Rsquared = 1-squared_error_sum*1.0/SS_tot
            
            regret = squared_error_sum*1.0/i - min_offline_errors[n_idx]

            if n_idx != len(ns) - 1:
                n_idx += 1
                eta = np.log(ns[n_idx])*1.0/ns[n_idx]

            print(RMSE[0],DR,Rsquared[0],Time,regret[0],i+1,'TSGD')
    
    return()

In [6]:
'''
OLST ALGORITHM
'''
def run_OLST(X,y,true_beta,k,rho,min_offline_errors):
    start = timer()
    n, p = X.shape
    ns = NS
    ns = [nn-1 for nn in ns]
    n_idx = 0
    y = y.reshape((n,1))

    squared_error_sum = 0
    true_support_sum = 0

    beta = np.random.rand(p)-0.5

    #INITIALIZATION
    mu_x = np.zeros((p))
    mu_y = 0
    S_xx = np.zeros((p,p))
    S_xy = np.zeros((p,1))

    for i in range(n):
        if i%100==1:
            print(i)

        prev_beta = beta
        y_pred = X[i,:].dot(prev_beta)
        loss_i = (y_pred-y[i,0])**2
        current_detection_rate = (np.sum((np.multiply(true_beta,prev_beta)!=0)*1.0))*1.0/k
        
#         print(loss_i,current_detection_rate)
        
        squared_error_sum += loss_i
        true_support_sum += current_detection_rate
                    
        # Finding $\hat{beta}$ by OLS
        X_current, y_current = X[i,:].reshape((1,p)), y[i]
        mu_x = i*mu_x/(i+1) + X_current*1.0/(i+1)
        mu_y = i*mu_y/(i+1) + y_current*1.0/(i+1)
        S_xx = S_xx*i/(i+1) + np.dot(X_current.T,X_current)*1.0/(i+1)
        S_xy = S_xy*i/(i+1) + y_current*X_current.T*1.0/(i+1)
    
        # Normalization
        if i>=1:
            Pi = np.linalg.inv(np.sqrt(np.multiply((S_xx - np.square(mu_x)),np.identity(p))))
            S_xx_n = np.dot(Pi, S_xx-np.dot(mu_x,mu_x.T))
            S_xx_n = np.dot(S_xx_n,Pi)

            S_xy_n = np.dot(Pi, S_xy) - mu_y*np.dot(Pi, mu_x.T).reshape((p,1))

        clf = Ridge(alpha=0.001)
        if k==10:
            clf = Ridge(alpha=0.001)
        if k==100 and rho!=0:
            clf = Ridge(alpha=1.0)
        
        if i==0:
            clf.fit(S_xx,S_xy)
        else:
            clf.fit(S_xx_n,S_xy_n)
        beta = clf.coef_.reshape((p,))

        # Keeping only k variables with largest $|\hat{\beta}_j|$
        sorted_indices = np.abs(beta).argsort()[::-1].tolist()
        k_biggest_indices = np.sort(sorted_indices[:k])
        
        # Fitting the model on the selected features by OLS
        selected_X = X[:i+1,k_biggest_indices]

        clf = Ridge(alpha=0.0001)
        if k==10:
            clf = Ridge(alpha=0.0001)
        if k==100 and rho!=0:       
            clf = Ridge(alpha=0.1)
        clf.fit(selected_X, y[:i+1])
        OLS_beta = clf.coef_.reshape((k,))
        
#         OLS_beta = np.dot(np.linalg.inv(np.dot(selected_X.T,selected_X)),np.dot(selected_X.T,y_current))
                
        beta = np.zeros(p)
        for j,ind in enumerate(k_biggest_indices):
            beta[ind] = OLS_beta[j]

        if i in ns:
            end = timer()
            Time = end-start
            
            RMSE = np.sqrt(squared_error_sum*1.0/i)
            DR = true_support_sum*1.0/i

            SS_tot = np.sum(np.square(y[:i+1]-np.mean(y[:i+1])))
            Rsquared = 1-squared_error_sum*1.0/SS_tot

            regret = squared_error_sum*1.0/n - min_offline_errors[n_idx]

            if n_idx != len(ns) - 1:
                n_idx += 1

            print(RMSE,DR,Rsquared,Time,regret,i+1,'OLST')
    
    return()

In [8]:
data = pd.read_pickle('features.pkl')

Unnamed: 0,filepath,feature_array
0,wiki_crop/61/19242061_1982-08-16_2009.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 93.62671,..."
1,wiki_crop/61/2399661_1977-09-02_2008.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.3335304..."
2,wiki_crop/61/44438061_1937-11-14_1969.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64.4..."
3,wiki_crop/61/37699961_1890-12-11_1947.jpg,"[[0.0, 0.0, 0.0, 31.359468, 0.0, 0.0, 0.0, 15...."
4,wiki_crop/61/41899761_1989-10-06_2012.jpg,"[[0.0, 0.0, 0.0, 0.0, 11.125888, 0.0, 0.0, 0.0..."
5,wiki_crop/61/8794261_1970-10-06_2011.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
6,wiki_crop/61/9707061_1986-11-12_2006.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
7,wiki_crop/61/2265561_1987-08-25_2011.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8,wiki_crop/61/15330561_1943-08-11_2002.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
9,wiki_crop/61/194961_1967-05-29_2012.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [31]:
data['filepath'] = data['filepath'].str.replace('wiki_crop/','')
data

Unnamed: 0,filepath,feature_array
0,61/19242061_1982-08-16_2009.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 93.62671,..."
1,61/2399661_1977-09-02_2008.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 6.3335304..."
2,61/44438061_1937-11-14_1969.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 64.4..."
3,61/37699961_1890-12-11_1947.jpg,"[[0.0, 0.0, 0.0, 31.359468, 0.0, 0.0, 0.0, 15...."
4,61/41899761_1989-10-06_2012.jpg,"[[0.0, 0.0, 0.0, 0.0, 11.125888, 0.0, 0.0, 0.0..."
5,61/8794261_1970-10-06_2011.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
6,61/9707061_1986-11-12_2006.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
7,61/2265561_1987-08-25_2011.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
8,61/15330561_1943-08-11_2002.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
9,61/194961_1967-05-29_2012.jpg,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


In [29]:
df_path = pd.read_csv('unique_path.csv', header=None).T
df_path = df_path.rename(columns = {0:'filepath'})
age = pd.read_csv('age.csv', header=None).T.rename(columns={0:'age'})
df_target = path_df.merge(age, left_index=True, right_index=True)
df_target

Unnamed: 0,filepath,age
0,17/10000217_1981-05-05_2009.jpg,28
1,48/10000548_1925-04-04_1964.jpg,39
2,12/100012_1948-07-03_2008.jpg,59
3,65/10001965_1930-05-23_1961.jpg,31
4,16/10002116_1971-05-31_2012.jpg,41
5,02/10002702_1960-11-09_2012.jpg,51
6,41/10003541_1937-09-27_1971.jpg,33
7,39/100039_1904-12-07_1982.jpg,77
8,13/10004113_1946-08-26_2007.jpg,60
9,22/10004122_1982-03-17_2011.jpg,29


In [62]:
df_z = data.merge(df_target, on='filepath', how='inner')
X = np.vstack(df_z['feature_array'].values)
y = df_z['age'].values.reshape((-1,1))

In [129]:
start = timer()
n,p = X.shape

omp = OrthogonalMatchingPursuit(n_nonzero_coefs=25)
omp.fit(X, y)
beta = omp.coef_
ypred = np.dot(X,beta) 
squared_error_sum = np.sum(np.square(ypred.reshape((-1,1))-y))

end = timer()
Time = end-start

RMSE = np.sqrt(squared_error_sum*1.0/n)

SS_tot = np.sum(np.square(y-np.mean(y)))
Rsquared = 1-squared_error_sum*1.0/SS_tot

print(RMSE,Rsquared,Time,'OMP')

42.725070871515975 -2.6181280144875156 22.462947182008065 OMP


In [125]:
start = timer()
n, p = X.shape

lasso_model = linear_model.Lasso(alpha = 32.0)
lasso_model.fit(X,y)
beta = lasso_model.coef_
ypred = np.dot(X,beta) 

squared_error_sum = np.sum(np.square(ypred.reshape((-1,1))-y))

end = timer()
Time = end-start

RMSE = np.sqrt(squared_error_sum*1.0/n)

SS_tot = np.sum(np.square(ypred.reshape((-1,1))-y))
Rsquared = 1-squared_error_sum*1.0/SS_tot

print(RMSE,Rsquared,Time,'LASSO')  

42.26434460841928 0.0 11.791150449076667 LASSO


In [117]:
np.sum((beta!=0)*1.0)

19.0

In [121]:
NS = []
for i in range(10):
    NS.append(n*(i+1)/10)

In [131]:
'''
TSGD ALGORITHM
'''

start = timer()

n, p = X.shape
ns = NS
ns = [nn-1 for nn in ns]
n_idx = 0
eta = np.log(ns[n_idx])*1.0/ns[n_idx]
k = 25

squared_error_sum = 0
true_support_sum = 0

# beta = np.random.rand(p)-0.5
for i in range(n):
    prev_beta = beta
    y_pred = X[i,:].dot(prev_beta)

    loss_i = (y_pred-y[i,])**2
    print(loss_i)
    
    squared_error_sum += loss_i

    # UPDATE
    beta = prev_beta + eta*(y[i]-np.dot(X[i,:],prev_beta))*X[i,:]
    beta = truncate(beta,k)

    if i in ns:
        end = timer()
        Time = end-start

        RMSE = np.sqrt(squared_error_sum*1.0/i)
        SS_tot = np.sum(np.square(y[:i+1]-np.mean(y[:i+1])))
        Rsquared = 1-squared_error_sum*1.0/SS_tot

        if n_idx != len(ns) - 1:
            n_idx += 1
            eta = np.log(ns[n_idx])*1.0/ns[n_idx]

        print(RMSE[0],Rsquared[0],Time,i+1,'TSGD')

[726.75893888]
[1560618.89282627]
[1.38212517e+10]
[1.53770583e+14]
[3.97627888e+17]
[1.05882631e+22]
[2.08247326e+26]
[3.13361718e+30]
[2.76184073e+34]
[4.11134087e+38]
[2.62889714e+43]
[3.14699979e+47]
[1.66469326e+51]
[9.91678593e+55]
[1.34901275e+60]
[2.91105991e+64]
[6.17232688e+67]
[5.63024114e+71]
[1.13047817e+76]
[2.70883605e+79]
[8.7113953e+82]
[1.01719769e+87]
[5.71973632e+90]
[3.49187763e+94]
[6.88504625e+98]
[1.51022397e+102]
[6.01743514e+105]
[2.02504777e+110]
[6.62189178e+114]
[4.22293204e+118]
[1.43114e+123]
[2.00906438e+127]
[1.79749738e+131]
[3.36434045e+135]
[1.78814785e+139]
[1.04653556e+143]
[5.42783138e+146]
[6.55865416e+149]
[5.25171175e+153]
[5.21529414e+157]
[1.85324047e+160]
[1.05655103e+164]
[2.30416031e+168]
[2.08771081e+172]
[2.07428666e+176]
[9.04828975e+179]
[3.93474199e+183]
[5.49333587e+187]
[2.30212013e+191]
[2.45741889e+195]
[5.56554676e+199]
[9.0642269e+203]
[3.49636734e+206]
[2.73019079e+210]
[9.68629562e+214]
[7.37450474e+218]
[1.06161633e+223]
[3.9




[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[nan]
[na

KeyboardInterrupt: 

In [132]:
'''
OLST ALGORITHM
'''

start = timer()
n, p = X.shape
k = 25
ns = NS
ns = [nn-1 for nn in ns]
n_idx = 0
y = y.reshape((n,1))

squared_error_sum = 0
true_support_sum = 0

beta = np.random.rand(p)-0.5

#INITIALIZATION
mu_x = np.zeros((p))
mu_y = 0
S_xx = np.zeros((p,p))
S_xy = np.zeros((p,1))

for i in range(n):

    prev_beta = beta
    y_pred = X[i,:].dot(prev_beta)
    loss_i = (y_pred-y[i,0])**2
    squared_error_sum += loss_i

    # Finding $\hat{beta}$ by OLS
    X_current, y_current = X[i,:].reshape((1,p)), y[i]
    mu_x = i*mu_x/(i+1) + X_current*1.0/(i+1)
    mu_y = i*mu_y/(i+1) + y_current*1.0/(i+1)
    S_xx = S_xx*i/(i+1) + np.dot(X_current.T,X_current)*1.0/(i+1)
    S_xy = S_xy*i/(i+1) + y_current*X_current.T*1.0/(i+1)

    # Normalization
    if i>=1:
        Pi = np.linalg.inv(np.sqrt(np.multiply((S_xx - np.square(mu_x)),np.identity(p))))
        S_xx_n = np.dot(Pi, S_xx-np.dot(mu_x,mu_x.T))
        S_xx_n = np.dot(S_xx_n,Pi)

        S_xy_n = np.dot(Pi, S_xy) - mu_y*np.dot(Pi, mu_x.T).reshape((p,1))

    clf = Ridge(alpha=0.001)
    if k==10:
        clf = Ridge(alpha=0.001)
    if k==100 and rho!=0:
        clf = Ridge(alpha=1.0)

    if i==0:
        clf.fit(S_xx,S_xy)
    else:
        clf.fit(S_xx_n,S_xy_n)
    beta = clf.coef_.reshape((p,))

    # Keeping only k variables with largest $|\hat{\beta}_j|$
    sorted_indices = np.abs(beta).argsort()[::-1].tolist()
    k_biggest_indices = np.sort(sorted_indices[:k])

    # Fitting the model on the selected features by OLS
    selected_X = X[:i+1,k_biggest_indices]

    clf = Ridge(alpha=0.0001)
    if k==10:
        clf = Ridge(alpha=0.0001)
    if k==100 and rho!=0:       
        clf = Ridge(alpha=0.1)
    clf.fit(selected_X, y[:i+1])
    OLS_beta = clf.coef_.reshape((k,))

#         OLS_beta = np.dot(np.linalg.inv(np.dot(selected_X.T,selected_X)),np.dot(selected_X.T,y_current))

    beta = np.zeros(p)
    for j,ind in enumerate(k_biggest_indices):
        beta[ind] = OLS_beta[j]

    if i in ns:
        end = timer()
        Time = end-start

        RMSE = np.sqrt(squared_error_sum*1.0/i)
        DR = true_support_sum*1.0/i

        SS_tot = np.sum(np.square(y[:i+1]-np.mean(y[:i+1])))
        Rsquared = 1-squared_error_sum*1.0/SS_tot

        regret = squared_error_sum*1.0/n - min_offline_errors[n_idx]

        if n_idx != len(ns) - 1:
            n_idx += 1

        print(RMSE,DR,Rsquared,Time,regret,i+1,'OLST')

94260.80848827685


Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 6.781407180636056e-17


1
900.0


LinAlgError: Singular matrix