In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import normalize
from sklearn import datasets, linear_model
import hdbscan
import seaborn as sns
import statsmodels.api as sm
from numpy import linalg
from math import sqrt
import time
from sklearn.decomposition import PCA
from skopt import gp_minimize, dump, load
from skopt.space import Real, Integer
from skopt.plots import plot_convergence
from skopt.utils import use_named_args
%matplotlib inline

In [2]:
dataset = pd.read_csv('shopitem.csv')
Shopgroup = pd.read_csv('Shopgroup.csv')
Itemgroup = pd.read_csv('Itemgroup.csv')

In [3]:
sparsity=1-dataset.shape[0]/214200/60
sparsity

0.9669993775287893

In [4]:
Ntr=int(dataset.shape[0]*0.6)
Nv=int(dataset.shape[0]*0.8)
sim_data=dataset.values
n=214200
m=60
train=sim_data[:Ntr,:]
validation=sim_data[Ntr:Nv,:]
test=sim_data[Nv:,:]
x_train=train[:,:2]  
x_test=test[:,:2]
x_valid=validation[:,:2]
x_train=x_train.astype(int)
x_test=x_test.astype(int)
x_valid=x_valid.astype(int)
y=train[:,2]
y_test=test[:,2]
y_valid=validation[:,2]

In [5]:
#player set
ind1=[] #matched played for each user
y1=[]
#match set
ind2=[] #players in this match
y2=[]
for u in range(n):
    ind1.append(x_train[x_train[:,0]==u,1])
    y1.append(train[x_train[:,0]==u,2])
for i in range(m):
    ind2.append(x_train[x_train[:,1]==i,0])
    y2.append(train[x_train[:,1]==i,2])

In [6]:
def shrink(x,l):
    if x>l/2:
        X=x-l/2
    elif x<-l/2:
        X=x+l/2
    else:
        X=0
    return X
Vshrink= np.vectorize(shrink)

In [7]:
def mylasso(y,x,k,l,L):
    betaols=linalg.solve(x.transpose()@x+L*np.identity(k),x.transpose()@y)
    beta = Vshrink(betaols,l)
    return beta

In [8]:
def myl0(y,x,k,l,L):
    beta=linalg.solve(x.transpose()@x+L*np.identity(k),x.transpose()@y)
    beta = beta*((beta>np.median(beta))+1)
    return beta

In [None]:
def mygsm(foo,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,matchgroup,playergroup,n,m,k,l,L): 
    inM=matchgroup.In.max()
    exM=matchgroup.Ex.max()
    inN=playergroup.In.max()
    exN=playergroup.Ex.max()
    P=np.random.normal(2,1,(n,k))
    Q=np.random.normal(2,1,(m,k))
    S=np.random.normal(2,1,(n,k))
    T=np.random.normal(2,1,(m,k))
    A=np.random.normal(2,1,(n,k))
    B=np.random.normal(2,1,(m,k))
    Pnew=np.zeros(shape=(n,k))
    Qnew=np.zeros(shape=(m,k))
    Snew=np.zeros(shape=(n,k))
    Tnew=np.zeros(shape=(m,k))
    Anew=np.zeros(shape=(n,k))
    Bnew=np.zeros(shape=(m,k))
    yhat=np.sum(np.multiply((P[x_train[:,0],:]+S[x_train[:,0],:]+A[x_train[:,0],:]),(Q[x_train[:,1],:]+T[x_train[:,1],:]+B[x_train[:,1],:])),1)
    it=1  #number of iterations
    diff=1  #improvement over last iteration
    diffQTB=1
    diffPSA=1
    while(diff>1e-5 or it<10):
        print("diff=",diff)
        diff=np.sum(np.multiply(Pnew+Snew+Anew-P-S-A,Pnew+Snew+Anew-P-S-A))/n/k+np.sum(np.multiply(Qnew+Tnew+Bnew-Q-T-B,Qnew+Tnew+Bnew-Q-T-B))/m/k
        while(diffQTB>1e-8):
            for i in range(m):
                xpsa=P[ind2[i],:]+S[ind2[i],:]+A[ind2[i],:]
                r=y2[i]-(P[ind2[i],:]+S[ind2[i],:]+A[ind2[i],:])@(T[i,:]+B[i,:])
                Qnew[i,:]=foo(r,xpsa,k,l,L)

            for i in range(inM):
                I=matchgroup.matchId[matchgroup.In==i]
                I=I[I<len(ind2)]
                playerI=[]
                for j in I:
                    playerI=np.concatenate([playerI,ind2[j]]).astype("int")
                xpsa=P[playerI,:]+S[playerI,:]+A[playerI,:]
                r=[]
                for j in I:
                    R=y2[j]-(P[ind2[j],:]+S[ind2[j],:]+A[ind2[j],:])@(Qnew[j,:]+B[j,:])
                    r=np.concatenate([r,R])
                Tnew[I,:]=foo(r,xpsa,k,l,L)

            for i in range(exM):
                I=matchgroup.matchId[matchgroup.Ex==i]
                I=I[I<len(ind2)]
                playerI=[]
                for j in I:
                    playerI=np.concatenate([playerI,ind2[j]]).astype("int")
                xpsa=P[playerI,:]+S[playerI,:]+A[playerI,:]
                r=[]
                for j in I:
                    R=y2[j]-(P[ind2[j],:]+S[ind2[j],:]+A[ind2[j],:])@(Qnew[j,:]+Tnew[j,:])
                    r=np.concatenate([r,R])
                Bnew[I,:]=foo(r,xpsa,k,l,L)
           
            diffQTB=np.sum(np.multiply(Qnew-Q,Qnew-Q))/m/k+np.sum(np.multiply(Tnew-T,Tnew-T))/inM/k+np.sum(np.multiply(Bnew-B,Bnew-B))/exM/k
            print("diffQTB=",diffQTB)
            Q=Qnew
            T=Tnew
            B=Bnew
        while(diffPSA>1e-8):
            for i in range(n):
                xqtb=Q[ind1[i],:]+T[ind1[i],:]+B[ind1[i],:]
                r=y1[i]-(Q[ind1[i],:]+T[ind1[i],:]+B[ind1[i],:])@(S[i,:]+A[i,:])
                Pnew[i,:]=foo(r,xqtb,k,l,L)

            for i in range(inN):
                I=playergroup.playerId[playergroup.In==i]
                I=I[I<len(ind1)]
                matchI=[]
                for j in I:
                    matchI=np.concatenate([matchI,ind1[j]]).astype("int")
                xqtb=Q[matchI,:]+T[matchI,:]+B[matchI,:]
                r=[]
                for j in I:
                    R=y1[j]-(Q[ind1[j],:]+T[ind1[j],:]+B[ind1[j],:])@(Pnew[j,:]+A[j,:])
                    r=np.concatenate([r,R])
                Snew[I,:]=foo(r,xqtb,k,l,L)

            for i in range(exN):
                I=playergroup.playerId[playergroup.Ex==i]
                I=I[I<len(ind1)]
                matchI=[]
                for j in I:
                    matchI=np.concatenate([matchI,ind1[j]]).astype("int")
                xqtb=Q[matchI,:]+T[matchI,:]+B[matchI,:]
                r=[]
                for j in I:
                    R=y1[j]-(Q[ind1[j],:]+T[ind1[j],:]+B[ind1[j],:])@(Pnew[j,:]+Snew[j,:])
                    r=np.concatenate([r,R])
                Anew[I,:]=foo(r,xqtb,k,l,L)  
            diffPSA=np.sum(np.multiply(Pnew-P,Pnew-P))/n/k+np.sum(np.multiply(Snew-S,Snew-S))/inN/k+np.sum(np.multiply(Anew-A,Anew-A))/exN/k
            print("diffPSA=",diffPSA)
            P=Pnew
            S=Snew
            A=Anew
        it=it+1
    
    yhat_valid=np.sum(np.multiply((P[x_valid[:,0],:]+S[x_valid[:,0],:]+A[x_valid[:,0],:]),(Q[x_valid[:,1],:]+T[x_valid[:,1],:]+B[x_valid[:,1],:])),1)
    #yhat_valid=np.round(yhat_valid,decimals=4)
    RMSE=sqrt((y_valid-yhat_valid)@(y_valid-yhat_valid)/y_valid.size)
    return RMSE,yhat_valid

In [None]:
def mygsm1(foo,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,matchgroup,playergroup,n,m,k,l,L): 
    inM=matchgroup.In.max()
    #exM=matchgroup.Ex.max()
    inN=playergroup.In.max()
    #exN=playergroup.Ex.max()
    P=np.random.normal(2,1,(n,k))
    Q=np.random.normal(2,1,(m,k))
    S=np.random.normal(2,1,(n,k))
    T=np.random.normal(2,1,(m,k))
    #A=np.random.normal(2,1,(n,k))
    #B=np.random.normal(2,1,(m,k))
    Pnew=np.zeros(shape=(n,k))
    Qnew=np.zeros(shape=(m,k))
    Snew=np.zeros(shape=(n,k))
    Tnew=np.zeros(shape=(m,k))
    #Anew=np.zeros(shape=(n,k))
    #Bnew=np.zeros(shape=(m,k))
    yhat=np.sum(np.multiply((P[x_train[:,0],:]+S[x_train[:,0],:]),(Q[x_train[:,1],:]+T[x_train[:,1],:])),1)
    it=1  #number of iterations
    diff=1  #improvement over last iteration
    diffQTB=1
    diffPSA=1
    while(diff>1e-5 or it<10):
        print("diff=",diff)
        diff=np.sum(np.multiply(Pnew+Snew-P-S,Pnew+Snew-P-S))/n/k+np.sum(np.multiply(Qnew+Tnew-Q-T,Qnew+Tnew-Q-T))/m/k
        while(diffQTB>1e-8):
            for i in range(m):
                xpsa=P[ind2[i],:]+S[ind2[i],:]
                r=y2[i]-(P[ind2[i],:]+S[ind2[i],:])@(T[i,:])
                Qnew[i,:]=foo(r,xpsa,k,l,L)

            for i in range(inM):
                I=matchgroup.matchId[matchgroup.In==i]
                I=I[I<len(ind2)]
                playerI=[]
                for j in I:
                    playerI=np.concatenate([playerI,ind2[j]]).astype("int")
                xpsa=P[playerI,:]+S[playerI,:]
                r=[]
                for j in I:
                    R=y2[j]-(P[ind2[j],:]+S[ind2[j],:])@(Qnew[j,:])
                    r=np.concatenate([r,R])
                Tnew[I,:]=foo(r,xpsa,k,l,L)

           
            diffQTB=np.sum(np.multiply(Qnew-Q,Qnew-Q))/m/k+np.sum(np.multiply(Tnew-T,Tnew-T))/inM/k
            print("diffQTB=",diffQTB)
            Q=Qnew
            T=Tnew
        while(diffPSA>1e-8):
            for i in range(n):
                xqtb=Q[ind1[i],:]+T[ind1[i],:]
                r=y1[i]-(Q[ind1[i],:]+T[ind1[i],:])@(S[i,:])
                Pnew[i,:]=foo(r,xqtb,k,l,L)

            for i in range(inN):
                I=playergroup.playerId[playergroup.In==i]
                I=I[I<len(ind1)]
                matchI=[]
                for j in I:
                    matchI=np.concatenate([matchI,ind1[j]]).astype("int")
                xqtb=Q[matchI,:]+T[matchI,:]
                r=[]
                for j in I:
                    R=y1[j]-(Q[ind1[j],:]+T[ind1[j],:])@(Pnew[j,:])
                    r=np.concatenate([r,R])
                Snew[I,:]=foo(r,xqtb,k,l,L)

            diffPSA=np.sum(np.multiply(Pnew-P,Pnew-P))/n/k+np.sum(np.multiply(Snew-S,Snew-S))/inN/k
            print("diffPSA=",diffPSA)
            P=Pnew
            S=Snew
        it=it+1
    
    yhat_valid=np.sum(np.multiply((P[x_valid[:,0],:]+S[x_valid[:,0],:]),(Q[x_valid[:,1],:]+T[x_valid[:,1],:])),1)
    #yhat_valid=np.round(yhat_valid,decimals=4)
    RMSE=sqrt((y_valid-yhat_valid)@(y_valid-yhat_valid)/y_valid.size)
    #MAE=sum(abs((y_valid-yhat_valid)))/y_valid.size
    return RMSE,yhat_valid

In [None]:
def mygsm0(foo,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,n,m,k,l,L): 
    P=np.random.normal(2,1,(n,k))
    Q=np.random.normal(2,1,(m,k))
    Pnew=np.zeros(shape=(n,k))
    Qnew=np.zeros(shape=(m,k))
    yhat=np.sum(np.multiply((P[x_train[:,0],:]),(Q[x_train[:,1],:])),1)
    it=1  #number of iterations
    diff=1  #improvement over last iteration
    diffQTB=1
    diffPSA=1
    while(diff>1e-5 or it<10):
        print("diff=",diff)
        diff=np.sum(np.multiply(Pnew-P,Pnew-P))/n/k+np.sum(np.multiply(Qnew-Q,Qnew-Q))/m/k
        while(diffQTB>1e-8):
            for i in range(m):
                xpsa=P[ind2[i],:]
                r=y2[i]
                Qnew[i,:]=foo(r,xpsa,k,l,L)   
            diffQTB=np.sum(np.multiply(Qnew-Q,Qnew-Q))/m/k
            print("diffQTB=",diffQTB)
            Q=Qnew
        while(diffPSA>1e-8):
            for i in range(n):
                xqtb=Q[ind1[i],:]
                r=y1[i]
                Pnew[i,:]=foo(r,xqtb,k,l,L)

            diffPSA=np.sum(np.multiply(Pnew-P,Pnew-P))/n/k+np.sum(np.multiply(Snew-S,Snew-S))/inN/k
            print("diffPSA=",diffPSA)
            P=Pnew
            S=Snew
        it=it+1
    
    yhat_valid=np.sum(np.multiply((P[x_valid[:,0],:]),(Q[x_valid[:,1],:])),1)
    #yhat_valid=np.round(yhat_valid,decimals=4)
    RMSE=sqrt((y_valid-yhat_valid)@(y_valid-yhat_valid)/y_valid.size)
    #MAE=sum(abs((y_valid-yhat_valid)))/y_valid.size
    return RMSE,yhat_valid

2-layered gssvd rmse focused k =4,...,10

In [None]:
l = Real(low=1e-7, high=1, prior='uniform',
                             name='l')
L = Real(low=1e-7, high=1, prior='uniform',
                             name='L')

dimensions = [l,L]

In [None]:
@use_named_args(dimensions=dimensions)
def Fitness(l,L):
    print()
    rmse,yhat = mygsm(mylasso,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,Shopgroup,Itemgroup,
                      n,m,k,l,L)
    print()
    print("rmse:",rmse)
    print()
    return rmse

In [None]:
start=time.time()
rmse=np.zeros(shape=(3,7))
for k in range(4,11):
    default_parameters = [1e-2,1e-2]
    search_result = gp_minimize(func=Fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    rmse[0,k-4]=mygsm(mylasso,ind1,y1,ind2,y2,x_train,x_test,y,y_test,Shopgroup,Itemgroup,n,m,k,para[0],
    para[1])[0]
print(time.time()-start)



diff= 1
diffQTB= 84.50133163006446
diffQTB= 0.0
diffPSA= 116129.1989497084
diffPSA= 0.0
diff= 77.92588845216187
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31
diff= 2.7462371586073533e-31

rmse: 1.8420421282102255


diff= 1
diffQTB= 94.49409877366301
diffQTB= 0.0
diffPSA= 130067.74313576045
diffPSA= 0.0
diff= 78.64498434335084
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32
diff= 7.930466289318918e-32

rmse: 2.3319635536954464


diff= 1
diffQTB= 93.06992653722905
diffQTB= 0.0
diffPSA= 151805.11389249118
diffPSA= 0.0
diff= 78.88285184617672
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32
diff= 2.473253699376476e-32

rmse: 2.3


diff= 1
diffQTB= 79.74178292874552
diffQTB= 0.0
diffPSA= 136567.2843066127
diffPSA= 0.0
diff= 75.59320797893824
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31
diff= 1.3126664481211056e-31

rmse: 2.460616937815555


diff= 1
diffQTB= 98.58907724394052
diffQTB= 0.0
diffPSA= 87716.44420421758
diffPSA= 0.0
diff= 78.92581893843447
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31
diff= 7.075536222486657e-31

rmse: 1.4654213961211635


diff= 1
diffQTB= 90.72936594163569
diffQTB= 0.0
diffPSA= 115623.47502242998
diffPSA= 0.0
diff= 77.30181353395255
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32
diff= 4.938795496920084e-32

rmse: 2.308


diff= 1
diffQTB= 89.81900183765748
diffQTB= 0.0
diffPSA= 115374.25402479596
diffPSA= 0.0
diff= 77.44280393589484
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32
diff= 2.9263083086116234e-32

rmse: 2.312892349821067


diff= 1
diffQTB= 84.2699963785938
diffQTB= 0.0
diffPSA= 102607.53245019403
diffPSA= 0.0
diff= 77.24966130856279
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32
diff= 3.4450621246798117e-32

rmse: 2.3138422070551


diff= 1
diffQTB= 81.69373994902072
diffQTB= 0.0
diffPSA= 109127.40501470002
diffPSA= 0.0
diff= 75.65552377714064
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32
diff= 2.9216882346497937e-32



diff= 1
diffQTB= 78.29625341515553
diffQTB= 0.0
diffPSA= 106131.77844214301
diffPSA= 0.0
diff= 75.51763673029583
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32
diff= 5.68786133518902e-32

rmse: 2.352207333356228


diff= 1
diffQTB= 93.46355487594722
diffQTB= 0.0
diffPSA= 113708.67844485698
diffPSA= 0.0
diff= 79.91282116444572
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32
diff= 8.639347873531936e-32

rmse: 2.3025053019004695


diff= 1
diffQTB= 91.20106341247717
diffQTB= 0.0
diffPSA= 103419.47650251008
diffPSA= 0.0
diff= 75.6639143814213
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31
diff= 1.0663206590218384e-31

rmse: 1.944586104

diff= 1
diffQTB= 90.49219098172118
diffQTB= 0.0
diffPSA= 87787.98470123326
diffPSA= 0.0
diff= 78.23178640623615
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31
diff= 9.784390190737765e-31

diff= 1
diffQTB= 93.5068899515449
diffQTB= 0.0
diffPSA= 139191.56577942212
diffPSA= 0.0
diff= 80.09098663013027
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31
diff= 3.15589286787044e-31

rmse: 2.0498676903525324


diff= 1
diffQTB= 100.48981007938025
diffQTB= 0.0
diffPSA= 100847.80854930314
diffPSA= 0.0
diff= 79.1288743192893
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32
diff= 2.4869520784094965e-32

rmse: 2.504144094673363


diff= 1
diffQTB= 89


rmse: 1.8640655169570897


diff= 1
diffQTB= 93.12015403564521
diffQTB= 0.0
diffPSA= 102174.91523309905
diffPSA= 0.0
diff= 79.04312167284664
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32
diff= 3.8172102691527845e-32

rmse: 2.39666617948498


diff= 1
diffQTB= 92.3760953319908
diffQTB= 0.0
diffPSA= 109133.10845343288
diffPSA= 0.0
diff= 78.43433186343438
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32
diff= 1.8234815485899594e-32

rmse: 2.514866929372091


diff= 1
diffQTB= 86.58733027473855
diffQTB= 0.0
diffPSA= 130197.9103869746
diffPSA= 0.0
diff= 77.0237606141997
diff= 1.0089522313115027e-31
diff= 1.0089522313115027e-31
diff= 1.0089522313115027e-31
diff= 1.0089522313115027e-31
diff= 1.0089522313115027e-31
diff= 1.0089522313115027e-31
dif


rmse: 2.4192715575058057


diff= 1
diffQTB= 90.22269613123099
diffQTB= 0.0
diffPSA= 127627.8725265085
diffPSA= 0.0
diff= 77.99761154307899
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32
diff= 6.554761546649839e-32

rmse: 2.3289434029938096


diff= 1
diffQTB= 93.36405968506335
diffQTB= 0.0
diffPSA= 88058.10386729351
diffPSA= 0.0
diff= 76.34098618594169
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32
diff= 3.25046048061655e-32

rmse: 2.7886647680484073


diff= 1
diffQTB= 88.7042442508932
diffQTB= 0.0
diffPSA= 131440.16903813128
diffPSA= 0.0
diff= 77.59957165466224
diff= 2.1567022431647715e-31
diff= 2.1567022431647715e-31
diff= 2.1567022431647715e-31
diff= 2.1567022431647715e-31
diff= 2.1567022431647715e-31
diff= 2.1567022431647715e-31
diff= 2.1567022431647

diffPSA= 0.0
diff= 75.57113271312033
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32
diff= 1.1467934209044723e-32

rmse: 2.5629618009258546


diff= 1
diffQTB= 100.6122096701691
diffQTB= 0.0
diffPSA= 132939.2154043537
diffPSA= 0.0
diff= 79.77017851765385
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31
diff= 1.1563459614984765e-31

rmse: 2.504329614290553


diff= 1
diffQTB= 94.46164603778931
diffQTB= 0.0
diffPSA= 108631.36490396076
diffPSA= 0.0
diff= 78.27347987535931
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32
diff= 4.6514783805038966e-32

rmse: 2.4112975339623874


diff= 1
diffQTB= 91.51657403707297
diffQTB= 0.

rmse: 1.652736665175518


diff= 1
diffQTB= 97.25030381012729
diffQTB= 0.0
diffPSA= 98961.82234520928
diffPSA= 0.0
diff= 79.7826596571216
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31
diff= 2.342274199749725e-31

rmse: 1.8024089833287738


diff= 1
diffQTB= 85.74380932851497
diffQTB= 0.0
diffPSA= 119622.6573802737
diffPSA= 0.0
diff= 76.31627798186435
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32
diff= 9.208735304947562e-32

rmse: 2.631762809541158

diff= 1
diffQTB= 82.54594904312519
diffQTB= 0.0
diffPSA= 87870.76162219726
diffPSA= 0.0
diff= 75.74539842310648
diff= 5.163209425201541e-31
diff= 5.163209425201541e-31
diff= 5.163209425201541e-31
diff= 5.163209425201541e-31
diff= 5.163209425201541e-31
diff= 5.163209425201541e-31
diff= 5.163209425201541e-

diffPSA= 0.0
diff= 77.29975412734004
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32
diff= 6.558645211656213e-32

rmse: 2.301898272038204


diff= 1
diffQTB= 83.19565947421017
diffQTB= 0.0
diffPSA= 118288.66673023644
diffPSA= 0.0
diff= 76.85243999331772
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32
diff= 5.513860825163821e-32

rmse: 2.300133671738734


diff= 1
diffQTB= 90.43833500310816
diffQTB= 0.0
diffPSA= 124580.01245819604
diffPSA= 0.0
diff= 77.59215913481462
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32
diff= 5.890658319172381e-32

rmse: 2.3444085591567236


diff= 1
diffQTB= 91.1469124404197
diffQTB= 0.0
diffPSA= 125731.7698


rmse: 2.3434111296092857


diff= 1
diffQTB= 97.52021212478583
diffQTB= 0.0
diffPSA= 89204.13746626518
diffPSA= 0.0
diff= 78.45873123815718
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32
diff= 7.484531326989296e-32

rmse: 3.680501774705352


diff= 1
diffQTB= 87.46354256384825
diffQTB= 0.0
diffPSA= 89653.57080625204
diffPSA= 0.0
diff= 78.42338373781695
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31
diff= 1.2151968419768881e-31

rmse: 3.842967839418558


diff= 1
diffQTB= 86.51494428028718
diffQTB= 0.0
diffPSA= 124665.77099954637
diffPSA= 0.0
diff= 78.08508413428467
diff= 1.130297675082123e-31
diff= 1.130297675082123e-31
diff= 1.130297675082123e-31
diff= 1.130297675082123e-31
diff= 1.130297675082123e-31
diff= 1.130297675082123e-31
diff= 1.130297

diffPSA= 0.0
diff= 77.88548578979616
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32
diff= 1.2866454022544946e-32

rmse: 2.636808285299009


diff= 1
diffQTB= 91.3557174408106
diffQTB= 0.0
diffPSA= 127198.78497407582
diffPSA= 0.0
diff= 76.77050706660998
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32
diff= 9.194725735732845e-32

rmse: 2.531282319637162


diff= 1
diffQTB= 90.71906364704827
diffQTB= 0.0
diffPSA= 98419.12035740523
diffPSA= 0.0
diff= 76.68015047565983
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32
diff= 5.336256157234096e-32

rmse: 2.424782829324236


diff= 1
diffQTB= 88.68408410715412
diffQTB= 0.0
diffPSA= 127207




diff= 1
diffQTB= 93.0080241238103
diffQTB= 0.0
diffPSA= 87665.60558884106
diffPSA= 0.0
diff= 78.91510204648388
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31
diff= 3.737696564037167e-31

rmse: 1.8828893982940618


diff= 1
diffQTB= 93.39305199793611
diffQTB= 0.0
diffPSA= 88272.2100960417
diffPSA= 0.0
diff= 77.80285671279398
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31
diff= 1.2492207411658495e-31

rmse: 2.7109835752626954


diff= 1
diffQTB= 87.5652558433688
diffQTB= 0.0
diffPSA= 132136.81395975294
diffPSA= 0.0
diff= 77.27454475348551
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32
diff= 7.60153588692022e-32

rmse: 2.348437879618

diff= 6.516218655920619e-32
diff= 6.516218655920619e-32

rmse: 2.530869309118425


diff= 1
diffQTB= 93.95977599775553
diffQTB= 0.0
diffPSA= 120559.12970336035
diffPSA= 0.0
diff= 79.16414838190863
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31
diff= 2.171122625319715e-31

rmse: 2.447760629657174


diff= 1
diffQTB= 92.53896506878452
diffQTB= 0.0
diffPSA= 111901.48295631022
diffPSA= 0.0
diff= 78.32302983117356
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32
diff= 2.2782110206955646e-32

rmse: 2.280557363773434


diff= 1
diffQTB= 90.96213512894455
diffQTB= 0.0
diffPSA= 88310.0601698758
diffPSA= 0.0
diff= 77.63301352493923
diff= 2.219887408816893e-31
diff= 2.219887408816893e-31
diff= 2.219887408816893e-31
diff= 2.219887408816893e-31
diff= 2.2198874


diff= 1
diffQTB= 90.81237895172913
diffQTB= 0.0
diffPSA= 112130.63335915352
diffPSA= 0.0
diff= 79.87875391996093
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32
diff= 5.763775642206968e-32

rmse: 2.3492971833302017


diff= 1
diffQTB= 89.88473723539133
diffQTB= 0.0
diffPSA= 101237.90223102894
diffPSA= 0.0
diff= 77.5946305211846
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32
diff= 1.1601547146719422e-32

rmse: 2.5123556325931222


diff= 1
diffQTB= 89.95603476264894
diffQTB= 0.0
diffPSA= 131274.06192923247
diffPSA= 0.0
diff= 76.5370750785479
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32
diff= 7.06056820168962e-32

rmse: 2.366251361

diff= 3.144356902570402e-31

rmse: 1.7573312460102546


diff= 1
diffQTB= 100.18935789609675
diffQTB= 0.0
diffPSA= 92424.58397442105
diffPSA= 0.0
diff= 77.66679955540232
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32
diff= 1.3637875495537865e-32

rmse: 2.998421729436082


diff= 1
diffQTB= 87.58460391780896
diffQTB= 0.0
diffPSA= 125363.31078035047
diffPSA= 0.0
diff= 76.06461342176462
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31
diff= 1.0417808382310035e-31

rmse: 2.6064996203826993


diff= 1
diffQTB= 86.06055450185458
diffQTB= 0.0
diffPSA= 117379.70868407062
diffPSA= 0.0
diff= 76.70018061863223
diff= 8.615087912529513e-32
diff= 8.615087912529513e-32
diff= 8.615087912529513e-32
diff= 8.615087912529513e-32
diff= 8.615087912529513e-32
diff

1-layered gssvd rmse focused 

In [None]:
@use_named_args(dimensions=dimensions)
def Fitness(l,L):
    rmse,yhat = mygsm1(mylasso,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,Shopgroup,Itemgroup,
                      n,m,k,l,L)
    print()
    print("rmse:",rmse)
    print()
    return rmse

In [None]:
start=time.time()
for k in range(4,11):
    default_parameters = [1e-2,1e-2]
    search_result = gp_minimize(func=Fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    rmse[1,k-4]=mygsm1(mylasso,ind1,y1,ind2,y2,x_train,x_test,y,y_test,Shopgroup,Itemgroup,n,m,k,para[0],
    para[1])[0]
print(time.time()-start)

nogroupsvd rmse focused

In [None]:
@use_named_args(dimensions=dimensions)
def Fitness(l,L):
    print()
    rmse,yhat = mygsm0(mylasso,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,
                      n,m,k,l,L)
    print()
    print("rmse:",rmse)
    print()
    return rmse

In [None]:
start=time.time()
for k in range(4,11):
    default_parameters = [1e-2,1e-2]
    search_result = gp_minimize(func=Fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    rmse[2,k-4]=mygsm0(mylasso,ind1,y1,ind2,y2,x_train,x_test,y,y_test,n,m,k,para[0],
    para[1])[0]
print(time.time()-start)

2-layered gssvd recommender focused

In [None]:
@use_named_args(dimensions=dimensions)
def fitness(l,L):
    Y = mygsm(myl0,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,Shopgroup,Itemgroup,
                      n,m,k,l,L)[1]
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    return acc.mean()

In [None]:
start=time.time()
accuracy=np.zeros(shape=(3,7))
for k in range(4,11):
    default_parameters = [0.5,0.5]
    search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    
    a=mygsm(myl0,ind1,y1,ind2,y2,x_train,x_test,y,y_test,Shopgroup,Itemgroup,n,m,k,para[0],
    para[1])
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    accuracy[0,k-4]=acc.mean()
print(time.time()-start)

1-layered gssvd recommender focused

In [None]:
@use_named_args(dimensions=dimensions)
def fitness(l,L):
    Y = mygsm1(myl0,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,Shopgroup,Itemgroup,
                      n,m,k,l,L)[1]
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    return acc.mean()

In [None]:
start=time.time()
for k in range(4,11):
    default_parameters = [0.5,0.5]
    search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    
    a=mygsm1(myl0,ind1,y1,ind2,y2,x_train,x_test,y,y_test,Shopgroup,Itemgroup,n,m,k,para[0],
    para[1])
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    accuracy[1,k-4]=acc.mean()
print(time.time()-start)

svd recommender focused

In [None]:
@use_named_args(dimensions=dimensions)
def fitness(l,L):
    Y = mygsm0(myl0,ind1,y1,ind2,y2,x_train,x_valid,y,y_valid,
                      n,m,k,l,L)[1]
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    return acc.mean()

In [None]:
start=time.time()
for k in range(4,11):
    default_parameters = [0.5,0.5]
    search_result = gp_minimize(func=fitness,
                            dimensions=dimensions,
                            acq_func='EI', 
                            n_calls=100,
                            x0=default_parameters)
    para=search_result.x
    
    a=mygsm0(myl0,ind1,y1,ind2,y2,x_train,x_test,y,y_test,n,m,k,para[0],
    para[1])
    b=y_valid>(np.median(y_valid))
    acc=[]
    for i in range(1,10):
        a=Y>=(Y[np.argsort(Y)[-i]])
        acc=np.append(acc,sum(b[a])/len(b[a]))
    accuracy[2,k-4]=acc.mean()
print(time.time()-start)