In [1]:
from imp import reload
import utils as ul
import mls as pb
import fw
#import park as sc
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Data generation

In [2]:
## Shapes -- X: n*p   W: p*m   Y: n*m
# Seed NumPy's global RNG first so the generated data is repeatable.
np.random.seed(0)
X, Y, W = pb.generate(n=16)
n, m = Y.shape      # rows and columns of Y
p = X.shape[1]      # columns of X

In [3]:
# Sanity check: fit scikit-learn's MultiTaskLasso on the full (X, Y) and
# display its score on the very same data it was fitted on (a training-set
# score, not a held-out one).
from sklearn.linear_model import MultiTaskLasso
MultiTaskLasso(alpha=.001).fit(X, Y).score(X, Y)

0.99998909921955026

In [4]:
# Turn (X, Y) into `points` via ul.mat2point, distribute them as an RDD and
# run ul.point2mat once per partition. The recorded dataRDD.first() output
# shows each resulting element is a list of two 2-D arrays.
# NOTE(review): `sc` is not defined anywhere in the visible notebook --
# presumably the SparkContext injected by the PySpark kernel; confirm.
points = ul.mat2point(X,Y)
dataRDD = sc.parallelize(points).mapPartitions(ul.point2mat)

In [5]:
# Peek at the first element of dataRDD.
dataRDD.first()

[array([[ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ],
        [ 1.86755799, -0.97727788,  0.95008842, -0.15135721]]),
 array([[ 0.14377827, -0.579834  , -0.26093079,  0.26925737, -0.93048297],
        [ 0.12501844, -0.29203879, -0.65073588, -0.15719248, -0.09999714]])]

In [6]:
# Apply ul.hashkey to every element; the recorded hashedDataRDD.first()
# output shows the result is an (integer key, original element) pair.
hashedDataRDD = dataRDD.map(ul.hashkey)
hashedDataRDD

PythonRDD[2] at RDD at PythonRDD.scala:43

In [7]:
# Peek at the first (key, data) pair.
hashedDataRDD.first()

(-4263895035619207593,
 [array([[ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ],
         [ 1.86755799, -0.97727788,  0.95008842, -0.15135721]]),
  array([[ 0.14377827, -0.579834  , -0.26093079,  0.26925737, -0.93048297],
         [ 0.12501844, -0.29203879, -0.65073588, -0.15719248, -0.09999714]])])

In [8]:
# Build a pb.Model from the value of each (key, value) pair, keeping the key
# unchanged, and persist the RDD because later cells reuse it repeatedly.
modelRDD = hashedDataRDD.mapValues(pb.Model).persist()
modelRDD

PythonRDD[4] at RDD at PythonRDD.scala:43

In [9]:
# Bring every (key, Model) pair back to the driver for inspection.
modelRDD.collect()

[(-4263895035619207593, <mls.Model at 0x7f574c9a6390>),
 (-7129992736779873975, <mls.Model at 0x7f574c99ebe0>),
 (7894955917745079217, <mls.Model at 0x7f574c99ea90>),
 (3062070986511224413, <mls.Model at 0x7f574c96b278>),
 (-4793468373099478964, <mls.Model at 0x7f574c96b7f0>),
 (-3154666840363234380, <mls.Model at 0x7f574c96b828>),
 (8730318122229942956, <mls.Model at 0x7f574c96b860>),
 (-2422112472651632444, <mls.Model at 0x7f574c96b898>)]

## Initialization

In [10]:
# Re-seed NumPy's global RNG so the cells below are repeatable.
np.random.seed(0)

T = 30               # number of iterations of FW (presumably Frank-Wolfe -- confirm)
U = np.zeros((T,p))  # left singular vector, one row of length p per iteration
V = np.zeros((T,m))  # right singular vector, one row of length m per iteration

# One loss slot per iteration.
loss = np.zeros(T)

# One pb.Param per model, constructed from that model's (p, m) tuple;
# mapValues keeps the keys of modelRDD.
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
paramRDD

PythonRDD[5] at RDD at PythonRDD.scala:43

In [11]:
# Bring every (key, Param) pair back to the driver for inspection.
paramRDD.collect()

[(-4263895035619207593, <mls.Param at 0x7f574ca0ae10>),
 (-7129992736779873975, <mls.Param at 0x7f574ca0ae80>),
 (7894955917745079217, <mls.Param at 0x7f574c99ed30>),
 (3062070986511224413, <mls.Param at 0x7f574c99efd0>),
 (-4793468373099478964, <mls.Param at 0x7f574c99eda0>),
 (-3154666840363234380, <mls.Param at 0x7f574c99ef28>),
 (8730318122229942956, <mls.Param at 0x7f574c99e5f8>),
 (-2422112472651632444, <mls.Param at 0x7f574c99ee10>)]

In [12]:
# Inspect the W attribute of the first Param (index [1] skips the key);
# the recorded output is an all-zero 4x5 array.
paramRDD.first()[1].W

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

## Subfunctions

In [13]:
# Build gradRDD from the current parameters and the models via fw.gradient.
# The recorded gradRDD.first() output shows (key, 4x5 array) pairs.
gradRDD = fw.gradient(paramRDD, modelRDD)
gradRDD

PythonRDD[14] at RDD at PythonRDD.scala:43

In [14]:
# Peek at the first (key, gradient array) pair.
gradRDD.first()

(-3154666840363234380,
 array([[-0.10089152, -0.24011634,  1.0154719 , -0.08271822,  0.00573742],
        [-0.12307769, -0.32516214,  1.27095607, -0.11758494,  0.00536778],
        [-0.18694472, -0.39076385,  1.82754819, -0.12526168,  0.01337085],
        [ 0.17852578,  0.44671054, -1.81864579,  0.15765856, -0.00904789]]))

In [15]:
# Call fw.centralize on gradRDD; the recorded result is a pair of vectors of
# length 4 and 5.
# NOTE(review): the second argument is presumably the iteration index t, by
# analogy with the `lambda x,t:` wrappers passed to fw.iterate -- confirm.
fw.centralize(gradRDD, 1)

(array([ 1.        ,  0.25811172,  0.34515718,  0.17857333]),
 array([-0.20328427,  0.32771366,  1.        , -0.30466848,  0.82997534]))

In [16]:
# fw.warmstart returns an RDD; its first element is recorded as
# (key, (length-4 vector, length-5 vector)).
fw.warmstart(gradRDD).first()

(-3154666840363234380,
 (array([-0.55709145, -0.70062551, -0.99693171,  1.        ]),
  array([ 0.09954048,  0.23502357, -1.        ,  0.08063955, -0.00575557])))

In [17]:
# Collect all warmstart vector pairs on the driver, dropping the keys.
fw.warmstart(gradRDD).values().collect()

[(array([-0.55709145, -0.70062551, -0.99693171,  1.        ]),
  array([ 0.09954048,  0.23502357, -1.        ,  0.08063955, -0.00575557])),
 (array([ 0.37095484,  0.17447088,  0.51249097,  1.        ]),
  array([-0.13899444,  0.72134634,  0.06371089, -0.24337997,  1.        ])),
 (array([ 1.        , -0.2217441 , -0.18032229,  0.04169703]),
  array([-0.19114083,  0.30436118,  1.        , -0.15622772,  0.5959531 ])),
 (array([ 0.13361309,  0.2895    ,  0.20615412,  1.        ]),
  array([-0.07259322,  0.56050784, -0.29242015, -0.3681458 ,  1.        ])),
 (array([ 1.        , -0.01506649,  0.53996012,  0.83451796]),
  array([-0.2152368 ,  0.75045659,  0.62549263, -0.18622991,  1.        ])),
 (array([ 0.63569494,  1.        ,  0.0133302 , -0.1351764 ]),
  array([-0.07706299, -0.37436844,  0.66086573, -0.88320207,  1.        ])),
 (array([ 1.        , -0.00489197,  0.05233511,  0.04840565]),
  array([-0.18627935,  0.28941065,  1.        , -0.11290357,  0.52038693])),
 (array([ 1.        

In [18]:
# Call fw.avgmix on gradRDD; the recorded result is a pair of vectors. Its
# second vector equals the element-wise sum of the warmstart second vectors
# that a later cell computes with reduce.
# NOTE(review): meaning of the second argument (1) is not visible here.
fw.avgmix(gradRDD, 1)

(array([ 4.58317143,  0.91819453,  0.64117031,  3.47697762]),
 array([-0.93314531,  2.51689314,  3.0576491 , -2.10803424,  5.57995236]))

In [19]:
# Keep only the second vector of every warmstart pair (keys dropped) and
# collect them on the driver.
fw.warmstart(gradRDD).values().map(lambda uv: uv[1]).collect()

[array([ 0.09954048,  0.23502357, -1.        ,  0.08063955, -0.00575557]),
 array([-0.13899444,  0.72134634,  0.06371089, -0.24337997,  1.        ]),
 array([-0.19114083,  0.30436118,  1.        , -0.15622772,  0.5959531 ]),
 array([-0.07259322,  0.56050784, -0.29242015, -0.3681458 ,  1.        ]),
 array([-0.2152368 ,  0.75045659,  0.62549263, -0.18622991,  1.        ]),
 array([-0.07706299, -0.37436844,  0.66086573, -0.88320207,  1.        ]),
 array([-0.18627935,  0.28941065,  1.        , -0.11290357,  0.52038693]),
 array([-0.15137817,  0.03015541,  1.        , -0.23858472,  0.4693679 ])]

In [20]:
# Element-wise sum of the second vector of every warmstart pair.
fw.warmstart(gradRDD).values().map(lambda uv: uv[1]).reduce(lambda acc, vec: acc + vec)

array([-0.93314531,  2.51689314,  3.0576491 , -2.10803424,  5.57995236])

In [21]:
# Call fw.poweriter with second argument 0 and a callable that always
# returns 10; the recorded result is a pair of vectors of length 4 and 5.
# NOTE(review): the callable presumably maps the iteration index t to a
# number of power-iteration rounds -- confirm against fw.poweriter.
fw.poweriter(gradRDD, 0, lambda t: 10)

(array([ 0.90623305,  0.23390941,  0.31279283,  0.16182922]),
 array([-0.14631196,  0.23586888,  0.71974062, -0.21928231,  0.59736705]))

In [22]:
# Tabulate fw.loground over the integers 0..100 as a NumPy array.
np.array(list(map(fw.loground, range(101))))

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 3])

In [23]:
# Keep the pair returned by fw.centralize (called here without a second
# argument) as u and v, then display both.
u, v = fw.centralize(gradRDD)
u, v

(array([ 1.        ,  0.25811172,  0.34515718,  0.17857333]),
 array([-0.20328427,  0.32771366,  1.        , -0.30466848,  0.82997534]))

In [24]:
# Pass (u, v) through fw.regularize and display the result.
# NOTE: u and v are rebound in place, so this cell is not idempotent --
# re-run the previous cell before re-running this one.
u, v = fw.regularize(u, v, 1)
u, v

(array([-0.90623308, -0.23390938, -0.31279286, -0.16182906]),
 array([-0.14631196,  0.23586885,  0.71974067, -0.2192823 ,  0.59736701]))

In [25]:
# Push u and v into every Param via fw.broadcast and show the D attribute of
# the first one. The recorded D equals np.outer(u, v), as the next cell's
# output confirms.
# NOTE: paramRDD is rebound, so later cells see the updated parameters.
paramRDD = fw.broadcast(paramRDD, u, v)
paramRDD.values().first().D

array([[ 0.13259274, -0.21375215, -0.6522528 ,  0.19872087, -0.54135374],
       [ 0.03422374, -0.05517194, -0.16835409,  0.05129219, -0.13972974],
       [ 0.04576534, -0.07377809, -0.22512974,  0.06858994, -0.18685213],
       [ 0.02367753, -0.03817043, -0.11647496,  0.03548625, -0.09667134]])

In [26]:
# Outer product of u and v, shown for comparison with Param.D.
np.outer(u,v)

array([[ 0.13259274, -0.21375215, -0.6522528 ,  0.19872087, -0.54135374],
       [ 0.03422374, -0.05517194, -0.16835409,  0.05129219, -0.13972974],
       [ 0.04576534, -0.07377809, -0.22512974,  0.06858994, -0.18685213],
       [ 0.02367753, -0.03817043, -0.11647496,  0.03548625, -0.09667134]])

In [27]:
# Loss reported by fw.loss for the current parameters (W has not been
# stepped yet at this point in the notebook).
fw.loss(paramRDD, modelRDD)

3.5450419508245972

In [28]:
# Apply fw.naivestep and show the `a` attribute of the first Param
# (recorded as 1.0). The second element of the returned pair is discarded.
# NOTE: assigning to `_` shadows IPython's last-output variable.
paramRDD, _ = fw.naivestep(paramRDD)
paramRDD.values().first().a

1.0

In [29]:
# Apply fw.descent and show W of the first Param. With a == 1.0 the recorded
# W equals D, which is consistent with W having moved by a * D.
paramRDD = fw.descent(paramRDD)
paramRDD.values().first().W

array([[ 0.13259274, -0.21375215, -0.6522528 ,  0.19872087, -0.54135374],
       [ 0.03422374, -0.05517194, -0.16835409,  0.05129219, -0.13972974],
       [ 0.04576534, -0.07377809, -0.22512974,  0.06858994, -0.18685213],
       [ 0.02367753, -0.03817043, -0.11647496,  0.03548625, -0.09667134]])

In [30]:
# Loss after the descent step taken with the naive step size.
fw.loss(paramRDD, modelRDD)

8.0467309691748863

In [31]:
# Apply fw.ascent and show W of the first Param; in the recorded output W is
# back to all zeros, i.e. the preceding descent step has been undone.
paramRDD = fw.ascent(paramRDD)
paramRDD.values().first().W

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

In [32]:
# Loss after undoing the step; the recorded value matches the loss measured
# before the descent step.
fw.loss(paramRDD, modelRDD)

3.5450419508245972

In [33]:
# Choose the step with fw.linearsearch instead of fw.naivestep and show the
# `a` attribute of the first Param. The second element of the returned pair
# is discarded.
# NOTE: assigning to `_` shadows IPython's last-output variable.
paramRDD, _ = fw.linearsearch(paramRDD, modelRDD, gradRDD)
paramRDD.values().first().a

0.36223180305110358

In [34]:
# Apply fw.descent with the line-search step and show W of the first Param;
# the recorded W is consistent with a * D for the `a` shown above.
paramRDD = fw.descent(paramRDD)
paramRDD.values().first().W

array([[ 0.04802931, -0.07742783, -0.23626671,  0.07198302, -0.19609554],
       [ 0.01239693, -0.01998503, -0.06098321,  0.01857966, -0.05061456],
       [ 0.01657766, -0.02672477, -0.08154915,  0.02484546, -0.06768378],
       [ 0.00857675, -0.01382655, -0.04219093,  0.01285425, -0.03501743]])

In [35]:
# Loss after the descent step taken with the line-search step size.
fw.loss(paramRDD, modelRDD)

1.4013139769868053

In [36]:
# Undo the step again with fw.ascent; the recorded W is all zeros.
paramRDD = fw.ascent(paramRDD)
paramRDD.values().first().W

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.]])

## Main function

In [37]:
# Re-seed and rebuild fresh parameters so this run does not depend on the
# exploratory cells above, then run fw.iterate with fw.centralize and
# fw.naivestep. The recorded result is (RDD, (u, v), step, loss), with
# step None here.
# NOTE(review): meaning of the trailing arguments (1, 0) is not visible here.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, fw.centralize, fw.naivestep, 1, 0)

(PythonRDD[112] at RDD at PythonRDD.scala:43,
 (array([-0.90623308, -0.23390938, -0.31279286, -0.16182906]),
  array([-0.14631196,  0.23586885,  0.71974067, -0.2192823 ,  0.59736701])),
 None,
 8.0467309691750746)

In [38]:
# Same setup, but with fw.linearsearch as the step rule. The recorded step
# and loss agree with the manual linearsearch/descent cells above.
# NOTE(review): meaning of the trailing arguments (1, 0) is not visible here.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, fw.centralize, fw.linearsearch, 1, 0)

(PythonRDD[140] at RDD at PythonRDD.scala:43,
 (array([-0.90623308, -0.23390938, -0.31279286, -0.16182906]),
  array([-0.14631196,  0.23586885,  0.71974067, -0.2192823 ,  0.59736701])),
 0.36223180305109948,
 1.4013139769868306)

In [39]:
# Same setup, with fw.avgmix in place of fw.centralize.
# NOTE(review): meaning of the trailing arguments (1, 0) is not visible here.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, fw.avgmix, fw.linearsearch, 1, 0)

(PythonRDD[168] at RDD at PythonRDD.scala:43,
 (array([-0.78200398, -0.15666701, -0.10939973, -0.59325958]),
  array([-0.12923706,  0.34858008,  0.42347271, -0.29195468,  0.77280207])),
 0.37611883793393758,
 1.5641803468134916)

In [40]:
# Same setup, with fw.poweriter wrapped in a two-argument lambda so it can
# be passed where fw.centralize / fw.avgmix were; its third argument is a
# callable that always returns 1.
# NOTE(review): meaning of the trailing arguments (1, 0) is not visible here.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, lambda x,t: fw.poweriter(x,t,lambda t: 1), fw.linearsearch, 1, 0)

(PythonRDD[198] at RDD at PythonRDD.scala:43,
 (array([-0.86045553, -0.26912754, -0.28295946, -0.32729282]),
  array([-0.14449102,  0.26663882,  0.66123032, -0.2367075 ,  0.64402648])),
 0.37152312844346974,
 1.3736294153066615)

In [41]:
# Same as the previous power-iteration run, but the callable returns 20.
# The recorded (u, v), step and loss match the fw.centralize run up to
# floating-point noise.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, lambda x,t: fw.poweriter(x,t,lambda t: 20), fw.linearsearch, 1, 0)

(PythonRDD[266] at RDD at PythonRDD.scala:43,
 (array([-0.90623308, -0.23390938, -0.31279286, -0.16182906]),
  array([-0.14631196,  0.23586885,  0.71974067, -0.2192823 ,  0.59736701])),
 0.36223180305110025,
 1.4013139769868255)

In [42]:
# Power-iteration run with fw.loground as the third argument instead of a
# constant callable. The recorded result matches the constant-1 run, which
# is consistent with fw.loground returning 1 for small inputs.
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, lambda x,t: fw.poweriter(x,t,fw.loground), fw.linearsearch, 1, 0)

(PythonRDD[296] at RDD at PythonRDD.scala:43,
 (array([-0.86045553, -0.26912754, -0.28295946, -0.32729282]),
  array([-0.14449102,  0.26663882,  0.66123032, -0.2367075 ,  0.64402648])),
 0.37152312844191421,
 1.373629415310659)

In [43]:
# Draw a length-m Gaussian vector v0 under seed 0, then re-seed so that
# drawing v0 does not shift the random stream seen by the run below.
# v0 is passed as the fourth argument of fw.poweriter.
# NOTE(review): v0 is presumably the starting vector of the power
# iteration -- confirm against fw.poweriter.
np.random.seed(0)
v0 = np.random.randn(m)
np.random.seed(0)
paramRDD = modelRDD.mapValues(lambda x: pb.Param((x.p, x.m)))
fw.iterate(modelRDD, paramRDD, lambda x,t: fw.poweriter(x,t,fw.loground,v0), fw.linearsearch, 1, 0)

(PythonRDD[325] at RDD at PythonRDD.scala:43,
 (array([-0.9026204 ,  0.02944333, -0.33004593, -0.27473475]),
  array([-0.15336388,  0.30672282,  0.70536436, -0.17475036,  0.59525126])),
 0.36056184581770484,
 1.4916671912009316)