# FFM Model

## Imports

In [1]:
import random
import pandas as pd
import numpy as np
from hashlib import sha256

In [2]:
from pyspark.sql import SparkSession

app_name = "final_project_notebook"
master = "local[*]"
spark = SparkSession\
        .builder\
        .appName(app_name)\
        .master(master)\
        .getOrCreate()
sc = spark.sparkContext

## Generate a small sample to work with

In [3]:
%%writefile sample.txt
1	10	ESPN	Nike
1	15	ESPN	Nike
0	2	ESPN	Gucci
1	10	ESPN	Adidas
1	10	ESPN	Adidas
0	3	Vogue	Nike
1	20	Vogue	Gucci
0	5	Vogue	Adidas
1	50	NBC	Nike
0	0	NBC	Gucci
0	4	NBC	Adidas
0	4	NBC	Adidas

Overwriting sample.txt


In [4]:
sample_RDD = sc.textFile('sample.txt')
split_RDD = sample_RDD.map(lambda line: line.split('\t')).cache()

In [5]:
sample_df = split_RDD.toDF()
sample_df.show()

+---+---+-----+------+
| _1| _2|   _3|    _4|
+---+---+-----+------+
|  1| 10| ESPN|  Nike|
|  1| 15| ESPN|  Nike|
|  0|  2| ESPN| Gucci|
|  1| 10| ESPN|Adidas|
|  1| 10| ESPN|Adidas|
|  0|  3|Vogue|  Nike|
|  1| 20|Vogue| Gucci|
|  0|  5|Vogue|Adidas|
|  1| 50|  NBC|  Nike|
|  0|  0|  NBC| Gucci|
|  0|  4|  NBC|Adidas|
|  0|  4|  NBC|Adidas|
+---+---+-----+------+



## Feature Hashing

In [6]:
def feature_hash(x, modulo=10**6):
    """
    Now we create a function that can be used to hash the features in each observation in the RDD. 
    We replace the label with 1, -1 and we hash all other features using sha256 
    and then we take modulo some power of 10. 
    """

    x[0] = 2*int(x[0]) - 1
    for i, value in enumerate(x[1:], 1):
        h = sha256("{i}-{val}".format(i=i,val=value).encode('ascii'))
        hashed_value = int(h.hexdigest(), base=16) 
        hashed_value_mod = hashed_value % modulo
        x[i] = hashed_value_mod
    return x

In [7]:
sample_hashed = split_RDD.map(lambda x: feature_hash(x, 100))

In [8]:
sample_hashed.collect()

[[1, 51, 91, 29],
 [1, 57, 91, 29],
 [-1, 77, 91, 43],
 [1, 51, 91, 36],
 [1, 51, 91, 36],
 [-1, 27, 34, 29],
 [1, 14, 34, 43],
 [-1, 37, 34, 36],
 [1, 68, 48, 29],
 [-1, 35, 48, 43],
 [-1, 22, 48, 36],
 [-1, 22, 48, 36]]

## Model

In [61]:
def phi(x):
    total = 0
    for i in range(len(x) - 1):
        for j in range(i+1, len(x)):
            total += np.dot(w_old[x[i], j, :], w_old[x[j], i, :])
            
    return total

def kappa(y, features):
    return -y/(1 + np.exp(phi(features)))

def gradient_update(x):
    y = x[0]
    features = x[1:]
    k = kappa(y, features)
    for i in range(len(features) - 1):
        for j in range(i+1, len(features)):
            yield ((i, features[j]), (eta * w_old[features[i], j, :] + k * w_old[features[j], i, :], 1))
            
    for i in range(len(features) - 1):
        for j in range(i+1, len(features)):
            yield ((j, features[i]), (eta * w_old[features[j], i, :] + k * w_old[features[i], j, :], 1))
    

In [40]:
# Initialize first random model
k = 3
n_features = 100
n_fields = 3
np.random.seed(1)
w_old = np.random.uniform(0, 1/np.sqrt(k), size=(n_features, n_fields, k))

In [41]:
sample_hashed.take(1)

[[1, 51, 91, 29]]

In [42]:
wj0f1_wj1f0 = np.dot(w_old[51, 1, :], w_old[91, 0, :])
wj0f2_wj2f0 = np.dot(w_old[51, 2, :], w_old[29, 0, :])
wj1f2_wj2f1 = np.dot(w_old[91, 2, :], w_old[29, 1, :])
total = wj0f1_wj1f0 + wj0f2_wj2f0 + wj1f2_wj2f1
print(f"Expected value is the sum of these three: {total}")

Expected value is the sum of these three: 0.7205119162918825


In [47]:
eta = 0.1
sc.broadcast(w_old)
sc.broadcast(eta)
sample_hashed.map(lambda x: phi(x[1:])).collect()[0]

0.7205119162918825

In [62]:
sample_hashed.flatMap(lambda x: gradient_update(x)).collect()

[((0, 91), (array([-0.12366285, -0.11483603, -0.0005904 ]), 1)),
 ((0, 29), (array([-0.14425123, -0.08595692, -0.11650726]), 1)),
 ((1, 29), (array([-0.14187508, -0.04045859, -0.12342043]), 1)),
 ((1, 51), (array([ 0.01263734, -0.01019001, -0.14014146]), 1)),
 ((2, 51), (array([-0.00913235, -0.14203834, -0.01008046]), 1)),
 ((2, 91), (array([-0.02036593, -0.04850733,  0.02783382]), 1)),
 ((0, 91), (array([-0.07888543, -0.09708068, -0.0064921 ]), 1)),
 ((0, 29), (array([-0.07605339, -0.07037244, -0.07434009]), 1)),
 ((1, 29), (array([-0.11308775, -0.02971325, -0.10108303]), 1)),
 ((1, 57), (array([-0.04041297,  0.01255432, -0.07420504]), 1)),
 ((2, 57), (array([-0.10559504, -0.08354991, -0.05221481]), 1)),
 ((2, 91), (array([-0.00798085, -0.03667551,  0.02975374]), 1)),
 ((0, 91), (array([0.16600099, 0.19724459, 0.08519065]), 1)),
 ((0, 43), (array([0.11363218, 0.02008188, 0.12477961]), 1)),
 ((1, 43), (array([0.0665013 , 0.19016751, 0.06649315]), 1)),
 ((1, 77), (array([0.10455133, 0.2

In [69]:
sample_hashed.flatMap(lambda x: gradient_update(x)) \
            .reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1])) \
            .collect()

[((1, 29), (array([-0.18721951, -0.05344777, -0.17127541]), 4)),
 ((1, 51), (array([ 0.03103235, -0.04295678, -0.45881235]), 3)),
 ((1, 57), (array([-0.04041297,  0.01255432, -0.07420504]), 1)),
 ((1, 43), (array([0.12617871, 0.20440026, 0.11355841]), 3)),
 ((1, 77), (array([0.10455133, 0.22733912, 0.13001895]), 1)),
 ((0, 36), (array([0.16646922, 0.26998625, 0.17153074]), 5)),
 ((0, 34), (array([0.09520989, 0.14580299, 0.24223153]), 3)),
 ((1, 27), (array([0.0296366 , 0.08866191, 0.15649661]), 1)),
 ((2, 34), (array([0.14069499, 0.11453877, 0.12566669]), 3)),
 ((2, 14), (array([-0.0916813 , -0.00063737, -0.1391282 ]), 1)),
 ((1, 37), (array([0.08357662, 0.1842821 , 0.19996204]), 1)),
 ((0, 48), (array([0.31411233, 0.3417631 , 0.4107831 ]), 4)),
 ((2, 68), (array([-0.0239938 , -0.02535181,  0.01527031]), 1)),
 ((2, 48), (array([0.42396701, 0.17463993, 0.36322313]), 4)),
 ((1, 35), (array([0.07997747, 0.20720971, 0.13418325]), 1)),
 ((2, 22), (array([0.18989558, 0.21749983, 0.21957247])

In [72]:
sample_hashed.flatMap(lambda x: gradient_update(x)) \
            .reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1])) \
            .map(lambda x: ((x[0][0], x[0][1]), x[1][0] / x[1][1])) \
            .collect()

[((1, 29), array([-0.04680488, -0.01336194, -0.04281885])),
 ((1, 51), array([ 0.01034412, -0.01431893, -0.15293745])),
 ((1, 57), array([-0.04041297,  0.01255432, -0.07420504])),
 ((1, 43), array([0.04205957, 0.06813342, 0.0378528 ])),
 ((1, 77), array([0.10455133, 0.22733912, 0.13001895])),
 ((0, 36), array([0.03329384, 0.05399725, 0.03430615])),
 ((0, 34), array([0.03173663, 0.048601  , 0.08074384])),
 ((1, 27), array([0.0296366 , 0.08866191, 0.15649661])),
 ((2, 34), array([0.04689833, 0.03817959, 0.0418889 ])),
 ((2, 14), array([-0.0916813 , -0.00063737, -0.1391282 ])),
 ((1, 37), array([0.08357662, 0.1842821 , 0.19996204])),
 ((0, 48), array([0.07852808, 0.08544077, 0.10269578])),
 ((2, 68), array([-0.0239938 , -0.02535181,  0.01527031])),
 ((2, 48), array([0.10599175, 0.04365998, 0.09080578])),
 ((1, 35), array([0.07997747, 0.20720971, 0.13418325])),
 ((2, 22), array([0.09494779, 0.10874991, 0.10978623])),
 ((0, 91), array([-0.06332945, -0.05532235,  0.0130103 ])),
 ((0, 29), ar

In [76]:
initial_state = np.zeros(shape=(100,3,3))
print(initial_state)
# sample_hashed.flatMap(lambda x: gradient_update(x)) \
#             .reduceByKey(lambda x, y: (x[0] + y[0], x[1] + y[1])) \
#             .map(lambda x: ((x[0][0], x[0][1]), x[1][0] / x[1][1])) \
#             .aggregateByKey

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [

In [33]:
def train_ffm(x, n_features, n_fields, k):
#     G = np.ones((n_features, n_fields, k))
    w_old = np.random(0, 1/np.sqrt(k), size=(n_features, n_fields, k))
    sc.broadcast(w_old)
    sc.broadcast(eta)
#     sc.broadcast(G)

In [42]:
np.max(np.random.uniform(0, 1/np.sqrt(10), size=(100, 3, 10)))

0.31617114160318766

In [78]:
import sys
sys.getsizeof(np.random.uniform(0, 1, size=(1000000,10)))

80000112