# Factorization Machine Example

## 0. Import

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from surprise import SVD

import numpy as np
import warnings

warnings.filterwarnings(action='ignore')

## 1. Define a function that builds the FM input

In [2]:
def make_fm_input(data):
    """Expand every sample with all of its pairwise feature products.

    Each output row holds the original features followed by the products
    ``x[j] * x[k]`` for every pair ``j < k``, ordered
    ``(0, 1), (0, 2), ..., (0, d - 1), (1, 2), ...``.

    Args:
        data: 2-D array-like of shape ``(n_samples, n_features)``.

    Returns:
        A new float64 ``numpy.ndarray`` of shape
        ``(n_samples, n_features + n_features * (n_features - 1) // 2)``.
        An empty sequence yields an array of shape ``(0, 0)``.

    Raises:
        ValueError: If ``data`` is non-empty and not two-dimensional.
    """
    data = np.asarray(data, dtype=float)

    if data.ndim != 2:
        # An empty list carries no feature count at all; return an empty
        # design matrix instead of failing on ``data[0]``.
        if data.size == 0:
            return np.zeros((0, 0))
        raise ValueError(
            "data must be 2-D (n_samples, n_features), got ndim=%d" % data.ndim
        )

    # Indices of the strict upper triangle enumerate the pairs (j, k) with
    # j < k in row-major order, which is the order of the nested j/k loops.
    first, second = np.triu_indices(data.shape[1], k=1)

    # One vectorized product per pair, computed for all samples at once,
    # instead of growing each row with repeated np.append copies.
    return np.hstack((data, data[:, first] * data[:, second]))

## 2. Load dataset

In [3]:
# Load the breast-cancer data and hold out 25% of the samples for testing.
X, y = load_breast_cancer(return_X_y=True)
train_X, test_X, train_y, test_y = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Build the interaction-expanded design matrices for both splits.
fm_train_X, fm_test_X = map(make_fm_input, (train_X, test_X))

# Report shapes: raw features, expanded features and labels, train then test.
print(
    train_X.shape,
    fm_train_X.shape,
    train_y.shape,
    test_X.shape,
    fm_test_X.shape,
    test_y.shape,
    sep='\n',
)

(426, 30)
(426, 465)
(426,)
(143, 30)
(143, 465)
(143,)


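## 3. Train FM

Note: the model trained here is an FM-style approximation: a logistic regression over explicit pairwise feature products, with one free weight per feature pair rather than the factorized interaction weights of a true factorization machine.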

In [4]:
# Fit a logistic regression without a penalty term on the feature matrix
# produced by make_fm_input (raw features plus pairwise products).
# NOTE(review): the string penalty='none' was deprecated in scikit-learn 1.2 in
# favour of penalty=None — confirm the scikit-learn version this must run on.
fm_classifier = LogisticRegression(penalty='none')
fm_classifier.fit(fm_train_X, train_y)

LogisticRegression(penalty='none')

## 4. Compare FM with a plain logistic regression baseline

In [5]:
# Baseline: the same penalty-free logistic regression, fitted on the raw
# features only (no pairwise products).
lg_classifier = LogisticRegression(penalty='none')
lg_classifier.fit(train_X, train_y)

LogisticRegression(penalty='none')

In [6]:
# Mean accuracy of the interaction-feature model on the held-out test split.
fm_classifier.score(fm_test_X, test_y)

0.972027972027972

In [7]:
# Mean accuracy of the raw-feature baseline on the same held-out test split.
lg_classifier.score(test_X, test_y)

0.965034965034965