# LightFM

#### This is the `LightFM` quickstart example from its GitHub repository, which you can also find [here](https://github.com/lyst/lightfm)

In [280]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k

# Fetch MovieLens 100k, keeping only 5-star ratings as positive interactions.
data = fetch_movielens(min_rating=5.0)

# Build a WARP-loss model and train it in one chained call
# (fit() hands back the model instance it was called on).
model = LightFM(loss='warp').fit(data['train'], epochs=30, num_threads=2)

# Mean precision@5 over the users in the held-out test interactions.
test_precision = precision_at_k(model, data['test'], k=5).mean()

#### What is the type of the train data?

In [281]:
# Inspect which container type holds the training interactions.
type(data["train"])

scipy.sparse.coo.coo_matrix

In [282]:
# Densify the sparse training interactions for inspection. toarray() is the
# explicit spelling of the terse `.A` alias and matches the other cells in
# this notebook that convert sparse matrices to dense arrays.
data["train"].toarray()

array([[5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [5., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 5., 0., ..., 0., 0., 0.]], dtype=float32)

#### So we should create a ``coo_matrix`` for our train set and pass it to LightFM

## coo_matrix

In [260]:
from scipy.sparse import coo_matrix
import numpy as np

In [261]:
# Passing only a shape tuple builds an empty 3x4 sparse matrix; toarray()
# renders it as a dense array of zeros.
coo_matrix((3, 4), dtype=np.int8).toarray()

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [262]:
# Build a 4x4 COO matrix from (value, (row, col)) triplets.
# The value array is called `values` rather than `data` so it does not shadow
# the dataset loaded earlier under the name `data`.
# Note: coordinates (0, 0) and (1, 1) each appear twice. COO keeps all six
# entries as stored; duplicates are summed only when converting to dense.
row = np.array([0, 0, 1, 3, 1, 2])
col = np.array([0, 0, 1, 3, 1, 0])
values = np.array([1, 1, 1, 1, 2, 7])
coo = coo_matrix((values, (row, col)), shape=(4, 4))
coo

<4x4 sparse matrix of type '<class 'numpy.int32'>'
	with 6 stored elements in COOrdinate format>

In [263]:
# Show the three parallel arrays backing the COO matrix, one per line:
# stored values, then column coordinates, then row coordinates.
print(coo.data, coo.col, coo.row, sep="\n")

[1 1 1 1 2 7]
[0 0 1 3 1 0]
[0 0 1 3 1 2]


In [264]:
# The coordinate arrays are exposed as ordinary NumPy arrays.
type(coo.row)

numpy.ndarray

### So we can use NumPy features on them
For example:

In [265]:
# Largest stored value, taken directly from the underlying value array.
coo.data.max()

7

In [266]:
# Side length of the square dense array used to illustrate memory footprint.
dim = 1000
# Dense dim x dim array of zeros (NumPy's default float dtype).
data = np.zeros((dim, dim))

In [267]:
# Total number of elements in the dense array (dim * dim).
data.size

1000000

In [268]:
# Number of bytes used by a single element of the array.
data.itemsize

8

In [269]:
# nbytes is the element count multiplied by the per-element size.
print("%d bytes" % data.nbytes)

8000000 bytes


In [270]:
from sys import getsizeof
# getsizeof measures the array object as a whole; the recorded result is
# slightly larger than the raw element buffer (size * itemsize) computed above.
getsizeof(data)

8000112

### Create sample data to work with LightFM

In [271]:
import pandas as pd
# Toy ratings table: one row per (user, item, rating) observation.
data = pd.DataFrame(
    {
        "user_id": ["u1", "u2", "u1", "u3", "u4", "u2", "u4"],
        "item_id": ["p1", "p2", "p3", "p2", "p4", "p4", "p5"],
        "rate": [3, 2, 5, 1, 3, 2, 1],
    }
)
# Work on an independent copy so the raw table stays untouched.
df = data.copy()
df

Unnamed: 0,user_id,item_id,rate
0,u1,p1,3
1,u2,p2,2
2,u1,p3,5
3,u3,p2,1
4,u4,p4,3
5,u2,p4,2
6,u4,p5,1


### Convert the DataFrame to a coo_matrix

In [272]:
# Index the ratings by (user_id, item_id) so each level's integer codes can
# serve as matrix coordinates. The guard keeps the cell idempotent: once the
# columns have moved into the index, running set_index again would raise a
# KeyError, so re-running this cell is now a no-op.
if list(df.index.names) != ['user_id', 'item_id']:
    df = df.set_index(['user_id', 'item_id'])

In [273]:
# Display the frame, now indexed by (user_id, item_id).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,rate
user_id,item_id,Unnamed: 2_level_1
u1,p1,3
u2,p2,2
u1,p3,5
u3,p2,1
u4,p4,3
u2,p4,2
u4,p5,1


In [274]:
# The MultiIndex holds the unique labels of each level plus, for every row,
# the integer position of its label within that level.
df.index

MultiIndex(levels=[['u1', 'u2', 'u3', 'u4'], ['p1', 'p2', 'p3', 'p4', 'p5']],
           labels=[[0, 1, 0, 2, 3, 1, 3], [0, 1, 2, 1, 3, 3, 4]],
           names=['user_id', 'item_id'])

In [275]:
# Matrix dimensions: one row per distinct user, one column per distinct item.
n_users = len(df.index.levels[0])
n_items = len(df.index.levels[1])

# Per-row integer positions into each level. pandas renamed
# MultiIndex.labels to MultiIndex.codes in 0.24 and removed .labels in 1.0,
# so prefer .codes and fall back to .labels only on old installations.
index_codes = df.index.codes if hasattr(df.index, "codes") else df.index.labels

# User codes are the row coordinates, item codes the column coordinates, and
# the rating is the stored value.
coo = coo_matrix(
    (df["rate"].values, (index_codes[0], index_codes[1])),
    shape=(n_users, n_items),
)

In [276]:
# Dense view of the interaction matrix: rows are users, columns are items,
# and each stored value is the rating.
coo.toarray()

array([[3, 0, 5, 0, 0],
       [0, 2, 0, 2, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 3, 1]], dtype=int64)

In [277]:
# Train a fresh WARP-loss model on the toy user x item matrix built above.
model = LightFM(loss='warp')
# fit() returns the model itself, which is why the cell displays its repr.
model.fit(coo, epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x1c30178a390>

In [278]:
# Lookup table from user_id label to its row index in the interaction matrix.
# A label's position inside df.index.levels[0] is exactly the integer code the
# MultiIndex assigns to it, so the mapping is read straight off the level.
# This avoids MultiIndex.labels, which was removed in pandas 1.0.
user_levels = df.index.levels[0]
user_index = pd.DataFrame(
    {"idx": list(range(len(user_levels)))},
    index=pd.Index(user_levels, name="user_id"),
)
user_index

Unnamed: 0_level_0,idx
user_id,Unnamed: 1_level_1
u1,0
u2,1
u3,2
u4,3


In [279]:
# Lookup table whose row position i holds the item_id of matrix column i.
# It is built from df.index.levels[1], where position equals the integer item
# code by construction. The earlier approach (drop_duplicates + reset_index)
# ordered items by first appearance in the data, which matches the codes only
# by coincidence and would mislabel recommendations for differently ordered
# input. It also relied on MultiIndex.labels, removed in pandas 1.0.
item_index = pd.DataFrame({"item_id": list(df.index.levels[1])})
item_index

Unnamed: 0,item_id
0,p1
1,p2
2,p3
3,p4
4,p5


In [238]:
import numpy as np
# Resolve user "u2" to its matrix row as a plain Python int. predict() is
# documented to take an integer (or an int32 array with one entry per item),
# so pass the scalar rather than the one-element Series that .loc["u2"] returns.
user_idx = int(user_index.loc["u2", "idx"])
# Score every item column for that user.
scores = model.predict(user_idx, np.arange(n_items))
scores

array([-1.32863605,  0.2660026 , -1.3256793 , -0.00607806, -0.82872379])

In [257]:
# Item column indices ordered from highest to lowest predicted score
# (ascending sort of the negated scores).
ranked_item_idx = (-scores).argsort()
ranked_item_idx

array([1, 3, 4, 2, 0], dtype=int64)

In [259]:
# Translate the ranked column positions back into item_id labels by picking
# the label column first and then reordering it positionally.
recommend_rankede_list = item_index["item_id"].iloc[ranked_item_idx]
recommend_rankede_list

1    p2
3    p4
4    p5
2    p3
0    p1
Name: item_id, dtype: object

In [305]:
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score


# Load the MovieLens 100k dataset. With min_rating=0 no rating is
# filtered out, so every rated movie counts as a positive interaction.
data = fetch_movielens(min_rating=0)

# Instantiate and train the model (WARP ranking loss, 30 epochs, 2 threads)
model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)

# Evaluate the trained model: mean precision@5 on the test interactions
test_precision = precision_at_k(model, data['test'], k=5).mean()
test_precision

0.12216331

In [314]:
# Inspect the item feature matrix that ships with the fetched dataset.
data["item_features"]

<1682x1682 sparse matrix of type '<class 'numpy.float32'>'
	with 1682 stored elements in Compressed Sparse Row format>