# MovieLens_small_Faiss

In [16]:
import faiss
import numpy
from scipy.sparse import coo_matrix
from sklearn.decomposition import NMF

In [17]:
# Seed handed to NMF so the factorization is reproducible between runs.
RANDOM_STATE: int = 0
# Number of latent factors; also the dimensionality of the Faiss index.
N_FACTOR: int = 20
# Number of movies requested from each Faiss search.
N_RESULT: int = 10

## Load Dataset

In [18]:
# Read the first three CSV columns into a structured array with named
# fields (userId, movieId, rating); the header line is skipped.
ratings = numpy.loadtxt(
    fname='data/ratings.csv',
    dtype=[('userId', 'i8'), ('movieId', 'i8'), ('rating', 'f8')],
    delimiter=',',
    usecols=(0, 1, 2),
    skiprows=1,
)

## Get rating data and sorted user and movie ids

- user id list, `len(users)` is `671`
- movie id list, `len(movies)` is `9066`
- rating value array, `len(data)` is `100004` (one entry per row of `ratings.csv`)

In [19]:
# One rating value per row of the ratings array.
data = ratings['rating']
# numpy.unique already returns the distinct ids in ascending order, so
# materialising it as a list gives the same sorted list of ids.
users = list(numpy.unique(ratings['userId']))
movies = list(numpy.unique(ratings['movieId']))

## Mapper between id and index

In [20]:
# Forward lookups: id -> 0-based position in the sorted id list
# (used as the row / column index of the rating matrix).
user_id2i = {user_id: idx for idx, user_id in enumerate(users)}
movie_id2i = {movie_id: idx for idx, movie_id in enumerate(movies)}
# Reverse lookup: movie position -> movie id.
movie_i2id = dict(enumerate(movies))

## Make Sparse Matrix

In [21]:
# Translate the user id and movie id of every rating into matrix coordinates.
row = [user_id2i.get(user_id) for user_id in ratings['userId']]
col = [movie_id2i.get(movie_id) for movie_id in ratings['movieId']]

# COO triplets: data[k] is stored at position (row[k], col[k]).
rating_matrix = coo_matrix((data, (row, col)))

Each stored entry prints as `(row index, col index) value`; the column index ranges over the 9066 movies:
```text
(0, 30) 2.5
(0, 833) 3.0
(0, 859) 3.0
(0, 906) 2.0
(0, 931) 4.0
(0, 1017) 2.0
...
```

## NMF(non-negative matrix factorization)

- NMF factorizes the `671*9066` rating matrix into a `671*20` user matrix and a `9066*20` movie matrix (the transpose of `components_`)

In [22]:
# Non-negative matrix factorization with N_FACTOR latent factors and a
# seeded random initialisation.
model = NMF(
    n_components=N_FACTOR,
    init='random',
    random_state=RANDOM_STATE,
)
# User factors: one row per user, one column per latent factor.
user_mat = model.fit_transform(rating_matrix)
# Movie factors: components_ is (factor, movie), so transpose it to get
# one row per movie.
movie_mat = model.components_.T

- user matrix, size is `671*20`
```text
[[0.         0.0130852  0.04233652 0.         0.         0.01109127
 0.         0.         0.01037393 0.         0.         0.
 0.         0.         0.         0.         0.         0.02176325
 0.00400426 0.00541233],
...]
```
- movie matrix, size is `9066*20`
```text
[[0.38492652 0.18337294 0.33235427 0.61019606 4.26116671 0.13564159
 0.66864197 0.52531631 1.52488087 1.78896432 2.81218938 0.
 0.4555726  2.58509433 6.99776941 7.65670979 0.65582881 0.45931085
 0.46275145 0.08448926]
...]
```

## Faiss Train

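- `IndexFlatIP`: exact (brute-force) search by inner product; it needs no training, vectors are simply added
- the index dimension is 20 (`N_FACTOR`)
- the movie matrix is converted to float32 before it is added to the index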

In [23]:
# Exact inner-product index over N_FACTOR-dimensional vectors.
movie_index = faiss.IndexFlatIP(N_FACTOR)
# Faiss expects a row-major (C-contiguous) float32 matrix. movie_mat is a
# transposed view and .astype() keeps the source memory layout, so convert
# dtype and layout explicitly instead of relying on how sklearn happens to
# store components_.
movie_index.add(numpy.ascontiguousarray(movie_mat, dtype='float32'))

## Search movies liked by User Id

In [24]:
def search_by_userid(user_id, n_result=None):
    """Return the movies whose factor vectors score highest for one user.

    The user's row of ``user_mat`` is used as the query vector for
    ``movie_index``; each hit is mapped back from its index position to a
    movie id through ``movie_i2id``.

    Args:
        user_id: A user id that is a key of ``user_id2i``.
        n_result: How many movies to request. ``None`` (the default) uses
            the module-level ``N_RESULT``, looked up at call time.

    Returns:
        A list of ``{"movieid": int, "score": float}`` dicts, in the order
        returned by the index search. It can be shorter than ``n_result``:
        Faiss pads missing hits with id ``-1`` when the index holds fewer
        vectors than requested, and those padding slots are dropped.

    Raises:
        KeyError: If ``user_id`` is not a key of ``user_id2i``.
        ValueError: If ``n_result`` is not a positive number.
    """
    if n_result is None:
        n_result = N_RESULT
    if n_result <= 0:
        raise ValueError("n_result must be positive, got %r" % (n_result,))

    # Position of this user's row in the user factor matrix.
    user_i = user_id2i[user_id]
    # Faiss takes a 2-D, C-contiguous float32 batch of queries; build a
    # batch that contains just this user's vector.
    query = numpy.ascontiguousarray(user_mat[user_i], dtype='float32').reshape(1, -1)
    scores, indices = movie_index.search(query, n_result)
    # Row 0 holds the hits for the single query. Skip the -1 padding ids,
    # which have no entry in movie_i2id.
    return [
        {
            "movieid": int(movie_i2id[i]),
            "score": float(s),
        }
        for i, s in zip(indices[0], scores[0])
        if i >= 0
    ]

## Return the top 10 results

In [25]:
# Recommendations for the user whose userId is 1.
search_by_userid(user_id=1)

[{'movieid': 1196, 'score': 0.16856670379638672},
 {'movieid': 1198, 'score': 0.1558140367269516},
 {'movieid': 1214, 'score': 0.15575842559337616},
 {'movieid': 1240, 'score': 0.14368504285812378},
 {'movieid': 260, 'score': 0.141145259141922},
 {'movieid': 1270, 'score': 0.14029668271541595},
 {'movieid': 541, 'score': 0.1385204941034317},
 {'movieid': 1210, 'score': 0.1376139372587204},
 {'movieid': 1200, 'score': 0.1360698789358139},
 {'movieid': 1387, 'score': 0.13099956512451172}]