In [1]:
%load_ext cython

In [15]:
%%cython -a

import numpy as np
from libc.string cimport memcpy
from libc.stdlib cimport malloc, free
from cython.parallel import parallel, prange
from scipy.linalg cimport cython_blas

import os
import pandas as pd
from subprocess import call
from datetime import datetime

file_dir = 'ml-100k'
file_path = os.path.join(file_dir, 'u.data')
if not os.path.isdir(file_dir):
    call(['curl', '-O', 'http://files.grouplens.org/datasets/movielens/' + file_dir + '.zip'])
    call(['unzip', file_dir + '.zip'])

user_col = 'user_id'
item_col = 'item_id'
rating_col = 'rating'
timestamp_col = 'timestamp'
names = [user_col, item_col, rating_col, timestamp_col]
df = pd.read_csv(file_path, sep = '\t', names = names)

def extract_time(row):
    timestamp = datetime.utcfromtimestamp(row)
    time = str(timestamp.year) + ',' + str(timestamp.month)
    return time
    
df[timestamp_col] = df[timestamp_col].apply(extract_time)
df_time = (df[timestamp_col].
           str.split(',', expand = True).
           rename(columns = {0: 'year', 1: 'month'}))

for col in df_time.columns:
    df_time[col] = df_time[col].apply(int)

df = df.drop(timestamp_col, axis = 1)
df = pd.concat([df, df_time], axis = 1)

# mask is the watershed for the train/test data
mask = (df['year'] == 1998) & (df['month'] >= 4)
df_train = df[~mask]
df_test = df[mask]

from epsilon.transformers import SparseMatrixTransformer
sparse_tr = SparseMatrixTransformer(user_col, item_col, rating_col)
X_train = sparse_tr.fit_transform(df_train)

from epsilon.models import ALSRecommender
als = ALSRecommender(reg = 0.01, alpha = 15, n_iters = 5, n_factors = 10)
als.fit(X_train) 
    

cdef inline void gemv(char* transa, int* m, int* n, float* alpha, float* a,
                       int* lda, float* x, int* incx, float* beta, float* y, int* incy) nogil:
    cython_blas.sgemv(transa, m, n, alpha, a, lda, x, incx, beta, y, incy)
    
cdef inline void gemm(char* transa, char* transb, int* m, int* n, int*k, float* alpha, float* a,
                      int* lda, float* b, int* ldb, float* beta, float* c, int* ldc) nogil:
    cython_blas.sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)

cdef:
    float* result
    
    int one = 1
    float alpha = 1.0, zero = 0.0
    float[:, :] user_factors = als.user_factors_
    float[:, :] item_factors = np.asfortranarray(als.item_factors_)
    int n_items = item_factors.shape[0]
    int n_factors = item_factors.shape[1]
    
    float[:] output = np.zeros(n_items, dtype = np.float32)
    
result = <float*> malloc(sizeof(float) * n_items)


gemv('N', &n_items, &n_factors, &alpha,
     &item_factors[0, 0], &n_items, &user_factors[0, 0], &one, &zero, result, &one)
# gemm('N', 'T', &n_items, &one, &n_factors, &alpha,
#      &item_factors[0, 0], &n_items, &user_factors[0, 0], &one, &zero, result, &one)

memcpy(&output[0], result, sizeof(float) * n_items)
free(result)

print(als.item_factors_.dot(als.user_factors_[0]))
print(np.asarray(output))

ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'


ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long'
ALSRecommender: 100%|██████████| 5/5 [00:00<00:00, 65.68it/s]

[ 1.0277246   0.98457235  0.90822035 ... -0.03553959  0.25338817
  0.26752836]
[ 1.0277246   0.98457235  0.9082203  ... -0.0355396   0.2533882
  0.26752838]





In [20]:
%%cython -a

import numpy as np
from libc.string cimport memset, memcpy
from libc.stdlib cimport malloc, free

n_items = 10
cdef:
    int n_items = n_items
    float* y_true1
    float[:] result1 = np.zeros(n_items, dtype = np.float32)

y_true1 = <float*> malloc(sizeof(float) * n_items)
memset(y_true1, 0, sizeof(float) * n_items)
y_true1[0] = 5.0
y_true1[1] = 6.0
memcpy(&result1[0], y_true1, sizeof(float) * n_items)

free(y_true1)
print(np.asarray(result1))


Error compiling Cython file:
------------------------------------------------------------
...
from libc.string cimport memset, memcpy
from libc.stdlib cimport malloc, free

n_items = 10
cdef:
    int n_items = n_items
       ^
------------------------------------------------------------

/Users/mingyuliu/.ipython/cython/_cython_magic_6fd2e45cead3299797a460c145503edf.pyx:8:8: 'n_items' redeclared 


TypeError: object of type 'NoneType' has no len()