In [1]:
import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load every CSV used by the notebook in one pass. The paths are relative, so the
# notebook has to be run from the directory that contains the `input/` folder.
train, test, df_item, df_user, subm = map(
    pd.read_csv,
    (
        'input/train.csv',              # interactions: user_id, item_id, like
        'input/test.csv',               # first column holds the user ids to predict for
        'input/item-features.csv',      # keyed by item_id
        'input/user-features.csv',      # keyed by user_id
        'input/sample-submission.csv',  # template whose columns after the first get overwritten
    ),
)

In [3]:
# Item features: remove four feature columns (the notebook does not record why these
# were excluded), then order rows by item_id with a fresh 0..n-1 index.
# NOTE: this cell rebinds `df_item`, so re-running it without reloading the CSV
# raises a KeyError on the already-dropped columns.
df_item = (
    df_item
    .drop(columns=['19','27','30','9'])
    .sort_values('item_id')
    .reset_index(drop=True)
)

# User features: keep only the first two columns, ordered by user_id.
df_user = (
    df_user
    .iloc[:, :2]
    .sort_values('user_id')
    .reset_index(drop=True)
)

# First try

In [4]:
from scipy.sparse import csr_matrix

from lightfm import LightFM
from lightfm.evaluation import precision_at_k



In [6]:
from sklearn.preprocessing import StandardScaler

# --- Interaction matrix ---------------------------------------------------
# Rows are indexed by user_id, columns by item_id, stored values are the `like` column.
# The shape is inferred by scipy from the largest ids present in `train`.
# NOTE(review): if `train` contains rows with like == 0 they are kept as explicitly
# stored zeros - confirm how LightFM's evaluation helpers treat such entries.
y_np = train['like'].values
data_csr = csr_matrix((y_np, (train['user_id'], train['item_id'])))

# --- Item side features ---------------------------------------------------
# Every column after the first is standardised and stored sparse.
# Presumably row i describes item id i (df_item is sorted by item_id) - verify that
# item ids are contiguous and start at 0.
scaler = StandardScaler()
df_item_np = csr_matrix(scaler.fit_transform(df_item.iloc[:, 1:]))

# --- User side features ---------------------------------------------------
# Only the second column of df_user is used, unscaled, as a single feature column.
df_user_np = csr_matrix(df_user.iloc[:, [1]].values)

In [33]:
# --- LightFM hyper-parameters ----------------------------------------------
NUM_THREADS = 4        # NOTE(review): not passed to any call in the cells shown here
NUM_COMPONENTS = 60    # forwarded as `no_components`
NUM_EPOCHS = 30        # forwarded as `epochs` by the model.fit(...) cells
ITEM_ALPHA = 1e-3      # forwarded as `item_alpha`
USER_ALPHA = 3e-5      # forwarded as `user_alpha`
MAX_SAMPLED = 10       # forwarded as `max_sampled`
LR = 0.03              # forwarded as `learning_rate`

# A single WARP-loss model object is shared by all experiments below; the seed is fixed.
model = LightFM(
    loss='warp',
    no_components=NUM_COMPONENTS,
    learning_rate=LR,
    item_alpha=ITEM_ALPHA,
    user_alpha=USER_ALPHA,
    max_sampled=MAX_SAMPLED,
    random_state=50,
)

In [38]:
# Experiment: hybrid model trained with BOTH user and item side features.
model.fit(
    data_csr,
    epochs=NUM_EPOCHS,
    item_features=df_item_np,
    user_features=df_user_np,
)

# precision@5 is measured on the same interactions the model was fitted on,
# so this is a training metric, not a held-out score.
train_precision = precision_at_k(
    model,
    data_csr,
    k=5,
    item_features=df_item_np,
    user_features=df_user_np,
).mean()

print(train_precision)

0.1279678


In [37]:
# Experiment: item side features only (no user features).
model.fit(
    data_csr,
    epochs=NUM_EPOCHS,
    item_features=df_item_np,
)

# precision@5 on the training interactions themselves (not a held-out score).
train_precision = precision_at_k(
    model,
    data_csr,
    k=5,
    item_features=df_item_np,
).mean()
print(train_precision)

0.122736424


In [69]:
# Experiment: user side features only (no item features).
model.fit(
    data_csr,
    epochs=NUM_EPOCHS,
    user_features=df_user_np,
)

# precision@5 on the training interactions themselves (not a held-out score).
train_precision = precision_at_k(
    model,
    data_csr,
    k=5,
    user_features=df_user_np,
).mean()

print(train_precision)

0.14768614


In [40]:
# Experiment: pure collaborative filtering, no side features at all.
model.fit(
    data_csr,
    epochs=NUM_EPOCHS,
)

# precision@5 on the training interactions themselves (not a held-out score).
train_precision = precision_at_k(
    model,
    data_csr,
    k=5,
).mean()

print(train_precision)

0.44587526


In [70]:
def get_ynew(train_df=None):
    """Return, for each user position, an upper bound on recommendable item ids.

    The bound for position ``u`` is ``round(coef * u**exponent + slope * u) + offset``
    with hard-coded coefficients. Presumably these were fitted offline to the
    growth of the largest liked item id per user - TODO confirm their origin.

    Parameters
    ----------
    train_df : pandas.DataFrame, optional
        Interaction frame with ``user_id`` and ``like`` columns. Defaults to the
        notebook-level ``train`` frame, which keeps ``get_ynew()`` working as before.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per distinct user that has at least one row
        with ``like == 1``. Entry ``u`` is the bound for position ``u``; callers
        index it by user id, which assumes ids are contiguous and start at 0.
    """
    # Curve parameters, previously an anonymous list plus a bare `+ 21`.
    exponent, coef, slope = 0.524498, 14.42004787, -0.33037808
    offset = 21

    if train_df is None:
        train_df = train  # fall back to the notebook's global training frame

    # Only the NUMBER of users with a positive interaction matters here. The former
    # groupby/rolling-max pipeline computed values that were never read.
    n_users = train_df.loc[train_df['like'] == 1, 'user_id'].nunique()

    user_ids = np.arange(n_users)
    ynew = np.power(user_ids, exponent) * coef + slope * user_ids
    return ynew.round() + offset

In [72]:
def sample_recommendation(model, user_ids, k=20):
    """Build the top-``k`` item recommendations for each requested user.

    For every user, all items are scored with ``model.predict`` (using the
    notebook-level ``df_user_np`` user features) and ranked by descending score.
    Items the user already has a row for in ``train`` are removed, as are items
    whose id is not below the per-user bound from ``get_ynew()``.

    Relies on the notebook globals ``data_csr``, ``train``, ``df_user_np`` and on
    ``get_ynew``.

    Parameters
    ----------
    model : fitted LightFM model; it must have been trained with ``df_user_np``
        as user features for the scores to be meaningful.
    user_ids : iterable of int user ids, used both as model row ids and as
        positions into the ``get_ynew()`` array.
    k : int, maximum number of items returned per user.

    Returns
    -------
    numpy.ndarray
        One row of item ids per entry of ``user_ids``, best first. Rows have
        ``k`` entries whenever at least ``k`` candidates survive the filters.
    """
    n_items = data_csr.shape[1]
    items = np.arange(n_items)
    ynew = get_ynew()

    # Collect each user's already-seen items once, instead of scanning the whole
    # training frame again for every user inside the loop.
    seen_by_user = train.groupby('user_id')['item_id'].unique().to_dict()
    nothing_seen = np.empty(0, dtype=items.dtype)

    answers = []
    for user_id in user_ids:
        scores = model.predict(user_id, items, user_features=df_user_np)
        ranked = items[np.argsort(-scores)]

        # Boolean masks keep the ranking order while replacing the former
        # per-item list-membership test, which was quadratic.
        keep = ~np.isin(ranked, seen_by_user.get(user_id, nothing_seen))
        keep &= ranked < ynew[user_id]

        answers.append(ranked[keep][:k].tolist())
    return np.array(answers)


# `sample_recommendation` scores with `user_features=df_user_np`, so the model must have
# been trained with that same user feature matrix. Read top to bottom, the last experiment
# above fits the model WITHOUT side features, which would leave the learned embeddings
# inconsistent with the features used for scoring. Refit the user-features variant here
# (LightFM's `fit`, unlike `fit_partial`, starts training afresh) so that a clean
# "Restart & Run All" produces the intended predictions.
model.fit(data_csr,
          user_features=df_user_np,
          epochs=NUM_EPOCHS)

pred = sample_recommendation(model, test.iloc[:, 0].to_list())

In [73]:
# Sanity check: the shortest recommendation list must still hold 20 items
# (20 is the default `k` of sample_recommendation).
assert min(map(len, pred)) == 20

In [74]:
# Overwrite every column after the first with the predicted item ids, by position.
# Assumes the rows of `subm` are in the same user order as `test` - TODO confirm.
subm.iloc[:, 1:] = pred

In [66]:
# Preview the first rows of the submission frame.
subm.head()

Unnamed: 0,user_id,0,1,2,3,4,5,6,7,8,...,10,11,12,13,14,15,16,17,18,19
0,166,35,37,22,65,17,36,80,155,72,...,40,63,66,76,33,21,58,60,95,32
1,26,26,27,50,15,49,2,73,52,30,...,0,25,28,6,79,18,23,89,29,72
2,41,22,72,58,5,44,88,80,43,19,...,76,11,30,99,78,35,2,18,65,20
3,286,35,72,22,37,66,58,76,80,65,...,78,11,67,119,36,146,63,17,59,60
4,108,35,22,32,70,80,17,58,76,72,...,66,78,141,37,136,21,40,33,30,34


In [76]:
# Write the submission without the row index. `index` is documented as a boolean, so
# pass `False` explicitly instead of relying on `None` being falsy; the file written
# is the same.
subm.to_csv('input/subm007.csv', index=False)