In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Read the two interaction tables in one pass; `train` drives every model in this notebook.
train, test = (pd.read_csv(csv_path) for csv_path in ('data/train.csv', 'data/test.csv'))

In [3]:
# Size the matrices by the largest id, not by the number of distinct ids: the
# raw ids are used as row/column positions when the interaction matrix is
# filled, so any gap in the id range would otherwise index past the end.
# For gap-free ids starting at 0 this equals the distinct count.
n_users = int(train['user_id'].max()) + 1
n_items = int(train['item_id'].max()) + 1

In [4]:
def downvote_seen(scores, data):
    """Return a copy of ``scores`` with already-seen (user, item) cells pushed to the bottom.

    Parameters
    ----------
    scores : numpy.ndarray
        2-D score matrix indexed as ``scores[user_id, item_id]``.
    data : pandas.DataFrame
        Interaction log with ``user_id`` and ``item_id`` columns; every listed
        pair is treated as seen.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``scores``. Each seen cell holds the
        global minimum of ``scores``; every other cell is unchanged. The input
        array is not modified.

    Notes
    -----
    Rows of ``data`` whose ``user_id`` is not a row index of ``scores`` are
    ignored. Seen cells tie with the lowest-scored unseen cell rather than
    ranking strictly below it.
    """
    scores_processed = scores.copy()
    if scores_processed.size == 0:
        # Nothing to overwrite, and ``min`` of an empty array would raise.
        return scores_processed

    user_ids = data['user_id'].to_numpy()
    item_ids = data['item_id'].to_numpy()
    # Keep only interactions whose user actually has a row in the matrix.
    known_user = np.isin(user_ids, np.arange(scores_processed.shape[0]))
    if not known_user.any():
        return scores_processed

    # A single fancy-indexed assignment replaces a per-user scan of the whole
    # frame; the fill value is computed once from the untouched input.
    rows = user_ids[known_user].astype(np.intp)
    cols = item_ids[known_user]
    scores_processed[rows, cols] = scores.min()
    return scores_processed

def get_predictions(scores, k=20):
    """Return, for every row of ``scores``, the column indices of its ``k`` highest values.

    Parameters
    ----------
    scores : numpy.ndarray
        2-D score matrix; each row is ranked independently.
    k : int, default 20
        Number of indices to return per row. Values larger than the number of
        columns return every column; ``0`` returns an empty result.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_rows, min(k, n_cols))`` with column indices
        ordered from highest score to lowest.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f'k must be non-negative, got {k}')
    # Reverse the ascending argsort first and then take a prefix: a tail slice
    # such as ``[:, -k:]`` would select every column when k == 0.
    return scores.argsort(axis=1)[:, ::-1][:, :k]

def prepare_submission(pred, sub_name, sample_path='data/sample_submission.csv', out_dir='submissions'):
    """Write per-user predictions to ``<out_dir>/<sub_name>.csv``.

    Parameters
    ----------
    pred : array-like
        2-D predictions; row ``i`` is written as the prediction for
        ``user_id == i``.
    sub_name : str
        File name (without the ``.csv`` extension) of the submission.
    sample_path : str or path-like, default 'data/sample_submission.csv'
        CSV with a ``user_id`` column that selects which users are written.
    out_dir : str or path-like, default 'submissions'
        Directory the submission is written to; created if it does not exist.

    Returns
    -------
    None
        The result is written to disk.

    Notes
    -----
    The join is an inner join on ``user_id`` with the sample file on the left:
    the output follows the sample file's user order, and users that have no
    row in ``pred`` are dropped.
    """
    sub = pd.DataFrame(pred)
    sub['user_id'] = np.arange(sub.shape[0])
    sample_sub = pd.read_csv(sample_path)

    out_path = Path(out_dir) / f'{sub_name}.csv'
    # A fresh checkout usually has no output folder; create it instead of
    # failing after all the scores have already been computed.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Name the join key explicitly rather than relying on whichever column
    # labels the two frames happen to share.
    sample_sub[['user_id']].merge(sub, on='user_id').to_csv(out_path, index=False)

# SVD

In [5]:
# Dense user x item matrix, 0 wherever a pair never appears in train.
# Observed pairs hold 2*like - 1, i.e. +1 / -1 if `like` is a 0/1 flag
# (assumption — confirm against the data).
interactions = np.zeros(shape=(n_users, n_items))
interactions[train['user_id'].to_numpy(), train['item_id'].to_numpy()] = 2 * train['like'].to_numpy() - 1

In [6]:
# Thin SVD: only the leading singular triplets are used for the reconstruction,
# so skip the full square U (n_users x n_users) and Vh (n_items x n_items)
# factors that the default full_matrices=True would allocate.
u, s, vh = np.linalg.svd(interactions, full_matrices=False)

In [7]:
k = 2  # number of singular triplets kept in the low-rank reconstruction

# Rank-k reconstruction of the interaction matrix, with seen items pushed to
# the bottom before taking each user's top 20.
scores = downvote_seen(np.matmul(u[:, :k] * s[:k], vh[:k]), train)
pred = get_predictions(scores, k=20)

In [8]:
# Leaderboard score of this submission: 0.483
prepare_submission(pred, sub_name=f'svd_{k}_final')

# Embeddings

In [9]:
# Precomputed feature tables for users and items.
# NOTE(review): the item file name uses a hyphen while the user file uses an
# underscore — confirm it matches the file on disk.
user_features, item_features = map(pd.read_csv, ('data/user_features.csv', 'data/item-features.csv'))

In [10]:
# Order each table by its id and strip the id column, leaving plain feature
# matrices. Presumably ids run 0..n-1 so that row position equals id — TODO confirm.
user_features_arr = user_features.sort_values(by='user_id').drop(columns='user_id').to_numpy()
item_features_arr = item_features.sort_values(by='item_id').drop(columns='item_id').to_numpy()

In [11]:
# Score every (user, item) pair as the dot product of their feature vectors,
# push seen items to the bottom, then take each user's top 20.
scores = downvote_seen(np.matmul(user_features_arr, item_features_arr.T), train)
pred = get_predictions(scores, k=20)

In [12]:
# Leaderboard score of this submission: 0.509
prepare_submission(pred, sub_name='embeddings_final')

# User KNN

In [13]:
# Rebuild the dense user x item matrix from train so this section starts from
# a fresh copy: 0 for unobserved pairs, 2*like - 1 for observed ones.
interactions = np.zeros(shape=(n_users, n_items))
interactions[train['user_id'].to_numpy(), train['item_id'].to_numpy()] = 2 * train['like'].to_numpy() - 1

In [14]:
# Pairwise user-user cosine similarity with the diagonal zeroed, so a user
# contributes nothing to their own scores. Writing 0 on the diagonal in place
# is exact even where a row's self-similarity is not exactly 1 (all-zero rows,
# floating-point round-off) and avoids allocating an n_users x n_users identity.
S = cosine_similarity(interactions)
np.fill_diagonal(S, 0)

In [15]:
k = 100  # neighbours kept per user

# Sparsify S in place: every row keeps its k largest similarities and has all
# other entries set to zero. Each `row` is a view into S, so the write lands in S.
for row in S:
    weakest = np.argsort(row)[:-k]
    row[weakest] = 0

In [16]:
# A user's item scores are the similarity-weighted sum of the retained
# neighbours' interaction rows; seen items are pushed down before the top 20.
scores = downvote_seen(np.matmul(S, interactions), train)
pred = get_predictions(scores, k=20)

In [17]:
# Leaderboard score of this submission: 0.586
prepare_submission(pred, sub_name=f'user_knn_{k}_final')