In [1]:
import pandas as pd
import networkx as nx
import implicit
from utils import get_test_edges, get_graph, get_links, recall_at_k

## Data preparation, train-test split

In [2]:
# Raw edge list; besides the columns used to build the graph it carries a 't' column.
data_tr = pd.read_csv("vk_friends_test_candidate/data_tr.csv")

# Held-out edges used for evaluation (selection logic lives in utils.get_test_edges).
test_edges = get_test_edges(data_tr, edges_threshold=6, n_smallest=3)

# The full graph is built from the edge list with the 't' column dropped.
full_data = data_tr.drop(columns='t')
full_graph = get_graph(full_data)

# Training graph = full graph minus the held-out edges; the copy keeps full_graph intact.
train_graph = full_graph.copy()
train_graph.remove_edges_from(test_edges.to_numpy())

In [3]:
# Share of the full graph's edges that were held out for testing.
test_ratio = len(test_edges) / full_graph.number_of_edges()
print("Test edges ratio:", test_ratio)

Test edges ratio: 0.11707708449052645


## Recommender based on ALS

In [4]:
# Build the sparse adjacency matrix of the training graph.
# The row/column order is pinned explicitly to full_graph's node order, because the
# index <-> user-id lookups in this notebook are derived from full_graph.nodes().
# Without `nodelist` the alignment only holds as long as train_graph happens to
# iterate its nodes in the same order as full_graph.
adj_matrix = nx.adjacency_matrix(train_graph, nodelist=list(full_graph.nodes()))

In [5]:
# Two-way lookup between user ids and their position in full_graph's node order.
index_to_users = dict(enumerate(full_graph.nodes()))
users_to_index = {user: position for position, user in index_to_users.items()}

In [6]:
# ALS factors are initialised randomly, so without a fixed seed every
# "Restart & Run All" yields different recommendations and a different recall.
# Fixing random_state makes the run repeatable (the recorded outputs below were
# produced without a seed and will differ slightly after re-running).
model = implicit.als.AlternatingLeastSquares(
    factors=200,
    alpha=2,
    regularization=0.01,
    random_state=42,
)
# Fit on the training adjacency matrix (held-out edges already removed).
model.fit(adj_matrix)



  0%|          | 0/15 [00:00<?, ?it/s]

In [7]:
def index_to_users_f(recommendation_id, index_to_users=index_to_users):
    """Translate adjacency-matrix indices into the corresponding user ids.

    Args:
        recommendation_id: Iterable of matrix indices to translate.
        index_to_users: Mapping from matrix index to user id. Defaults to the
            notebook-level mapping, bound when this function is defined.

    Returns:
        A list of user ids, in the same order as the input indices.
    """
    mapped = []
    for index in recommendation_id:
        mapped.append(index_to_users[index])
    return mapped

In [8]:
# Distinct values of column `u` among the held-out edges: the users to evaluate on.
test_users = test_edges["u"].unique()
userids = [users_to_index[user] for user in test_users]

# Ten recommendations per test user, with filter_already_liked_items enabled.
recommendation_ids, scores = model.recommend(
    userids,
    adj_matrix[userids],
    N=10,
    filter_already_liked_items=True,
)

# Convert each row of matrix indices back to user ids.
recommendations = [index_to_users_f(row) for row in recommendation_ids]

## Evaluation

In [9]:
# Ground truth for the evaluation, derived from the held-out edges.
# NOTE(review): presumably maps each user to the users they are linked to in
# test_edges — confirm against utils.get_links.
dict_actual = get_links(test_edges)

In [10]:
# Pair every test user with the set of user ids recommended to them
# (converting to a set discards the ranking order).
dict_predicted = dict(zip(test_users, map(set, recommendations)))

In [11]:
# Score the predictions against the held-out links; the metric itself is
# implemented in utils.recall_at_k and its value is shown as the cell output.
recall_at_k(dict_predicted, dict_actual)

0.22263222632226323

## Writing results to a file

In [12]:
# Every node of the full graph, in the graph's own node order.
all_users = list(full_graph)

In [13]:
# Produce recommendations for every user in the graph, not only the test users.
userids = [users_to_index[user] for user in all_users]

# NOTE(review): filter_already_liked_items is False here but True in the
# evaluation run, so neighbours already present in the training graph are not
# filtered out of these recommendations — confirm this is intended.
recs, scores_all = model.recommend(userids, adj_matrix[userids], N=10, filter_already_liked_items=False)
recommendations_all = list(map(index_to_users_f, recs))

# Rows of `recs` follow `userids`, which was built from `all_users`, so the
# result must be keyed by `all_users`. Keying by `test_users` (as before)
# truncated the output to len(test_users) entries and attached each
# recommendation list to the wrong user.
dict_predicted_all = {
    user: set(recommendation)
    for user, recommendation in zip(all_users, recommendations_all)
}

In [14]:
# Write one "user: rec, rec, ..." line per user.
with open('recommendation_1_als.txt', 'w') as file:
    file.writelines(
        '%s: %s\n' % (user, ", ".join(str(item) for item in recommended))
        for user, recommended in dict_predicted_all.items()
    )

## Ranked

In [15]:
# Target users for the ranked recommendations below; only its `u` column is read.
user_ids = pd.read_csv("vk_friends_test_candidate/user_ids.csv")

In [16]:
# Split the requested users into those present in the graph and those that are not.
requested_users = set(user_ids.u.values)
known_users = set(all_users)
users_in_graph = requested_users & known_users
new_users = requested_users - known_users

# Ten recommendations per known user; rows come back in the order of `userids`.
userids = [users_to_index[user] for user in users_in_graph]
recs, scores_all = model.recommend(
    userids,
    adj_matrix[userids],
    N=10,
    filter_already_liked_items=False,
)
recommendations_all = [index_to_users_f(row) for row in recs]

# Values stay as ordered lists (not sets) so the ranking is preserved.
# users_in_graph is iterated in the same order as when userids was built.
dict_predicted = dict(zip(users_in_graph, recommendations_all))

In [17]:
# Fallback for users absent from the graph: the ten most frequent values of
# column `u` in the edge list.
u_frequencies = full_data["u"].value_counts()
the_most_popular_users = list(u_frequencies[:10].index)

# Every new user receives the same fallback list.
dict_predicted.update(dict.fromkeys(new_users, the_most_popular_users))

## Writing results to a file

In [18]:
# Write one "user: rec, rec, ..." line per user, recommendations in ranked order.
with open('recommendation_2_als.txt', 'w') as file:
    file.writelines(
        '%s: %s\n' % (user, ", ".join(str(item) for item in recommended))
        for user, recommended in dict_predicted.items()
    )