## Задача 1. Рекомендательные системы

Ваша задача — построить рекомендательную систему для социальной сети ОК. Для этого Вам необходимо обучить модель, которая для каждого пользователя (пользователи закодированы полем customer_id) будет формировать список из 7 рекомендуемых групп (community_id), в которых он пока не состоит.

Примечание: Необходимо рекомендовать пользователю только те группы, в которые он еще не вступил, т.е. их нет в обучающей выборке.

Данные:
Для построения модели Вам будут доступны различные данные о пользователях и о том, в какие группы они вступили и когда, описания и данные групп, а также тестовый набор customer_id, для которых необходимо сделать прогноз. Ниже приведены описания всех полей в данных:

In [4]:
import pandas as pd
import numpy as np

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

from implicit.evaluation import mean_average_precision_at_k
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
# from lightfm.cross_validation import random_train_test_split

import warnings
warnings.filterwarnings('ignore')

### Загрузка и обработка данных

In [5]:
# Read the tab-separated training interactions (the first column is the
# index) and drop rows that are exact duplicates.
X = (
    pd.read_csv('train_df.tsv.gz', sep='\t', index_col=0)
    .drop_duplicates()
)
X.head(5)

Unnamed: 0,community_id,description,customers_count,messages_count,type,region_id,themeid,business_category,business_parent,customer_id,status,join_request_date
0,00dbc35ec26c5b6452a3259194b2d2f74eae7141d2bc75...,"Ванга сказала: ""Выживет пчела - выживет челове...",2966,1,7,,,,,947224211267aefcc2e3e9c524fdf46ce329bc638e8bf1...,A,
1,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,b935c3390d82df612de19591d2dd16e1eff8e660746bb3...,A,
2,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,ff873dee9234ee290d92eb6c15d7179aabf0653dbed3b3...,A,
3,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,737ef0e849feda6cdfd350d19fcedaf17aebb757c0a1fc...,A,
4,097e4dae68029ecc3c6183ed4386aff71dcac227cbaff7...,Читать внимательно!!! Причины попадания в чёрн...,1035,1,7,10417790000.0,,FAN_CLUB,BLOG,780fb8c178af972826ac0f84e42ac4c6c1dcdeeec0c4dd...,A,


In [21]:
print("end")

end


In [20]:
from sklearn.preprocessing import LabelEncoder

In [7]:
# Map every community hash to an integer code stored in "com_id".
# The fitted encoder is kept so predictions can be decoded back to hashes.
encoder_com_id = LabelEncoder()
X["com_id"] = encoder_com_id.fit_transform(X["community_id"])

In [8]:
# Map every customer hash to an integer code stored in "cust_id".
# The fitted encoder is reused later to encode the submission customers.
encoder_usr_id = LabelEncoder()
X["cust_id"] = encoder_usr_id.fit_transform(X["customer_id"])

In [9]:
from sklearn.preprocessing import LabelEncoder

# Replace the categorical "status" column with integer codes; the mapping
# from code back to the original label is available in lab_enc.classes_.
lab_enc = LabelEncoder().fit(X['status'])
X['status'] = lab_enc.transform(X['status'])

# Show which codes occur in the data.
X['status'].unique()

array([0, 3, 4, 1, 2, 6, 5])

In [10]:
# Binary target: 0 where the encoded status is 0, 1 for every other code.
X['target'] = (X['status'] != 0).astype('int64')
X['target'].value_counts()

0    7276008
1    4254266
Name: target, dtype: int64

In [11]:
# The encoded status is no longer needed once "target" has been derived.
X = X.drop('status', axis=1)

In [12]:
X.head()

Unnamed: 0,community_id,description,customers_count,messages_count,type,region_id,themeid,business_category,business_parent,customer_id,join_request_date,com_id,cust_id,target
0,00dbc35ec26c5b6452a3259194b2d2f74eae7141d2bc75...,"Ванга сказала: ""Выживет пчела - выживет челове...",2966,1,7,,,,,947224211267aefcc2e3e9c524fdf46ce329bc638e8bf1...,,486,908981,0
1,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,b935c3390d82df612de19591d2dd16e1eff8e660746bb3...,,3578,1134537,0
2,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,ff873dee9234ee290d92eb6c15d7179aabf0653dbed3b3...,,3578,1565515,0
3,06107f1eae304c45d3e6324cc86f7d39662793a53b159b...,,8982,2,7,10424030000.0,246.0,,,737ef0e849feda6cdfd350d19fcedaf17aebb757c0a1fc...,,3578,706749,0
4,097e4dae68029ecc3c6183ed4386aff71dcac227cbaff7...,Читать внимательно!!! Причины попадания в чёрн...,1035,1,7,10417790000.0,,FAN_CLUB,BLOG,780fb8c178af972826ac0f84e42ac4c6c1dcdeeec0c4dd...,,5627,734661,0


#### Пример user-based

In [13]:
%load_ext autoreload
%autoreload 2
from implicit.nearest_neighbours import CosineRecommender, BM25Recommender, TFIDFRecommender
from implicit.gpu.als import AlternatingLeastSquares, MatrixFactorizationBase
# from implicit.cpu.als import AlternatingLeastSquares, MatrixFactorizationBase

In [14]:
from implicit.gpu import HAS_CUDA

In [15]:
HAS_CUDA

False

In [16]:
import cupy as cp

In [17]:
# Keep one row per distinct (community, customer) pair.
df_reduced = X.loc[:, ['com_id', 'cust_id']].drop_duplicates()

In [18]:
# Sparse customer x community interaction matrix: rows are "cust_id" codes,
# columns are "com_id" codes, and every observed pair is stored as 1.0.
data_full = csr_matrix(
    (
        np.ones(len(df_reduced), dtype=float),
        (df_reduced['cust_id'].to_numpy(), df_reduced['com_id'].to_numpy()),
    )
)

In [19]:
# Sum of all stored values; each stored interaction is 1.0, so this is the
# number of (customer, community) pairs in the matrix.
data_full.sum()

11530274.0

In [22]:
# Density of the interaction matrix: stored interactions divided by the
# total number of cells (rows * columns).
data_full.sum() / (data_full.shape[0] * data_full.shape[1])

4.930053548426422e-05

In [78]:
# data_train, data_test = random_train_test_split(data_full, test_percentage=0.1)

In [4]:
# Alternative nearest-neighbour models imported from
# implicit.nearest_neighbours; uncomment one to use it instead of ALS.
# cr = CosineRecommender()
# cr = BM25Recommender()
# cr = TFIDFRecommender()

# The class imported from implicit.gpu.als requires the CUDA extension, but
# HAS_CUDA is False in this environment. implicit.als.AlternatingLeastSquares
# is the dispatcher that returns the GPU model when CUDA is available and the
# CPU model otherwise, so this cell works on both kinds of machines.
from implicit.als import AlternatingLeastSquares

cr = AlternatingLeastSquares(factors=64)

# data_full must already exist: run the cells that build the
# customer x community matrix before this one.
cr.fit(data_full)

  check_blas_config()


NameError: name 'data_full' is not defined

In [41]:
# dftest_new = pd.DataFrame()
# dftest_new['customer_id_new'] = customers['customer_id_new'][:200]

In [65]:
# Load the sample submission; its "customer_id" column is used below as the
# list of customers to predict for.
df_sample = pd.read_csv("sample_submission/sample_submission_7.csv")

In [66]:
df_sample

Unnamed: 0,customer_id,community_id_1,community_id_2,community_id_3,community_id_4,community_id_5,community_id_6,community_id_7
0,d811a09d435ac2d1b1ed46e272405af10933b4711f4564...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
1,73821118fc33500efaa6b1adf8ab0e9d314abb15f62603...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
2,6381971c002097a94b8d7a03d9dc3e9ff7872a52c4764a...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
3,250d49b476af0c7d2b23dd39cbb6edff39d44c64f8ebc0...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
4,2da339a6bfe7791329ca8eb0a11544d4a9fb4c89572716...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
...,...,...,...,...,...,...,...,...
69041,606d6caf18a6209acaa46a1a0233a4bf22bc9885d70397...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69042,a71392c55f8bace9bd108a1acbf9c2335d700fb041966b...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69043,7b6d03389208df699aa116563dc5c45cbf5c725b47b415...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69044,8d1c564ba9f96dafb0a856fbc65fc4f9fc020863fe3b3f...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...


In [70]:
# Encode the submission customers with the encoder fitted on the training
# data (transform raises for customers it has never seen).
df_sample["cust_id"] = encoder_usr_id.transform(df_sample["customer_id"])
ids_to_pred = df_sample["cust_id"]

# Batch request: seven community codes per customer. recommend() returns
# (ids, scores); only the ids are used afterwards.
# NOTE(review): relies on implicit's default of filtering out items the user
# already interacted with — confirm for the installed version.
all_recs, _scores = cr.recommend(
    ids_to_pred,
    data_full[ids_to_pred].astype(float),
    N=7,
)

In [71]:
from tqdm.auto import tqdm

# Decode community codes by indexing classes_ directly; this gives the same
# labels as encoder_com_id.inverse_transform but is faster per row.
community_labels = encoder_com_id.classes_

result_recommendations = {}
for user_code, rec_row in zip(tqdm(ids_to_pred), all_recs):
    result_recommendations[user_code] = [community_labels[j] for j in rec_row]

  0%|          | 0/69046 [00:00<?, ?it/s]

In [72]:
# One row per customer: the dict keys (customer codes) become the "index"
# column and the seven recommended communities become columns 0..6.
result = pd.DataFrame(result_recommendations).T.reset_index()

# Turn the customer codes back into the original customer hashes.
result["index"] = encoder_usr_id.inverse_transform(result["index"])

# Rename all columns to the submission layout in a single pass.
submission_columns = {"index": "customer_id"}
submission_columns.update({i: f"community_id_{i+1}" for i in range(7)})
result = result.rename(columns=submission_columns)

In [73]:
result

Unnamed: 0,customer_id,community_id_1,community_id_2,community_id_3,community_id_4,community_id_5,community_id_6,community_id_7
0,d811a09d435ac2d1b1ed46e272405af10933b4711f4564...,e2d8710c3ca6ccb07ca3fbf66f0530be94903a58f5c2a8...,1379dd70dcdd575c69978f1b0b477d2f27222384313439...,ab5b19507c5a96c274a875f90d372882bde7956796df51...,6b5fcc67080b2a0359a15aac1b34a1f3e869211a887d0b...,768680a00db384987ff7532abc1ebc8062281b4ac60124...,caa892b683f011961b8868a73ce4999f667aa302d5a838...,5280bd59e190e9897b8481f1751f5508b76188da909b83...
1,73821118fc33500efaa6b1adf8ab0e9d314abb15f62603...,510a4b83bde73a7784dff4e1d89f67c943f4ff32b7f6b6...,f146596214865eb9e4abf6a4d494480e4ad79a0191bd25...,fc781628c86a7ee722f0efc9b75c9bf5a1360855a1e665...,e4970874a1fb7f58f51e5f933bfffada4778416221a29a...,d8a67fd0082c17f2590e4763c6aa925320841a252c9f65...,d2671510de5a76548a1919b25e32c73104821f4acc0c07...,d1af495ca20772cd9fcb43e8c27da695b5e9d93c64fc39...
2,6381971c002097a94b8d7a03d9dc3e9ff7872a52c4764a...,d42e10f2610272c545292b60844df2eb7a3ba47d6b4829...,9ecd44db17abc1fc665d38db272893274d5761665eb27c...,66cd88e7efdb90ca0107aa6808cea3b00df6dd44307349...,ce7a456428f786c44c3e27c039c4299885da5430ccfd05...,28ea66f6db0af1a3ed137ae38514d716e3124821f1731e...,bc5242520327e1f29ca4f583c08fcb185e8e103045038b...,5990a71b08f0479ada0942a02641e6e60e7eac210f7cf1...
3,250d49b476af0c7d2b23dd39cbb6edff39d44c64f8ebc0...,a8b15b543469eebe27c67c0c5897f6dc18e160cc5997c0...,892f2c17a6b9684c4dfd9834c0c6c8e7694c9faa00a0b3...,16183b4e07446d00c10d26deb9a1d62814d67fb6039b59...,96171649ea1057186a03c0f9bd216569642e431993897d...,a8d0b95bb7330d3a1171c439c5db88e1a3671edfbca950...,7ed0a5203360da0630e5ad51f2037a4d81b97e000d1cf1...,2165f7bfb5b82ede0cf8f3be218c7f57ef2cd1e282ae9c...
4,2da339a6bfe7791329ca8eb0a11544d4a9fb4c89572716...,3823a5e13dfd871d3ff72d5a69af9b77466d604f43cecd...,4d7cf04c8e56f0797012064220d0a45509a1264bc17f58...,aee65cc087f56024df2aedd0755db2a3d193cf2ca558b7...,1717cd8dbd989b4bd56009c53db75d564b252a3f3ed722...,b8ed0e918d9f11140c70ef08be19c197917f4f4455f255...,14d2fb073c9a2929f1fd9919cbbae50a5c597d2459d7f5...,2297e8137751faa879da9ceea4a47423a9eda35d9b0278...
...,...,...,...,...,...,...,...,...
69041,606d6caf18a6209acaa46a1a0233a4bf22bc9885d70397...,13bc935e69c0119a69b6cb8f2a75e0150aa6c579fa826b...,face1187302b0000986e09146fe04bdb5f39c4cd0ab33a...,57d99404c90c9344d3aec2c15ffb7b1a0f46c8a4f7157a...,e3adbab2d48db35c1a50f513ccc5416618fad8ef1e0567...,3b35ed784d26ce9dc17579cb1ada355ad5c850f2251f59...,274445fd08bfa77e358b0599c1adb91e6900d58335f0bb...,8792b896fc5d399bbacc274bc5deac4f25f6caadbf28cb...
69042,a71392c55f8bace9bd108a1acbf9c2335d700fb041966b...,b2e550048439263d779622fc9b4d221827573e9c6c5900...,f7c22598d4d68bc4478d3b07c5f1dfee87e77bbcbf6169...,c3b4828f5f3954a00f233447c256ee0513a0df083ac7e9...,dac62fcdc52f97daad2712837a808e304fd011bd5ec665...,b274af0e3a0afc5445b804d67cb2e789bbfff563c4478e...,3c8c56c125be2575c74750926654d11289941ff9b3f75e...,0aebd3b9934b9ec88cb78a99256bab2987798b93ff5a8d...
69043,7b6d03389208df699aa116563dc5c45cbf5c725b47b415...,ab5b19507c5a96c274a875f90d372882bde7956796df51...,5280bd59e190e9897b8481f1751f5508b76188da909b83...,f6a7fbac98fa02e64db8cb671ae5b529fa944d1f41a7f6...,de2b374004d89c21ab753e06dc73c2740403709edae8b2...,d7a49cba22dd3fba7de0ece2c8938d05ae38e03a08cb9f...,e015fa5e6ef36af3ea62a91263c0fba81bd56794a9716a...,b181b1aba2505c63c69e6058c07583c060719e611c22eb...
69044,8d1c564ba9f96dafb0a856fbc65fc4f9fc020863fe3b3f...,311db6eae03a8ae679ec1de8c2321a120938b3eaabec93...,223f53c800c0ff65d3c72982ce6d333fcb2af30f4fbbcc...,6a62fb8b0acef355567e31ac9f3c5a7c62e6f9d4883708...,f89f5b71ff4c44c8b10ace62a41f5c8aa4607497040c9c...,51b354f4db4e89508fad99ff9b466db96a307c9d1a1941...,dcefd8ed3f75ac28147ad61eb1969cc8c0111bb15b1867...,b5e8e4fa40a42f23952feffd67e9cceea80069d51d49af...


In [74]:
# Save the submission; index=None is presumably meant to omit the index
# column — confirm against the written file.
# NOTE(review): the file name mentions "tfidf", while the model fitted in
# this notebook is AlternatingLeastSquares — confirm the name is intended.
result.to_csv("version_4_tfidf_recommender_without_valid.csv", index=None)

In [72]:
# Display a previously saved submission file for visual comparison.
pd.read_csv("first_version_answer.csv")

Unnamed: 0,customer_id,community_id_1,community_id_2,community_id_3,community_id_4,community_id_5,community_id_6,community_id_7
0,d811a09d435ac2d1b1ed46e272405af10933b4711f4564...,e2d8710c3ca6ccb07ca3fbf66f0530be94903a58f5c2a8...,1379dd70dcdd575c69978f1b0b477d2f27222384313439...,96c895a95437ee18134877c1b995c58b4e02144f21418c...,8e9a1562bf9ee8ed1ea3e049b6247df800bca422a5d479...,413474b8942855d448c77ab283d850cf40920f06f3f5f1...,6b5fcc67080b2a0359a15aac1b34a1f3e869211a887d0b...,95f663e12ea58f926d92f764b07674a53b29a5af1f5e2e...
1,73821118fc33500efaa6b1adf8ab0e9d314abb15f62603...,fc781628c86a7ee722f0efc9b75c9bf5a1360855a1e665...,ed6a5f49e78b2ee7f56117984fbb4fa674d32520b3e6e4...,e4970874a1fb7f58f51e5f933bfffada4778416221a29a...,e452f98f781fda8c619eb181ca17d76ae4c74bef741527...,d2671510de5a76548a1919b25e32c73104821f4acc0c07...,d1af495ca20772cd9fcb43e8c27da695b5e9d93c64fc39...,c87711e7a63b587d8335cfa5fbd04eaaa5158124238204...
2,6381971c002097a94b8d7a03d9dc3e9ff7872a52c4764a...,d42e10f2610272c545292b60844df2eb7a3ba47d6b4829...,66cd88e7efdb90ca0107aa6808cea3b00df6dd44307349...,28ea66f6db0af1a3ed137ae38514d716e3124821f1731e...,bc5242520327e1f29ca4f583c08fcb185e8e103045038b...,f1263dee5aa64941b5c72882b67fa58504fdab39b9b539...,ce7a456428f786c44c3e27c039c4299885da5430ccfd05...,b51aa24cf528ab4b642b14241fad0ac42757dbfb97db53...
3,250d49b476af0c7d2b23dd39cbb6edff39d44c64f8ebc0...,892f2c17a6b9684c4dfd9834c0c6c8e7694c9faa00a0b3...,a8b15b543469eebe27c67c0c5897f6dc18e160cc5997c0...,96171649ea1057186a03c0f9bd216569642e431993897d...,16183b4e07446d00c10d26deb9a1d62814d67fb6039b59...,2165f7bfb5b82ede0cf8f3be218c7f57ef2cd1e282ae9c...,6729664ac03bd48062612647597252d30d097b79e8308b...,a8d0b95bb7330d3a1171c439c5db88e1a3671edfbca950...
4,2da339a6bfe7791329ca8eb0a11544d4a9fb4c89572716...,3823a5e13dfd871d3ff72d5a69af9b77466d604f43cecd...,4d7cf04c8e56f0797012064220d0a45509a1264bc17f58...,1717cd8dbd989b4bd56009c53db75d564b252a3f3ed722...,b8ed0e918d9f11140c70ef08be19c197917f4f4455f255...,14d2fb073c9a2929f1fd9919cbbae50a5c597d2459d7f5...,2297e8137751faa879da9ceea4a47423a9eda35d9b0278...,b927c9e7135a558e2ed237459ccf02230e6c2c3400f776...
...,...,...,...,...,...,...,...,...
69041,606d6caf18a6209acaa46a1a0233a4bf22bc9885d70397...,face1187302b0000986e09146fe04bdb5f39c4cd0ab33a...,274445fd08bfa77e358b0599c1adb91e6900d58335f0bb...,57d99404c90c9344d3aec2c15ffb7b1a0f46c8a4f7157a...,13bc935e69c0119a69b6cb8f2a75e0150aa6c579fa826b...,8792b896fc5d399bbacc274bc5deac4f25f6caadbf28cb...,3b35ed784d26ce9dc17579cb1ada355ad5c850f2251f59...,bfbe0ab1789db400d4090745c5c54a6f09102048f04aa8...
69042,a71392c55f8bace9bd108a1acbf9c2335d700fb041966b...,0aebd3b9934b9ec88cb78a99256bab2987798b93ff5a8d...,124ea7f57c36038d8649d4a0edc9538733302f9b819b3f...,6570dd9605aa01ff78a0224a47cddc379bbf7b1db10d28...,508577156399f517258c56dd8c775eefcd28921df64b6f...,75f03af4f51c4c9fdf73fb9dd495964b776f2adc5b0154...,1c277c9b18d0d9175e7b849ad04d64fd770a0c71f20eb2...,8c3f63d3b0e551bbe6434b89cbc53045026a2a9e56276f...
69043,7b6d03389208df699aa116563dc5c45cbf5c725b47b415...,5280bd59e190e9897b8481f1751f5508b76188da909b83...,ab5b19507c5a96c274a875f90d372882bde7956796df51...,f6a7fbac98fa02e64db8cb671ae5b529fa944d1f41a7f6...,d7a49cba22dd3fba7de0ece2c8938d05ae38e03a08cb9f...,e015fa5e6ef36af3ea62a91263c0fba81bd56794a9716a...,de2b374004d89c21ab753e06dc73c2740403709edae8b2...,4e00486bc0c8c8c3de7172f2ae98eb98815a0e9942774f...
69044,8d1c564ba9f96dafb0a856fbc65fc4f9fc020863fe3b3f...,4cd507db4bc028745427889e579bbf3591662d2885a088...,1245eef8aa5ad80b9ad1edf41426b6305e3f2d2f923054...,311db6eae03a8ae679ec1de8c2321a120938b3eaabec93...,6a62fb8b0acef355567e31ac9f3c5a7c62e6f9d4883708...,f89f5b71ff4c44c8b10ace62a41f5c8aa4607497040c9c...,51b354f4db4e89508fad99ff9b466db96a307c9d1a1941...,e2ee24a906bd15af09084c2cf73bc4cdd028391403de51...


In [73]:
# Display the sample submission file to check the expected column layout.
pd.read_csv("sample_submission/sample_submission_7.csv")

Unnamed: 0,customer_id,community_id_1,community_id_2,community_id_3,community_id_4,community_id_5,community_id_6,community_id_7
0,d811a09d435ac2d1b1ed46e272405af10933b4711f4564...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
1,73821118fc33500efaa6b1adf8ab0e9d314abb15f62603...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
2,6381971c002097a94b8d7a03d9dc3e9ff7872a52c4764a...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
3,250d49b476af0c7d2b23dd39cbb6edff39d44c64f8ebc0...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
4,2da339a6bfe7791329ca8eb0a11544d4a9fb4c89572716...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
...,...,...,...,...,...,...,...,...
69041,606d6caf18a6209acaa46a1a0233a4bf22bc9885d70397...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69042,a71392c55f8bace9bd108a1acbf9c2335d700fb041966b...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69043,7b6d03389208df699aa116563dc5c45cbf5c725b47b415...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...
69044,8d1c564ba9f96dafb0a856fbc65fc4f9fc020863fe3b3f...,f24c996d4123138fbe5d2ac482dcf3ccefe6ddb30de3fe...,f2815ab120cebd48c350806543d9567766ae95ea3a589c...,d9643ac39bb7f323ca6feece4633aeec1e045cd574fa95...,349743ef571518d2b9885eb88626e2488e21536da235dd...,1c68c19a80263399d946e6a6ba7e354c7ca7a1571f7833...,cf454808981a05a149d6dfd09b80924627e29945d2aa7e...,1c01b5a5a10ad6f01e189c9720856ab89dc2c268dc6741...


In [40]:
# NOTE(review): `customer_id` and `dftest_new` are not defined in any visible
# cell (the lines that would create `dftest_new` are commented out), so this
# cell presumably relies on state from an earlier session — confirm before
# re-running.
recs = cr.recommend(customer_id, data_full[dftest_new.values.squeeze()].astype(float))
# Keep the first seven entries of the first element returned by recommend().
customer_recomms = [i for i in recs[0]][:7]
customer_recomms

[31455, 85540, 7173, 125379, 28914, 23602, 9184]

## Что еще можно было попробовать:

* Ещё один способ — матричная факторизация через SVD (выше использована факторизация ALS)


* Другие метрики близости
  * Косинусное расстояние
  * Коэффициент корреляции Пирсона
  * Манхэттенское расстояние
  * Евклидово расстояние
  * Коэффициент Жаккара

  
* Совершенно точно — подбор гиперпараметров (любых), например того же k:
  * GridSearchCV - подбор по сетке
  * Optuna - байесовский алгоритм быстрого подбора гиперпараметров
  

* Объединение нескольких решений:
  * Взвешенная система, в которой результаты отдельных алгоритмов объединяются с помощью взвешенной суммы;
  * Каскадная система, в которой алгоритмы обрабатывают данные по цепочке, уточняя результат.
