In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm
import pickle

In [2]:
# Load the joke ratings. Forward slashes keep this relative path valid on
# POSIX systems as well as on Windows (a backslash is not a path separator
# outside Windows).
train_joke_df = pd.read_csv('../data/recsys-in-practice/train_joke_df.csv')

In [3]:
# Cast both ID columns to integers and shift them down by one.
# NOTE(review): presumably the raw IDs are 1-based and this makes them
# 0-based — confirm against the data. Running this cell twice shifts twice.
for id_col in ('UID', 'JID'):
    train_joke_df[id_col] = train_joke_df[id_col].astype(int) - 1

In [4]:
# Display the ratings frame for a quick visual check.
train_joke_df

Unnamed: 0,UID,JID,Rating
0,18028,5,-1.26
1,3297,63,-4.17
2,3365,57,0.92
3,12734,91,3.69
4,11364,37,-6.60
...,...,...,...
1448359,22603,25,2.82
1448360,22254,35,-1.94
1448361,21055,39,-9.56
1448362,12327,96,0.87


In [5]:
# Hold out a very small random fraction of the ratings for validation;
# the fixed random_state makes the split repeatable.
train_df, valid_df = train_test_split(
    train_joke_df,
    test_size=0.0007,
    random_state=42,
)

# Number of held-out rows.
len(valid_df)

1014

In [6]:
# Cast the ID columns, sort by (UID, JID) and rebuild a clean 0..n-1 index.
# Every step returns a new frame: assigning columns directly into the
# frames produced by train_test_split can trigger pandas'
# SettingWithCopyWarning.
id_dtypes = {'UID': int, 'JID': int}

train_df = (
    train_df
    .astype(id_dtypes)
    .sort_values(by=['UID', 'JID'])
    .reset_index(drop=True)
)

valid_df = (
    valid_df
    .astype(id_dtypes)
    .sort_values(by=['UID', 'JID'])
    .reset_index(drop=True)
)

In [7]:
from catboost import CatBoostRanker, Pool, MetricVisualizer

In [8]:
# Columns fed to the model; cat_features lists the same two columns
# as a separate list object.
features = ['UID', 'JID']
cat_features = list(features)

In [9]:
# Wrap both splits as CatBoost pools, with one group per user (group_id=UID).
# NOTE(review): CatBoost expects rows of the same group to be adjacent —
# this assumes the frames were sorted by UID beforehand; confirm.
train_pool = Pool(
    train_df[features],
    label=train_df['Rating'],
    group_id=train_df['UID'],
    cat_features=cat_features,
)
valid_pool = Pool(
    valid_df[features],
    label=valid_df['Rating'],
    group_id=valid_df['UID'],
    cat_features=cat_features,
)


In [10]:
# Training settings for the CatBoost model.
# The loss is given only under 'loss_function': 'objective' is CatBoost's
# alias for the same setting, so listing both names is redundant.
default_parameters = {
    'iterations': 2000,
    'loss_function': 'RMSE',
    'eval_metric': 'RMSE',
    'custom_metric': 'RMSE',
    'random_seed': 0,
    # Directory where CatBoost writes its training artefacts.
    'train_dir': 'RMSE',
}

In [11]:
# Build the ranker from default_parameters and train it on train_pool.
# valid_pool is passed as the evaluation set; plot=True requests CatBoost's
# interactive metric chart. The fit call is the last expression, so its
# return value is what the cell displays.
model = CatBoostRanker(**default_parameters)
model.fit(train_pool, eval_set=valid_pool, plot=True)



MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.104587
0:	learn: 5.1149050	test: 5.0327435	best: 5.0327435 (0)	total: 433ms	remaining: 14m 25s
1:	learn: 5.0158097	test: 4.9133496	best: 4.9133496 (1)	total: 673ms	remaining: 11m 12s
2:	learn: 4.9346973	test: 4.8152039	best: 4.8152039 (2)	total: 795ms	remaining: 8m 49s
3:	learn: 4.8673783	test: 4.7360597	best: 4.7360597 (3)	total: 912ms	remaining: 7m 35s
4:	learn: 4.8096012	test: 4.6663701	best: 4.6663701 (4)	total: 1.03s	remaining: 6m 51s
5:	learn: 4.7615122	test: 4.6070215	best: 4.6070215 (5)	total: 1.27s	remaining: 7m 3s
6:	learn: 4.7222274	test: 4.5567861	best: 4.5567861 (6)	total: 1.39s	remaining: 6m 36s
7:	learn: 4.6884164	test: 4.5087800	best: 4.5087800 (7)	total: 1.51s	remaining: 6m 16s
8:	learn: 4.6613761	test: 4.4707866	best: 4.4707866 (8)	total: 1.63s	remaining: 6m 1s
9:	learn: 4.6384986	test: 4.4392065	best: 4.4392065 (9)	total: 1.75s	remaining: 5m 48s
10:	learn: 4.6192990	test: 4.4128329	best: 4.4128329 (10)	total: 1.87s	remaining: 5m 37s
11:	learn: 

94:	learn: 4.5092032	test: 4.2142352	best: 4.2142352 (94)	total: 11.8s	remaining: 3m 56s
95:	learn: 4.5091695	test: 4.2140105	best: 4.2140105 (95)	total: 11.9s	remaining: 3m 56s
96:	learn: 4.5091273	test: 4.2139917	best: 4.2139917 (96)	total: 12.1s	remaining: 3m 56s
97:	learn: 4.5090739	test: 4.2141879	best: 4.2139917 (96)	total: 12.2s	remaining: 3m 56s
98:	learn: 4.5090546	test: 4.2140694	best: 4.2139917 (96)	total: 12.3s	remaining: 3m 56s
99:	learn: 4.5090538	test: 4.2140658	best: 4.2139917 (96)	total: 12.4s	remaining: 3m 55s
100:	learn: 4.5090103	test: 4.2137585	best: 4.2137585 (100)	total: 12.5s	remaining: 3m 54s
101:	learn: 4.5089766	test: 4.2137104	best: 4.2137104 (101)	total: 12.6s	remaining: 3m 54s
102:	learn: 4.5089743	test: 4.2137321	best: 4.2137104 (101)	total: 12.7s	remaining: 3m 53s
103:	learn: 4.5089669	test: 4.2137731	best: 4.2137104 (101)	total: 12.8s	remaining: 3m 53s
104:	learn: 4.5089428	test: 4.2136136	best: 4.2136136 (104)	total: 13s	remaining: 3m 53s
105:	learn: 4

186:	learn: 4.5068834	test: 4.2085764	best: 4.2085764 (186)	total: 22.9s	remaining: 3m 42s
187:	learn: 4.5068775	test: 4.2085846	best: 4.2085764 (186)	total: 23s	remaining: 3m 41s
188:	learn: 4.5068446	test: 4.2084355	best: 4.2084355 (188)	total: 23.2s	remaining: 3m 41s
189:	learn: 4.5068154	test: 4.2084773	best: 4.2084355 (188)	total: 23.3s	remaining: 3m 41s
190:	learn: 4.5068057	test: 4.2084478	best: 4.2084355 (188)	total: 23.4s	remaining: 3m 41s
191:	learn: 4.5067972	test: 4.2084551	best: 4.2084355 (188)	total: 23.5s	remaining: 3m 41s
192:	learn: 4.5067797	test: 4.2083965	best: 4.2083965 (192)	total: 23.6s	remaining: 3m 41s
193:	learn: 4.5067538	test: 4.2083983	best: 4.2083965 (192)	total: 23.8s	remaining: 3m 41s
194:	learn: 4.5067180	test: 4.2083522	best: 4.2083522 (194)	total: 23.9s	remaining: 3m 41s
195:	learn: 4.5067084	test: 4.2082594	best: 4.2082594 (195)	total: 24s	remaining: 3m 40s
196:	learn: 4.5066981	test: 4.2081920	best: 4.2081920 (196)	total: 24.1s	remaining: 3m 40s
197

278:	learn: 4.5057114	test: 4.2063234	best: 4.2062814 (276)	total: 34.8s	remaining: 3m 34s
279:	learn: 4.5057021	test: 4.2062563	best: 4.2062563 (279)	total: 35s	remaining: 3m 34s
280:	learn: 4.5056990	test: 4.2062415	best: 4.2062415 (280)	total: 35.1s	remaining: 3m 34s
281:	learn: 4.5056967	test: 4.2062413	best: 4.2062413 (281)	total: 35.2s	remaining: 3m 34s
282:	learn: 4.5056859	test: 4.2062295	best: 4.2062295 (282)	total: 35.4s	remaining: 3m 34s
283:	learn: 4.5056839	test: 4.2062348	best: 4.2062295 (282)	total: 35.5s	remaining: 3m 34s
284:	learn: 4.5056754	test: 4.2062158	best: 4.2062158 (284)	total: 35.6s	remaining: 3m 34s
285:	learn: 4.5056682	test: 4.2062457	best: 4.2062158 (284)	total: 35.8s	remaining: 3m 34s
286:	learn: 4.5056568	test: 4.2061000	best: 4.2061000 (286)	total: 35.9s	remaining: 3m 34s
287:	learn: 4.5056501	test: 4.2060869	best: 4.2060869 (287)	total: 36s	remaining: 3m 34s
288:	learn: 4.5056218	test: 4.2060002	best: 4.2060002 (288)	total: 36.1s	remaining: 3m 34s
289

370:	learn: 4.5049305	test: 4.2051373	best: 4.2048668 (344)	total: 46.8s	remaining: 3m 25s
371:	learn: 4.5049250	test: 4.2050996	best: 4.2048668 (344)	total: 46.9s	remaining: 3m 25s
372:	learn: 4.5049115	test: 4.2050159	best: 4.2048668 (344)	total: 47.1s	remaining: 3m 25s
373:	learn: 4.5049000	test: 4.2049960	best: 4.2048668 (344)	total: 47.2s	remaining: 3m 25s
374:	learn: 4.5048981	test: 4.2049766	best: 4.2048668 (344)	total: 47.3s	remaining: 3m 25s
375:	learn: 4.5048923	test: 4.2049567	best: 4.2048668 (344)	total: 47.5s	remaining: 3m 24s
376:	learn: 4.5048858	test: 4.2050180	best: 4.2048668 (344)	total: 47.6s	remaining: 3m 24s
377:	learn: 4.5048794	test: 4.2050352	best: 4.2048668 (344)	total: 47.7s	remaining: 3m 24s
378:	learn: 4.5048778	test: 4.2050244	best: 4.2048668 (344)	total: 47.9s	remaining: 3m 24s
379:	learn: 4.5048738	test: 4.2050418	best: 4.2048668 (344)	total: 48s	remaining: 3m 24s
380:	learn: 4.5048670	test: 4.2051300	best: 4.2048668 (344)	total: 48.1s	remaining: 3m 24s
3

462:	learn: 4.5043519	test: 4.2046780	best: 4.2043754 (445)	total: 58.8s	remaining: 3m 15s
463:	learn: 4.5043355	test: 4.2046832	best: 4.2043754 (445)	total: 58.9s	remaining: 3m 14s
464:	learn: 4.5043334	test: 4.2046573	best: 4.2043754 (445)	total: 59s	remaining: 3m 14s
465:	learn: 4.5043222	test: 4.2046911	best: 4.2043754 (445)	total: 59.2s	remaining: 3m 14s
466:	learn: 4.5043181	test: 4.2047044	best: 4.2043754 (445)	total: 59.3s	remaining: 3m 14s
467:	learn: 4.5043118	test: 4.2046919	best: 4.2043754 (445)	total: 59.4s	remaining: 3m 14s
468:	learn: 4.5043076	test: 4.2046708	best: 4.2043754 (445)	total: 59.5s	remaining: 3m 14s
469:	learn: 4.5043020	test: 4.2046714	best: 4.2043754 (445)	total: 59.7s	remaining: 3m 14s
470:	learn: 4.5042982	test: 4.2046302	best: 4.2043754 (445)	total: 59.8s	remaining: 3m 14s
471:	learn: 4.5042964	test: 4.2046197	best: 4.2043754 (445)	total: 59.9s	remaining: 3m 14s
472:	learn: 4.5042950	test: 4.2046091	best: 4.2043754 (445)	total: 1m	remaining: 3m 13s
473:

554:	learn: 4.5038497	test: 4.2040086	best: 4.2037843 (540)	total: 1m 10s	remaining: 3m 3s
555:	learn: 4.5038438	test: 4.2040140	best: 4.2037843 (540)	total: 1m 10s	remaining: 3m 3s
556:	learn: 4.5038403	test: 4.2039988	best: 4.2037843 (540)	total: 1m 10s	remaining: 3m 3s
557:	learn: 4.5038369	test: 4.2039995	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
558:	learn: 4.5038300	test: 4.2039592	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
559:	learn: 4.5038275	test: 4.2039874	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
560:	learn: 4.5038253	test: 4.2039513	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
561:	learn: 4.5038236	test: 4.2039593	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
562:	learn: 4.5038170	test: 4.2039710	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 3s
563:	learn: 4.5037991	test: 4.2039754	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 2s
564:	learn: 4.5037983	test: 4.2039508	best: 4.2037843 (540)	total: 1m 11s	remaining: 3m 2s

644:	learn: 4.5033941	test: 4.2035410	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
645:	learn: 4.5033917	test: 4.2035767	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
646:	learn: 4.5033854	test: 4.2035084	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
647:	learn: 4.5033816	test: 4.2034851	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
648:	learn: 4.5033662	test: 4.2033986	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
649:	learn: 4.5033623	test: 4.2033842	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 52s
650:	learn: 4.5033608	test: 4.2033806	best: 4.2033236 (606)	total: 1m 22s	remaining: 2m 51s
651:	learn: 4.5033562	test: 4.2032982	best: 4.2032982 (651)	total: 1m 23s	remaining: 2m 51s
652:	learn: 4.5033513	test: 4.2032210	best: 4.2032210 (652)	total: 1m 23s	remaining: 2m 51s
653:	learn: 4.5033457	test: 4.2031441	best: 4.2031441 (653)	total: 1m 23s	remaining: 2m 51s
654:	learn: 4.5033410	test: 4.2030882	best: 4.2030882 (654)	total: 1m 23s	remain

734:	learn: 4.5030266	test: 4.2021739	best: 4.2021619 (733)	total: 1m 33s	remaining: 2m 41s
735:	learn: 4.5030189	test: 4.2021896	best: 4.2021619 (733)	total: 1m 33s	remaining: 2m 41s
736:	learn: 4.5030162	test: 4.2021954	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 41s
737:	learn: 4.5030120	test: 4.2022567	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 41s
738:	learn: 4.5030104	test: 4.2022843	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
739:	learn: 4.5030092	test: 4.2022833	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
740:	learn: 4.5030080	test: 4.2022819	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
741:	learn: 4.5030052	test: 4.2022227	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
742:	learn: 4.5030008	test: 4.2022091	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
743:	learn: 4.5029961	test: 4.2022103	best: 4.2021619 (733)	total: 1m 34s	remaining: 2m 40s
744:	learn: 4.5029944	test: 4.2022017	best: 4.2021619 (733)	total: 1m 35s	remain

824:	learn: 4.5027068	test: 4.2021692	best: 4.2021530 (821)	total: 1m 45s	remaining: 2m 30s
825:	learn: 4.5027059	test: 4.2021653	best: 4.2021530 (821)	total: 1m 45s	remaining: 2m 30s
826:	learn: 4.5027034	test: 4.2021659	best: 4.2021530 (821)	total: 1m 45s	remaining: 2m 30s
827:	learn: 4.5026988	test: 4.2021712	best: 4.2021530 (821)	total: 1m 45s	remaining: 2m 29s
828:	learn: 4.5026930	test: 4.2022084	best: 4.2021530 (821)	total: 1m 46s	remaining: 2m 29s
829:	learn: 4.5026896	test: 4.2022236	best: 4.2021530 (821)	total: 1m 46s	remaining: 2m 29s
830:	learn: 4.5026880	test: 4.2022099	best: 4.2021530 (821)	total: 1m 46s	remaining: 2m 29s
831:	learn: 4.5026848	test: 4.2021627	best: 4.2021530 (821)	total: 1m 46s	remaining: 2m 29s
832:	learn: 4.5026834	test: 4.2021649	best: 4.2021530 (821)	total: 1m 46s	remaining: 2m 29s
833:	learn: 4.5026824	test: 4.2021456	best: 4.2021456 (833)	total: 1m 46s	remaining: 2m 29s
834:	learn: 4.5026781	test: 4.2021508	best: 4.2021456 (833)	total: 1m 46s	remain

914:	learn: 4.5024176	test: 4.2022170	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 19s
915:	learn: 4.5024159	test: 4.2022061	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 18s
916:	learn: 4.5024112	test: 4.2022583	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 18s
917:	learn: 4.5024106	test: 4.2022500	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 18s
918:	learn: 4.5024063	test: 4.2022899	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 18s
919:	learn: 4.5024040	test: 4.2023414	best: 4.2019819 (853)	total: 1m 57s	remaining: 2m 18s
920:	learn: 4.5024031	test: 4.2023631	best: 4.2019819 (853)	total: 1m 58s	remaining: 2m 18s
921:	learn: 4.5024012	test: 4.2023594	best: 4.2019819 (853)	total: 1m 58s	remaining: 2m 18s
922:	learn: 4.5023973	test: 4.2023488	best: 4.2019819 (853)	total: 1m 58s	remaining: 2m 18s
923:	learn: 4.5023913	test: 4.2024067	best: 4.2019819 (853)	total: 1m 58s	remaining: 2m 17s
924:	learn: 4.5023907	test: 4.2024026	best: 4.2019819 (853)	total: 1m 58s	remain

1006:	learn: 4.5021509	test: 4.2024525	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 7s
1007:	learn: 4.5021479	test: 4.2024205	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 7s
1008:	learn: 4.5021459	test: 4.2024152	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 7s
1009:	learn: 4.5021389	test: 4.2023940	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 7s
1010:	learn: 4.5021356	test: 4.2024051	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 7s
1011:	learn: 4.5021327	test: 4.2024066	best: 4.2019819 (853)	total: 2m 9s	remaining: 2m 6s
1012:	learn: 4.5021247	test: 4.2023923	best: 4.2019819 (853)	total: 2m 10s	remaining: 2m 6s
1013:	learn: 4.5021217	test: 4.2023745	best: 4.2019819 (853)	total: 2m 10s	remaining: 2m 6s
1014:	learn: 4.5021178	test: 4.2023979	best: 4.2019819 (853)	total: 2m 10s	remaining: 2m 6s
1015:	learn: 4.5021170	test: 4.2023979	best: 4.2019819 (853)	total: 2m 10s	remaining: 2m 6s
1016:	learn: 4.5021078	test: 4.2024211	best: 4.2019819 (853)	total: 2m 10s	remaining: 

1096:	learn: 4.5018972	test: 4.2024724	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 56s
1097:	learn: 4.5018964	test: 4.2024872	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 56s
1098:	learn: 4.5018949	test: 4.2025011	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1099:	learn: 4.5018901	test: 4.2024684	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1100:	learn: 4.5018891	test: 4.2024696	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1101:	learn: 4.5018867	test: 4.2024171	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1102:	learn: 4.5018829	test: 4.2024049	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1103:	learn: 4.5018827	test: 4.2024080	best: 4.2019819 (853)	total: 2m 21s	remaining: 1m 55s
1104:	learn: 4.5018822	test: 4.2024162	best: 4.2019819 (853)	total: 2m 22s	remaining: 1m 55s
1105:	learn: 4.5018801	test: 4.2024142	best: 4.2019819 (853)	total: 2m 22s	remaining: 1m 54s
1106:	learn: 4.5018765	test: 4.2024240	best: 4.2019819 (853)	total: 2m

1186:	learn: 4.5016287	test: 4.2019533	best: 4.2018452 (1180)	total: 2m 32s	remaining: 1m 44s
1187:	learn: 4.5016275	test: 4.2019462	best: 4.2018452 (1180)	total: 2m 32s	remaining: 1m 44s
1188:	learn: 4.5016245	test: 4.2019527	best: 4.2018452 (1180)	total: 2m 32s	remaining: 1m 44s
1189:	learn: 4.5016238	test: 4.2019522	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 44s
1190:	learn: 4.5016219	test: 4.2019550	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 44s
1191:	learn: 4.5016176	test: 4.2019938	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 43s
1192:	learn: 4.5016160	test: 4.2020148	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 43s
1193:	learn: 4.5016121	test: 4.2020119	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 43s
1194:	learn: 4.5016110	test: 4.2020100	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 43s
1195:	learn: 4.5016089	test: 4.2019875	best: 4.2018452 (1180)	total: 2m 33s	remaining: 1m 43s
1196:	learn: 4.5016077	test: 4.2020182	best: 4.2018452 (1180

1274:	learn: 4.5014092	test: 4.2018280	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 33s
1275:	learn: 4.5014077	test: 4.2018242	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 33s
1276:	learn: 4.5014033	test: 4.2017989	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 33s
1277:	learn: 4.5014014	test: 4.2017973	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 32s
1278:	learn: 4.5013966	test: 4.2018101	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 32s
1279:	learn: 4.5013951	test: 4.2018160	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 32s
1280:	learn: 4.5013914	test: 4.2018592	best: 4.2017390 (1256)	total: 2m 44s	remaining: 1m 32s
1281:	learn: 4.5013889	test: 4.2018602	best: 4.2017390 (1256)	total: 2m 45s	remaining: 1m 32s
1282:	learn: 4.5013869	test: 4.2018507	best: 4.2017390 (1256)	total: 2m 45s	remaining: 1m 32s
1283:	learn: 4.5013846	test: 4.2018482	best: 4.2017390 (1256)	total: 2m 45s	remaining: 1m 32s
1284:	learn: 4.5013842	test: 4.2018475	best: 4.2017390 (1256

1362:	learn: 4.5011876	test: 4.2015600	best: 4.2015600 (1362)	total: 2m 55s	remaining: 1m 22s
1363:	learn: 4.5011833	test: 4.2015599	best: 4.2015599 (1363)	total: 2m 55s	remaining: 1m 21s
1364:	learn: 4.5011809	test: 4.2015305	best: 4.2015305 (1364)	total: 2m 55s	remaining: 1m 21s
1365:	learn: 4.5011796	test: 4.2015596	best: 4.2015305 (1364)	total: 2m 55s	remaining: 1m 21s
1366:	learn: 4.5011785	test: 4.2015619	best: 4.2015305 (1364)	total: 2m 56s	remaining: 1m 21s
1367:	learn: 4.5011763	test: 4.2015585	best: 4.2015305 (1364)	total: 2m 56s	remaining: 1m 21s
1368:	learn: 4.5011747	test: 4.2015447	best: 4.2015305 (1364)	total: 2m 56s	remaining: 1m 21s
1369:	learn: 4.5011720	test: 4.2015541	best: 4.2015305 (1364)	total: 2m 56s	remaining: 1m 21s
1370:	learn: 4.5011689	test: 4.2015475	best: 4.2015305 (1364)	total: 2m 56s	remaining: 1m 21s
1371:	learn: 4.5011608	test: 4.2015161	best: 4.2015161 (1371)	total: 2m 56s	remaining: 1m 20s
1372:	learn: 4.5011590	test: 4.2015210	best: 4.2015161 (1371

1451:	learn: 4.5009694	test: 4.2010844	best: 4.2010249 (1446)	total: 3m 7s	remaining: 1m 10s
1452:	learn: 4.5009678	test: 4.2010235	best: 4.2010235 (1452)	total: 3m 7s	remaining: 1m 10s
1453:	learn: 4.5009668	test: 4.2010117	best: 4.2010117 (1453)	total: 3m 7s	remaining: 1m 10s
1454:	learn: 4.5009665	test: 4.2010046	best: 4.2010046 (1454)	total: 3m 7s	remaining: 1m 10s
1455:	learn: 4.5009663	test: 4.2010045	best: 4.2010045 (1455)	total: 3m 7s	remaining: 1m 10s
1456:	learn: 4.5009649	test: 4.2010133	best: 4.2010045 (1455)	total: 3m 7s	remaining: 1m 10s
1457:	learn: 4.5009594	test: 4.2009997	best: 4.2009997 (1457)	total: 3m 8s	remaining: 1m 9s
1458:	learn: 4.5009562	test: 4.2009721	best: 4.2009721 (1458)	total: 3m 8s	remaining: 1m 9s
1459:	learn: 4.5009528	test: 4.2009712	best: 4.2009712 (1459)	total: 3m 8s	remaining: 1m 9s
1460:	learn: 4.5009474	test: 4.2009260	best: 4.2009260 (1460)	total: 3m 8s	remaining: 1m 9s
1461:	learn: 4.5009445	test: 4.2009592	best: 4.2009260 (1460)	total: 3m 8s

1541:	learn: 4.5007941	test: 4.2011659	best: 4.2009260 (1460)	total: 3m 19s	remaining: 59.1s
1542:	learn: 4.5007900	test: 4.2011875	best: 4.2009260 (1460)	total: 3m 19s	remaining: 59s
1543:	learn: 4.5007896	test: 4.2011848	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.9s
1544:	learn: 4.5007843	test: 4.2012030	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.8s
1545:	learn: 4.5007820	test: 4.2011792	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.6s
1546:	learn: 4.5007803	test: 4.2011742	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.5s
1547:	learn: 4.5007769	test: 4.2011364	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.4s
1548:	learn: 4.5007757	test: 4.2011526	best: 4.2009260 (1460)	total: 3m 19s	remaining: 58.2s
1549:	learn: 4.5007719	test: 4.2011533	best: 4.2009260 (1460)	total: 3m 20s	remaining: 58.1s
1550:	learn: 4.5007699	test: 4.2011478	best: 4.2009260 (1460)	total: 3m 20s	remaining: 58s
1551:	learn: 4.5007650	test: 4.2010847	best: 4.2009260 (1460)	total: 3m 20

1631:	learn: 4.5006225	test: 4.2013869	best: 4.2009260 (1460)	total: 3m 31s	remaining: 47.6s
1632:	learn: 4.5006188	test: 4.2013918	best: 4.2009260 (1460)	total: 3m 31s	remaining: 47.5s
1633:	learn: 4.5006182	test: 4.2013866	best: 4.2009260 (1460)	total: 3m 31s	remaining: 47.3s
1634:	learn: 4.5006172	test: 4.2014117	best: 4.2009260 (1460)	total: 3m 31s	remaining: 47.2s
1635:	learn: 4.5006161	test: 4.2014054	best: 4.2009260 (1460)	total: 3m 31s	remaining: 47.1s
1636:	learn: 4.5006160	test: 4.2014097	best: 4.2009260 (1460)	total: 3m 31s	remaining: 46.9s
1637:	learn: 4.5006142	test: 4.2013942	best: 4.2009260 (1460)	total: 3m 31s	remaining: 46.8s
1638:	learn: 4.5006133	test: 4.2013890	best: 4.2009260 (1460)	total: 3m 31s	remaining: 46.7s
1639:	learn: 4.5006121	test: 4.2013942	best: 4.2009260 (1460)	total: 3m 32s	remaining: 46.5s
1640:	learn: 4.5006112	test: 4.2013730	best: 4.2009260 (1460)	total: 3m 32s	remaining: 46.4s
1641:	learn: 4.5006090	test: 4.2013701	best: 4.2009260 (1460)	total: 3

1721:	learn: 4.5004670	test: 4.2012208	best: 4.2009260 (1460)	total: 3m 42s	remaining: 36s
1722:	learn: 4.5004645	test: 4.2012578	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.9s
1723:	learn: 4.5004640	test: 4.2012582	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.7s
1724:	learn: 4.5004626	test: 4.2012453	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.6s
1725:	learn: 4.5004623	test: 4.2012581	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.5s
1726:	learn: 4.5004594	test: 4.2011737	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.3s
1727:	learn: 4.5004584	test: 4.2011814	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.2s
1728:	learn: 4.5004573	test: 4.2011837	best: 4.2009260 (1460)	total: 3m 43s	remaining: 35.1s
1729:	learn: 4.5004559	test: 4.2012000	best: 4.2009260 (1460)	total: 3m 44s	remaining: 35s
1730:	learn: 4.5004506	test: 4.2013630	best: 4.2009260 (1460)	total: 3m 44s	remaining: 34.8s
1731:	learn: 4.5004490	test: 4.2013694	best: 4.2009260 (1460)	total: 3m 44

1811:	learn: 4.5003106	test: 4.2012890	best: 4.2009260 (1460)	total: 3m 55s	remaining: 24.4s
1812:	learn: 4.5003088	test: 4.2012918	best: 4.2009260 (1460)	total: 3m 55s	remaining: 24.3s
1813:	learn: 4.5003072	test: 4.2013010	best: 4.2009260 (1460)	total: 3m 55s	remaining: 24.1s
1814:	learn: 4.5003057	test: 4.2013180	best: 4.2009260 (1460)	total: 3m 55s	remaining: 24s
1815:	learn: 4.5003033	test: 4.2013456	best: 4.2009260 (1460)	total: 3m 55s	remaining: 23.9s
1816:	learn: 4.5003020	test: 4.2013424	best: 4.2009260 (1460)	total: 3m 55s	remaining: 23.7s
1817:	learn: 4.5003016	test: 4.2013399	best: 4.2009260 (1460)	total: 3m 55s	remaining: 23.6s
1818:	learn: 4.5002994	test: 4.2013486	best: 4.2009260 (1460)	total: 3m 55s	remaining: 23.5s
1819:	learn: 4.5002987	test: 4.2013500	best: 4.2009260 (1460)	total: 3m 56s	remaining: 23.4s
1820:	learn: 4.5002973	test: 4.2012692	best: 4.2009260 (1460)	total: 3m 56s	remaining: 23.2s
1821:	learn: 4.5002945	test: 4.2012402	best: 4.2009260 (1460)	total: 3m 

1901:	learn: 4.5001701	test: 4.2009538	best: 4.2009260 (1460)	total: 4m 7s	remaining: 12.7s
1902:	learn: 4.5001686	test: 4.2009234	best: 4.2009234 (1902)	total: 4m 7s	remaining: 12.6s
1903:	learn: 4.5001675	test: 4.2008958	best: 4.2008958 (1903)	total: 4m 7s	remaining: 12.5s
1904:	learn: 4.5001629	test: 4.2008820	best: 4.2008820 (1904)	total: 4m 7s	remaining: 12.4s
1905:	learn: 4.5001587	test: 4.2008710	best: 4.2008710 (1905)	total: 4m 7s	remaining: 12.2s
1906:	learn: 4.5001562	test: 4.2008912	best: 4.2008710 (1905)	total: 4m 7s	remaining: 12.1s
1907:	learn: 4.5001546	test: 4.2008868	best: 4.2008710 (1905)	total: 4m 8s	remaining: 12s
1908:	learn: 4.5001543	test: 4.2008850	best: 4.2008710 (1905)	total: 4m 8s	remaining: 11.8s
1909:	learn: 4.5001537	test: 4.2008838	best: 4.2008710 (1905)	total: 4m 8s	remaining: 11.7s
1910:	learn: 4.5001534	test: 4.2008846	best: 4.2008710 (1905)	total: 4m 8s	remaining: 11.6s
1911:	learn: 4.5001529	test: 4.2008832	best: 4.2008710 (1905)	total: 4m 8s	remaini

1991:	learn: 4.5000348	test: 4.2008632	best: 4.2006803 (1946)	total: 4m 19s	remaining: 1.04s
1992:	learn: 4.5000310	test: 4.2008992	best: 4.2006803 (1946)	total: 4m 19s	remaining: 912ms
1993:	learn: 4.5000276	test: 4.2009190	best: 4.2006803 (1946)	total: 4m 19s	remaining: 782ms
1994:	learn: 4.5000255	test: 4.2009159	best: 4.2006803 (1946)	total: 4m 19s	remaining: 652ms
1995:	learn: 4.5000250	test: 4.2009187	best: 4.2006803 (1946)	total: 4m 20s	remaining: 521ms
1996:	learn: 4.5000247	test: 4.2009092	best: 4.2006803 (1946)	total: 4m 20s	remaining: 391ms
1997:	learn: 4.5000233	test: 4.2009119	best: 4.2006803 (1946)	total: 4m 20s	remaining: 261ms
1998:	learn: 4.5000215	test: 4.2009160	best: 4.2006803 (1946)	total: 4m 20s	remaining: 130ms
1999:	learn: 4.5000198	test: 4.2008897	best: 4.2006803 (1946)	total: 4m 20s	remaining: 0us

bestTest = 4.200680346
bestIteration = 1946

Shrink model to first 1947 iterations.


<catboost.core.CatBoostRanker at 0x25484c28700>

In [12]:
# Persist the trained model; no format argument is passed, so CatBoost's
# default model format is used.
# NOTE(review): the 'models' directory presumably has to exist already — confirm.
model.save_model('models/catboost')

In [13]:
# Always fails, so a "Run All" stops at this cell.
# NOTE(review): looks like a deliberate guard so the cells below are only
# run by hand — confirm, and consider removing it from the final notebook.
assert False

AssertionError: 

In [None]:
# Build every (user, joke) pair so the model can score the full grid.
# NOTE(review): IDs are generated as 0..n-1, which assumes the UID/JID values
# in train_joke_df are contiguous and zero-based — confirm.
n_users = np.unique(train_joke_df['UID']).size
n_items = np.unique(train_joke_df['JID']).size

# Vectorised construction: each user ID is repeated once per joke while the
# run of joke IDs is repeated once per user. The rows come out in the same
# order as a nested "for user / for joke" loop, already as integers.
df_test = pd.DataFrame({
    'UID': np.repeat(np.arange(n_users), n_items),
    'JID': np.tile(np.arange(n_items), n_users),
})
df_test

In [None]:

# Wrap the full (user, joke) grid for prediction; no label or group_id is
# supplied for this pool.
test_pool = Pool(df_test[features], cat_features=cat_features)


In [None]:
# RMSE on the validation split. The square root is taken explicitly because
# mean_squared_error's `squared` argument was deprecated in scikit-learn 1.4
# and removed in 1.6; this form works on old and new versions alike.
predict = model.predict(valid_pool)
print(np.sqrt(mean_squared_error(valid_df['Rating'].values, predict)))


In [None]:
# Score every row of test_pool with the trained model.
predictions = model.predict(test_pool)


In [None]:
# Store the model scores on df_test as a new 'Rating_pred' column, then
# display the frame.
df_test['Rating_pred'] = predictions
df_test

In [None]:
# Left-join the scored grid onto the training ratings. The `_merge`
# indicator column records whether each (UID, JID) pair also occurs in
# train_df ('both') or only in df_test ('left_only').
mrg = pd.merge(
    df_test,
    train_df,
    how="left",
    on=['UID', 'JID'],
    indicator=True,
)
mrg

In [None]:
# Keep only the pairs that have no matching row in train_df.
unseen_mask = mrg['_merge'].eq('left_only')
mrg = mrg.loc[unseen_mask]
mrg

In [None]:
def get_n_recommendations_for_user(df, user_id, n, sort_by):
    """Return the top ``n`` rows of ``df`` for a single user.

    Args:
        df: DataFrame with a ``UID`` column and the ``sort_by`` column(s).
        user_id: Value compared against ``df['UID']``.
        n: Number of rows to keep after sorting.
        sort_by: Column label (or list of labels) to rank by, highest first.

    Returns:
        DataFrame with at most ``n`` rows belonging to ``user_id``, ordered
        by ``sort_by`` descending and keeping the original index labels.
    """
    user_rows = df.loc[df['UID'] == user_id]
    ranked_rows = user_rows.sort_values(by=sort_by, ascending=False)
    top_rows = ranked_rows.iloc[:n]
    return top_rows

In [None]:
# How many rows to keep per user when building recommendations.
n_recommendations = 10

In [None]:
# Top-N recommendations for every user, computed with one sort and a grouped
# head() rather than filtering the whole frame once per user (which scales
# with users x rows).
# Rows are ordered by UID ascending and, within a user, by Rating_pred
# descending; groupby().head() keeps that order.
# NOTE(review): this covers every UID present in mrg — confirm that matches
# the intended user set.
df_rec = (
    mrg
    .sort_values(['UID', 'Rating_pred'], ascending=[True, False])
    .groupby('UID')
    .head(n_recommendations)
    .reset_index(drop=True)
)
df_rec

In [None]:
# Inner-join the recommendations with train_df on (UID, JID): shows any
# recommended pair that also has a rating in the training split.
df_rec.merge(train_df, on=['UID','JID'])

In [None]:
# Inner-join the recommendations with valid_df on (UID, JID): shows the
# recommended pairs that have a held-out rating.
df_rec.merge(valid_df, on=['UID','JID'])

In [None]:
# One list of recommended JIDs per user, in ascending UID order
# (groupby sorts its keys by default).
jids_per_user = df_rec.groupby('UID')['JID'].agg(list)
catboost_predictions = jids_per_user.tolist()

In [None]:
# Store the recommendation frame under the 'catboost' key and pickle the
# resulting dict to the working directory.
predictions_df_catboost = dict(catboost=df_rec)
with open('predictions_df_catboost.pkl', 'wb') as pkl_file:
    pickle.dump(predictions_df_catboost, pkl_file)