In [2]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
train_joke_df = pd.read_csv(r'..\data\recsys-in-practice\train_joke_df.csv')

In [4]:
train_df, valid_df = train_test_split(train_joke_df, test_size=0.001, random_state=42)

In [5]:
# сделаем сортировку и перепишем index
train_df = train_df.sort_values(by=['UID', 'JID'])
train_df = train_df.reset_index(drop=True)

valid_df = valid_df.sort_values(by=['UID', 'JID'])
valid_df = valid_df.reset_index(drop=True)

In [6]:
from catboost import CatBoostRanker, Pool, MetricVisualizer, CatBoostRegressor
from copy import deepcopy

In [7]:
cat_features = ['UID', 'JID']

In [8]:
train_pool = Pool(train_df.drop(columns='Rating'), label=train_df['Rating'], group_id=train_df['UID'],cat_features=cat_features)
valid_pool = Pool(valid_df.drop(columns='Rating'), label=valid_df['Rating'], group_id=valid_df['UID'],cat_features=cat_features)

In [9]:
default_parameters = {
    'iterations': 2000,
    'custom_metric': 'RMSE',
    'random_seed': 0,
    'train_dir':'RMSE',
    'objective':'RMSE',
    'loss_function':'RMSE',
    'eval_metric':'RMSE'
}


In [10]:
model = CatBoostRegressor(**default_parameters)
model.fit(train_pool, eval_set=valid_pool, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.104582
0:	learn: 5.1160236	test: 5.1276499	best: 5.1276499 (0)	total: 460ms	remaining: 15m 19s
1:	learn: 5.0167272	test: 5.0149112	best: 5.0149112 (1)	total: 580ms	remaining: 9m 38s
2:	learn: 4.9354195	test: 4.9194108	best: 4.9194108 (2)	total: 700ms	remaining: 7m 46s
3:	learn: 4.8671870	test: 4.8371225	best: 4.8371225 (3)	total: 935ms	remaining: 7m 46s
4:	learn: 4.8081891	test: 4.7676563	best: 4.7676563 (4)	total: 1.19s	remaining: 7m 55s
5:	learn: 4.7612436	test: 4.7117244	best: 4.7117244 (5)	total: 1.31s	remaining: 7m 15s
6:	learn: 4.7218208	test: 4.6645126	best: 4.6645126 (6)	total: 1.43s	remaining: 6m 46s
7:	learn: 4.6886945	test: 4.6245431	best: 4.6245431 (7)	total: 1.56s	remaining: 6m 27s
8:	learn: 4.6615393	test: 4.5883545	best: 4.5883545 (8)	total: 1.68s	remaining: 6m 12s
9:	learn: 4.6386139	test: 4.5586890	best: 4.5586890 (9)	total: 1.81s	remaining: 5m 59s
10:	learn: 4.6196423	test: 4.5338720	best: 4.5338720 (10)	total: 1.93s	remaining: 5m 48s
11:	learn:

94:	learn: 4.5100873	test: 4.3516411	best: 4.3516411 (94)	total: 11.9s	remaining: 3m 58s
95:	learn: 4.5100321	test: 4.3515409	best: 4.3515409 (95)	total: 12s	remaining: 3m 58s
96:	learn: 4.5099953	test: 4.3515144	best: 4.3515144 (96)	total: 12.1s	remaining: 3m 58s
97:	learn: 4.5099661	test: 4.3513492	best: 4.3513492 (97)	total: 12.3s	remaining: 3m 58s
98:	learn: 4.5098969	test: 4.3510767	best: 4.3510767 (98)	total: 12.4s	remaining: 3m 58s
99:	learn: 4.5098644	test: 4.3510673	best: 4.3510673 (99)	total: 12.5s	remaining: 3m 57s
100:	learn: 4.5097963	test: 4.3506770	best: 4.3506770 (100)	total: 12.6s	remaining: 3m 57s
101:	learn: 4.5097552	test: 4.3505390	best: 4.3505390 (101)	total: 12.7s	remaining: 3m 57s
102:	learn: 4.5097277	test: 4.3505165	best: 4.3505165 (102)	total: 12.8s	remaining: 3m 56s
103:	learn: 4.5097176	test: 4.3505399	best: 4.3505165 (102)	total: 13s	remaining: 3m 56s
104:	learn: 4.5097013	test: 4.3505012	best: 4.3505012 (104)	total: 13.1s	remaining: 3m 56s
105:	learn: 4.5

185:	learn: 4.5077794	test: 4.3467629	best: 4.3465753 (184)	total: 22.9s	remaining: 3m 43s
186:	learn: 4.5077570	test: 4.3466798	best: 4.3465753 (184)	total: 23s	remaining: 3m 43s
187:	learn: 4.5077322	test: 4.3465941	best: 4.3465753 (184)	total: 23.1s	remaining: 3m 42s
188:	learn: 4.5077224	test: 4.3465809	best: 4.3465753 (184)	total: 23.2s	remaining: 3m 42s
189:	learn: 4.5077079	test: 4.3465757	best: 4.3465753 (184)	total: 23.4s	remaining: 3m 42s
190:	learn: 4.5076937	test: 4.3466865	best: 4.3465753 (184)	total: 23.5s	remaining: 3m 42s
191:	learn: 4.5076595	test: 4.3467254	best: 4.3465753 (184)	total: 23.6s	remaining: 3m 42s
192:	learn: 4.5076435	test: 4.3467038	best: 4.3465753 (184)	total: 23.8s	remaining: 3m 42s
193:	learn: 4.5076340	test: 4.3467200	best: 4.3465753 (184)	total: 23.9s	remaining: 3m 42s
194:	learn: 4.5076183	test: 4.3466035	best: 4.3465753 (184)	total: 24s	remaining: 3m 42s
195:	learn: 4.5076041	test: 4.3467204	best: 4.3465753 (184)	total: 24.1s	remaining: 3m 42s
196

277:	learn: 4.5066249	test: 4.3455746	best: 4.3455746 (277)	total: 34.1s	remaining: 3m 31s
278:	learn: 4.5066167	test: 4.3455319	best: 4.3455319 (278)	total: 34.2s	remaining: 3m 31s
279:	learn: 4.5066081	test: 4.3454929	best: 4.3454929 (279)	total: 34.4s	remaining: 3m 31s
280:	learn: 4.5066011	test: 4.3455326	best: 4.3454929 (279)	total: 34.5s	remaining: 3m 30s
281:	learn: 4.5065959	test: 4.3455004	best: 4.3454929 (279)	total: 34.6s	remaining: 3m 30s
282:	learn: 4.5065781	test: 4.3455110	best: 4.3454929 (279)	total: 34.7s	remaining: 3m 30s
283:	learn: 4.5065601	test: 4.3454753	best: 4.3454753 (283)	total: 34.8s	remaining: 3m 30s
284:	learn: 4.5065559	test: 4.3454873	best: 4.3454753 (283)	total: 35s	remaining: 3m 30s
285:	learn: 4.5065440	test: 4.3454388	best: 4.3454388 (285)	total: 35.1s	remaining: 3m 30s
286:	learn: 4.5065340	test: 4.3454215	best: 4.3454215 (286)	total: 35.2s	remaining: 3m 30s
287:	learn: 4.5065266	test: 4.3454651	best: 4.3454215 (286)	total: 35.4s	remaining: 3m 30s
2

368:	learn: 4.5058783	test: 4.3445415	best: 4.3445415 (368)	total: 45.1s	remaining: 3m 19s
369:	learn: 4.5058679	test: 4.3444999	best: 4.3444999 (369)	total: 45.2s	remaining: 3m 19s
370:	learn: 4.5058591	test: 4.3444618	best: 4.3444618 (370)	total: 45.3s	remaining: 3m 18s
371:	learn: 4.5058578	test: 4.3444695	best: 4.3444618 (370)	total: 45.4s	remaining: 3m 18s
372:	learn: 4.5058486	test: 4.3444974	best: 4.3444618 (370)	total: 45.6s	remaining: 3m 18s
373:	learn: 4.5058448	test: 4.3445061	best: 4.3444618 (370)	total: 45.7s	remaining: 3m 18s
374:	learn: 4.5058382	test: 4.3444509	best: 4.3444509 (374)	total: 45.8s	remaining: 3m 18s
375:	learn: 4.5058332	test: 4.3444367	best: 4.3444367 (375)	total: 45.9s	remaining: 3m 18s
376:	learn: 4.5058281	test: 4.3444494	best: 4.3444367 (375)	total: 46.1s	remaining: 3m 18s
377:	learn: 4.5058216	test: 4.3444139	best: 4.3444139 (377)	total: 46.2s	remaining: 3m 18s
378:	learn: 4.5058069	test: 4.3443963	best: 4.3443963 (378)	total: 46.3s	remaining: 3m 18s

459:	learn: 4.5053266	test: 4.3436318	best: 4.3436205 (457)	total: 56s	remaining: 3m 7s
460:	learn: 4.5053245	test: 4.3436303	best: 4.3436205 (457)	total: 56.1s	remaining: 3m 7s
461:	learn: 4.5053207	test: 4.3436143	best: 4.3436143 (461)	total: 56.3s	remaining: 3m 7s
462:	learn: 4.5053182	test: 4.3436254	best: 4.3436143 (461)	total: 56.4s	remaining: 3m 7s
463:	learn: 4.5053137	test: 4.3436200	best: 4.3436143 (461)	total: 56.5s	remaining: 3m 7s
464:	learn: 4.5053111	test: 4.3436299	best: 4.3436143 (461)	total: 56.6s	remaining: 3m 6s
465:	learn: 4.5053055	test: 4.3436148	best: 4.3436143 (461)	total: 56.8s	remaining: 3m 6s
466:	learn: 4.5053049	test: 4.3436139	best: 4.3436139 (466)	total: 56.9s	remaining: 3m 6s
467:	learn: 4.5052958	test: 4.3436160	best: 4.3436139 (466)	total: 57s	remaining: 3m 6s
468:	learn: 4.5052892	test: 4.3436132	best: 4.3436132 (468)	total: 57.1s	remaining: 3m 6s
469:	learn: 4.5052849	test: 4.3435409	best: 4.3435409 (469)	total: 57.2s	remaining: 3m 6s
470:	learn: 4.

551:	learn: 4.5048625	test: 4.3430704	best: 4.3430441 (546)	total: 1m 7s	remaining: 2m 56s
552:	learn: 4.5048542	test: 4.3430341	best: 4.3430341 (552)	total: 1m 7s	remaining: 2m 56s
553:	learn: 4.5048505	test: 4.3430414	best: 4.3430341 (552)	total: 1m 7s	remaining: 2m 56s
554:	learn: 4.5048448	test: 4.3430966	best: 4.3430341 (552)	total: 1m 7s	remaining: 2m 56s
555:	learn: 4.5048435	test: 4.3430902	best: 4.3430341 (552)	total: 1m 7s	remaining: 2m 55s
556:	learn: 4.5048423	test: 4.3431168	best: 4.3430341 (552)	total: 1m 7s	remaining: 2m 55s
557:	learn: 4.5048379	test: 4.3431452	best: 4.3430341 (552)	total: 1m 8s	remaining: 2m 55s
558:	learn: 4.5048371	test: 4.3431516	best: 4.3430341 (552)	total: 1m 8s	remaining: 2m 55s
559:	learn: 4.5048336	test: 4.3431417	best: 4.3430341 (552)	total: 1m 8s	remaining: 2m 55s
560:	learn: 4.5048298	test: 4.3431345	best: 4.3430341 (552)	total: 1m 8s	remaining: 2m 55s
561:	learn: 4.5048277	test: 4.3431277	best: 4.3430341 (552)	total: 1m 8s	remaining: 2m 55s

642:	learn: 4.5045088	test: 4.3427824	best: 4.3427539 (638)	total: 1m 18s	remaining: 2m 45s
643:	learn: 4.5045060	test: 4.3427480	best: 4.3427480 (643)	total: 1m 18s	remaining: 2m 45s
644:	learn: 4.5045051	test: 4.3427433	best: 4.3427433 (644)	total: 1m 18s	remaining: 2m 45s
645:	learn: 4.5044958	test: 4.3427146	best: 4.3427146 (645)	total: 1m 18s	remaining: 2m 45s
646:	learn: 4.5044947	test: 4.3426881	best: 4.3426881 (646)	total: 1m 18s	remaining: 2m 44s
647:	learn: 4.5044937	test: 4.3426914	best: 4.3426881 (646)	total: 1m 19s	remaining: 2m 44s
648:	learn: 4.5044903	test: 4.3427202	best: 4.3426881 (646)	total: 1m 19s	remaining: 2m 44s
649:	learn: 4.5044819	test: 4.3426998	best: 4.3426881 (646)	total: 1m 19s	remaining: 2m 44s
650:	learn: 4.5044772	test: 4.3427453	best: 4.3426881 (646)	total: 1m 19s	remaining: 2m 44s
651:	learn: 4.5044740	test: 4.3427223	best: 4.3426881 (646)	total: 1m 19s	remaining: 2m 44s
652:	learn: 4.5044724	test: 4.3427189	best: 4.3426881 (646)	total: 1m 19s	remain

732:	learn: 4.5041339	test: 4.3425652	best: 4.3425613 (724)	total: 1m 29s	remaining: 2m 34s
733:	learn: 4.5041316	test: 4.3425323	best: 4.3425323 (733)	total: 1m 29s	remaining: 2m 34s
734:	learn: 4.5041281	test: 4.3425475	best: 4.3425323 (733)	total: 1m 29s	remaining: 2m 33s
735:	learn: 4.5041268	test: 4.3425486	best: 4.3425323 (733)	total: 1m 29s	remaining: 2m 33s
736:	learn: 4.5041256	test: 4.3425332	best: 4.3425323 (733)	total: 1m 29s	remaining: 2m 33s
737:	learn: 4.5041229	test: 4.3425390	best: 4.3425323 (733)	total: 1m 29s	remaining: 2m 33s
738:	learn: 4.5041143	test: 4.3423860	best: 4.3423860 (738)	total: 1m 29s	remaining: 2m 33s
739:	learn: 4.5041130	test: 4.3423906	best: 4.3423860 (738)	total: 1m 30s	remaining: 2m 33s
740:	learn: 4.5041098	test: 4.3423963	best: 4.3423860 (738)	total: 1m 30s	remaining: 2m 33s
741:	learn: 4.5041090	test: 4.3424055	best: 4.3423860 (738)	total: 1m 30s	remaining: 2m 33s
742:	learn: 4.5041063	test: 4.3424138	best: 4.3423860 (738)	total: 1m 30s	remain

823:	learn: 4.5038340	test: 4.3421284	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 23s
824:	learn: 4.5038324	test: 4.3421043	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
825:	learn: 4.5038307	test: 4.3420846	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
826:	learn: 4.5038268	test: 4.3420647	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
827:	learn: 4.5038207	test: 4.3421545	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
828:	learn: 4.5038199	test: 4.3421374	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
829:	learn: 4.5038134	test: 4.3421365	best: 4.3420567 (773)	total: 1m 40s	remaining: 2m 22s
830:	learn: 4.5038113	test: 4.3421259	best: 4.3420567 (773)	total: 1m 41s	remaining: 2m 22s
831:	learn: 4.5038104	test: 4.3421429	best: 4.3420567 (773)	total: 1m 41s	remaining: 2m 22s
832:	learn: 4.5038090	test: 4.3421738	best: 4.3420567 (773)	total: 1m 41s	remaining: 2m 21s
833:	learn: 4.5038069	test: 4.3421697	best: 4.3420567 (773)	total: 1m 41s	remain

914:	learn: 4.5035503	test: 4.3419725	best: 4.3419524 (899)	total: 1m 51s	remaining: 2m 12s
915:	learn: 4.5035499	test: 4.3419776	best: 4.3419524 (899)	total: 1m 51s	remaining: 2m 12s
916:	learn: 4.5035453	test: 4.3419843	best: 4.3419524 (899)	total: 1m 51s	remaining: 2m 12s
917:	learn: 4.5035441	test: 4.3419856	best: 4.3419524 (899)	total: 1m 51s	remaining: 2m 11s
918:	learn: 4.5035434	test: 4.3419871	best: 4.3419524 (899)	total: 1m 52s	remaining: 2m 11s
919:	learn: 4.5035414	test: 4.3419862	best: 4.3419524 (899)	total: 1m 52s	remaining: 2m 11s
920:	learn: 4.5035368	test: 4.3419299	best: 4.3419299 (920)	total: 1m 52s	remaining: 2m 11s
921:	learn: 4.5035320	test: 4.3419237	best: 4.3419237 (921)	total: 1m 52s	remaining: 2m 11s
922:	learn: 4.5035312	test: 4.3418893	best: 4.3418893 (922)	total: 1m 52s	remaining: 2m 11s
923:	learn: 4.5035304	test: 4.3418835	best: 4.3418835 (923)	total: 1m 52s	remaining: 2m 11s
924:	learn: 4.5035275	test: 4.3418872	best: 4.3418835 (923)	total: 1m 52s	remain

1006:	learn: 4.5032871	test: 4.3416586	best: 4.3416586 (1006)	total: 2m 2s	remaining: 2m 1s
1007:	learn: 4.5032846	test: 4.3416540	best: 4.3416540 (1007)	total: 2m 2s	remaining: 2m 1s
1008:	learn: 4.5032812	test: 4.3416554	best: 4.3416540 (1007)	total: 2m 3s	remaining: 2m
1009:	learn: 4.5032724	test: 4.3416215	best: 4.3416215 (1009)	total: 2m 3s	remaining: 2m
1010:	learn: 4.5032721	test: 4.3416227	best: 4.3416215 (1009)	total: 2m 3s	remaining: 2m
1011:	learn: 4.5032714	test: 4.3416172	best: 4.3416172 (1011)	total: 2m 3s	remaining: 2m
1012:	learn: 4.5032713	test: 4.3416185	best: 4.3416172 (1011)	total: 2m 3s	remaining: 2m
1013:	learn: 4.5032697	test: 4.3416178	best: 4.3416172 (1011)	total: 2m 3s	remaining: 2m
1014:	learn: 4.5032664	test: 4.3415934	best: 4.3415934 (1014)	total: 2m 3s	remaining: 2m
1015:	learn: 4.5032645	test: 4.3416406	best: 4.3415934 (1014)	total: 2m 3s	remaining: 2m
1016:	learn: 4.5032613	test: 4.3416539	best: 4.3415934 (1014)	total: 2m 4s	remaining: 1m 59s
1017:	learn

1096:	learn: 4.5030364	test: 4.3416737	best: 4.3415266 (1085)	total: 2m 13s	remaining: 1m 50s
1097:	learn: 4.5030364	test: 4.3416752	best: 4.3415266 (1085)	total: 2m 13s	remaining: 1m 49s
1098:	learn: 4.5030349	test: 4.3416436	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1099:	learn: 4.5030344	test: 4.3416442	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1100:	learn: 4.5030320	test: 4.3416708	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1101:	learn: 4.5030316	test: 4.3416861	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1102:	learn: 4.5030294	test: 4.3416775	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1103:	learn: 4.5030243	test: 4.3416804	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1104:	learn: 4.5030241	test: 4.3416884	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1105:	learn: 4.5030222	test: 4.3416869	best: 4.3415266 (1085)	total: 2m 14s	remaining: 1m 49s
1106:	learn: 4.5030197	test: 4.3416970	best: 4.3415266 (1085

1184:	learn: 4.5028329	test: 4.3416333	best: 4.3414261 (1145)	total: 2m 24s	remaining: 1m 39s
1185:	learn: 4.5028324	test: 4.3416338	best: 4.3414261 (1145)	total: 2m 24s	remaining: 1m 39s
1186:	learn: 4.5028276	test: 4.3416272	best: 4.3414261 (1145)	total: 2m 24s	remaining: 1m 39s
1187:	learn: 4.5028256	test: 4.3416240	best: 4.3414261 (1145)	total: 2m 24s	remaining: 1m 39s
1188:	learn: 4.5028244	test: 4.3415817	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1189:	learn: 4.5028223	test: 4.3415309	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1190:	learn: 4.5028205	test: 4.3415130	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1191:	learn: 4.5028155	test: 4.3415202	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1192:	learn: 4.5028140	test: 4.3415218	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1193:	learn: 4.5028121	test: 4.3415453	best: 4.3414261 (1145)	total: 2m 25s	remaining: 1m 38s
1194:	learn: 4.5028121	test: 4.3415453	best: 4.3414261 (1145

1272:	learn: 4.5026541	test: 4.3415418	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1273:	learn: 4.5026539	test: 4.3415415	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1274:	learn: 4.5026519	test: 4.3415377	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1275:	learn: 4.5026481	test: 4.3415500	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1276:	learn: 4.5026463	test: 4.3415635	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1277:	learn: 4.5026435	test: 4.3415530	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 28s
1278:	learn: 4.5026386	test: 4.3415100	best: 4.3414261 (1145)	total: 2m 35s	remaining: 1m 27s
1279:	learn: 4.5026371	test: 4.3415249	best: 4.3414261 (1145)	total: 2m 36s	remaining: 1m 27s
1280:	learn: 4.5026331	test: 4.3415228	best: 4.3414261 (1145)	total: 2m 36s	remaining: 1m 27s
1281:	learn: 4.5026323	test: 4.3415225	best: 4.3414261 (1145)	total: 2m 36s	remaining: 1m 27s
1282:	learn: 4.5026317	test: 4.3415222	best: 4.3414261 (1145

1360:	learn: 4.5024793	test: 4.3410789	best: 4.3410549 (1358)	total: 2m 46s	remaining: 1m 18s
1361:	learn: 4.5024761	test: 4.3410744	best: 4.3410549 (1358)	total: 2m 46s	remaining: 1m 17s
1362:	learn: 4.5024742	test: 4.3410529	best: 4.3410529 (1362)	total: 2m 46s	remaining: 1m 17s
1363:	learn: 4.5024723	test: 4.3410605	best: 4.3410529 (1362)	total: 2m 46s	remaining: 1m 17s
1364:	learn: 4.5024709	test: 4.3410714	best: 4.3410529 (1362)	total: 2m 46s	remaining: 1m 17s
1365:	learn: 4.5024700	test: 4.3410530	best: 4.3410529 (1362)	total: 2m 47s	remaining: 1m 17s
1366:	learn: 4.5024690	test: 4.3410914	best: 4.3410529 (1362)	total: 2m 47s	remaining: 1m 17s
1367:	learn: 4.5024641	test: 4.3411554	best: 4.3410529 (1362)	total: 2m 47s	remaining: 1m 17s
1368:	learn: 4.5024599	test: 4.3411779	best: 4.3410529 (1362)	total: 2m 47s	remaining: 1m 17s
1369:	learn: 4.5024589	test: 4.3411439	best: 4.3410529 (1362)	total: 2m 47s	remaining: 1m 17s
1370:	learn: 4.5024585	test: 4.3411421	best: 4.3410529 (1362

1449:	learn: 4.5023018	test: 4.3405804	best: 4.3405387 (1444)	total: 2m 57s	remaining: 1m 7s
1450:	learn: 4.5023016	test: 4.3405788	best: 4.3405387 (1444)	total: 2m 57s	remaining: 1m 7s
1451:	learn: 4.5023002	test: 4.3405916	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 7s
1452:	learn: 4.5022970	test: 4.3406370	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 7s
1453:	learn: 4.5022953	test: 4.3406211	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1454:	learn: 4.5022941	test: 4.3406203	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1455:	learn: 4.5022934	test: 4.3406235	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1456:	learn: 4.5022934	test: 4.3406243	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1457:	learn: 4.5022904	test: 4.3406380	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1458:	learn: 4.5022900	test: 4.3406385	best: 4.3405387 (1444)	total: 2m 58s	remaining: 1m 6s
1459:	learn: 4.5022876	test: 4.3406516	best: 4.3405387 (1444)	total: 2

1539:	learn: 4.5021325	test: 4.3406666	best: 4.3405387 (1444)	total: 3m 8s	remaining: 56.4s
1540:	learn: 4.5021320	test: 4.3406536	best: 4.3405387 (1444)	total: 3m 9s	remaining: 56.3s
1541:	learn: 4.5021304	test: 4.3406509	best: 4.3405387 (1444)	total: 3m 9s	remaining: 56.2s
1542:	learn: 4.5021301	test: 4.3406456	best: 4.3405387 (1444)	total: 3m 9s	remaining: 56.1s
1543:	learn: 4.5021290	test: 4.3406642	best: 4.3405387 (1444)	total: 3m 9s	remaining: 55.9s
1544:	learn: 4.5021284	test: 4.3406547	best: 4.3405387 (1444)	total: 3m 9s	remaining: 55.8s
1545:	learn: 4.5021267	test: 4.3406368	best: 4.3405387 (1444)	total: 3m 9s	remaining: 55.7s
1546:	learn: 4.5021262	test: 4.3406418	best: 4.3405387 (1444)	total: 3m 9s	remaining: 55.6s
1547:	learn: 4.5021252	test: 4.3406335	best: 4.3405387 (1444)	total: 3m 9s	remaining: 55.5s
1548:	learn: 4.5021243	test: 4.3406264	best: 4.3405387 (1444)	total: 3m 10s	remaining: 55.3s
1549:	learn: 4.5021196	test: 4.3406081	best: 4.3405387 (1444)	total: 3m 10s	rem

1629:	learn: 4.5019682	test: 4.3407213	best: 4.3405387 (1444)	total: 3m 19s	remaining: 45.4s
1630:	learn: 4.5019655	test: 4.3407029	best: 4.3405387 (1444)	total: 3m 20s	remaining: 45.3s
1631:	learn: 4.5019650	test: 4.3406926	best: 4.3405387 (1444)	total: 3m 20s	remaining: 45.1s
1632:	learn: 4.5019645	test: 4.3406838	best: 4.3405387 (1444)	total: 3m 20s	remaining: 45s
1633:	learn: 4.5019627	test: 4.3406741	best: 4.3405387 (1444)	total: 3m 20s	remaining: 44.9s
1634:	learn: 4.5019622	test: 4.3406705	best: 4.3405387 (1444)	total: 3m 20s	remaining: 44.8s
1635:	learn: 4.5019617	test: 4.3406731	best: 4.3405387 (1444)	total: 3m 20s	remaining: 44.7s
1636:	learn: 4.5019600	test: 4.3406089	best: 4.3405387 (1444)	total: 3m 20s	remaining: 44.5s
1637:	learn: 4.5019558	test: 4.3406169	best: 4.3405387 (1444)	total: 3m 20s	remaining: 44.4s
1638:	learn: 4.5019528	test: 4.3406002	best: 4.3405387 (1444)	total: 3m 21s	remaining: 44.3s
1639:	learn: 4.5019523	test: 4.3405945	best: 4.3405387 (1444)	total: 3m 

1719:	learn: 4.5018012	test: 4.3403144	best: 4.3402519 (1685)	total: 3m 31s	remaining: 34.4s
1720:	learn: 4.5017999	test: 4.3403048	best: 4.3402519 (1685)	total: 3m 31s	remaining: 34.2s
1721:	learn: 4.5017988	test: 4.3403062	best: 4.3402519 (1685)	total: 3m 31s	remaining: 34.1s
1722:	learn: 4.5017961	test: 4.3403128	best: 4.3402519 (1685)	total: 3m 31s	remaining: 34s
1723:	learn: 4.5017951	test: 4.3403176	best: 4.3402519 (1685)	total: 3m 31s	remaining: 33.9s
1724:	learn: 4.5017921	test: 4.3403274	best: 4.3402519 (1685)	total: 3m 31s	remaining: 33.7s
1725:	learn: 4.5017909	test: 4.3403319	best: 4.3402519 (1685)	total: 3m 31s	remaining: 33.6s
1726:	learn: 4.5017862	test: 4.3403515	best: 4.3402519 (1685)	total: 3m 31s	remaining: 33.5s
1727:	learn: 4.5017858	test: 4.3403498	best: 4.3402519 (1685)	total: 3m 32s	remaining: 33.4s
1728:	learn: 4.5017853	test: 4.3403316	best: 4.3402519 (1685)	total: 3m 32s	remaining: 33.3s
1729:	learn: 4.5017824	test: 4.3403304	best: 4.3402519 (1685)	total: 3m 

1809:	learn: 4.5016487	test: 4.3399417	best: 4.3399361 (1807)	total: 3m 42s	remaining: 23.3s
1810:	learn: 4.5016484	test: 4.3399391	best: 4.3399361 (1807)	total: 3m 42s	remaining: 23.2s
1811:	learn: 4.5016465	test: 4.3399117	best: 4.3399117 (1811)	total: 3m 42s	remaining: 23.1s
1812:	learn: 4.5016453	test: 4.3398993	best: 4.3398993 (1812)	total: 3m 42s	remaining: 23s
1813:	learn: 4.5016440	test: 4.3398998	best: 4.3398993 (1812)	total: 3m 42s	remaining: 22.8s
1814:	learn: 4.5016412	test: 4.3399086	best: 4.3398993 (1812)	total: 3m 42s	remaining: 22.7s
1815:	learn: 4.5016391	test: 4.3398600	best: 4.3398600 (1815)	total: 3m 42s	remaining: 22.6s
1816:	learn: 4.5016366	test: 4.3398546	best: 4.3398546 (1816)	total: 3m 43s	remaining: 22.5s
1817:	learn: 4.5016363	test: 4.3398550	best: 4.3398546 (1816)	total: 3m 43s	remaining: 22.3s
1818:	learn: 4.5016350	test: 4.3398679	best: 4.3398546 (1816)	total: 3m 43s	remaining: 22.2s
1819:	learn: 4.5016341	test: 4.3398671	best: 4.3398546 (1816)	total: 3m 

1898:	learn: 4.5015096	test: 4.3397386	best: 4.3397386 (1898)	total: 3m 53s	remaining: 12.4s
1899:	learn: 4.5015054	test: 4.3397069	best: 4.3397069 (1899)	total: 3m 53s	remaining: 12.3s
1900:	learn: 4.5015036	test: 4.3397097	best: 4.3397069 (1899)	total: 3m 53s	remaining: 12.2s
1901:	learn: 4.5015031	test: 4.3397093	best: 4.3397069 (1899)	total: 3m 53s	remaining: 12s
1902:	learn: 4.5015014	test: 4.3397055	best: 4.3397055 (1902)	total: 3m 53s	remaining: 11.9s
1903:	learn: 4.5015009	test: 4.3397094	best: 4.3397055 (1902)	total: 3m 53s	remaining: 11.8s
1904:	learn: 4.5014980	test: 4.3397097	best: 4.3397055 (1902)	total: 3m 53s	remaining: 11.7s
1905:	learn: 4.5014934	test: 4.3397281	best: 4.3397055 (1902)	total: 3m 54s	remaining: 11.5s
1906:	learn: 4.5014916	test: 4.3397270	best: 4.3397055 (1902)	total: 3m 54s	remaining: 11.4s
1907:	learn: 4.5014904	test: 4.3397325	best: 4.3397055 (1902)	total: 3m 54s	remaining: 11.3s
1908:	learn: 4.5014894	test: 4.3397334	best: 4.3397055 (1902)	total: 3m 

1987:	learn: 4.5013752	test: 4.3396330	best: 4.3395610 (1944)	total: 4m 4s	remaining: 1.47s
1988:	learn: 4.5013744	test: 4.3396281	best: 4.3395610 (1944)	total: 4m 4s	remaining: 1.35s
1989:	learn: 4.5013725	test: 4.3396304	best: 4.3395610 (1944)	total: 4m 4s	remaining: 1.23s
1990:	learn: 4.5013713	test: 4.3396424	best: 4.3395610 (1944)	total: 4m 4s	remaining: 1.1s
1991:	learn: 4.5013701	test: 4.3396208	best: 4.3395610 (1944)	total: 4m 4s	remaining: 983ms
1992:	learn: 4.5013665	test: 4.3395953	best: 4.3395610 (1944)	total: 4m 4s	remaining: 860ms
1993:	learn: 4.5013661	test: 4.3395878	best: 4.3395610 (1944)	total: 4m 5s	remaining: 737ms
1994:	learn: 4.5013644	test: 4.3396008	best: 4.3395610 (1944)	total: 4m 5s	remaining: 614ms
1995:	learn: 4.5013622	test: 4.3396006	best: 4.3395610 (1944)	total: 4m 5s	remaining: 491ms
1996:	learn: 4.5013606	test: 4.3396398	best: 4.3395610 (1944)	total: 4m 5s	remaining: 369ms
1997:	learn: 4.5013598	test: 4.3396291	best: 4.3395610 (1944)	total: 4m 5s	remain

<catboost.core.CatBoostRegressor at 0x2c8354fa5f0>

In [11]:
predict = model.predict(valid_pool)
print(mean_squared_error(valid_df['Rating'].values, predict, squared=False))

predict = model.predict(train_pool)
print(mean_squared_error(train_df['Rating'].values, predict, squared=False))


4.339561031804889
4.354841828685592


In [12]:
model_name = "catboost"

In [13]:
model.save_model(model_name)

from_file = CatBoostRegressor()

from_file.load_model(model_name)


<catboost.core.CatBoostRegressor at 0x2c8380d1030>

In [14]:
predict = from_file.predict(valid_pool)
print(mean_squared_error(valid_df['Rating'].values, predict, squared=False))

predict = from_file.predict(train_pool)
print(mean_squared_error(train_df['Rating'].values, predict, squared=False))


4.339561031804889
4.354841828685592


In [16]:
test_joke_df_nofactrating = pd.read_csv(r'..\data\recsys-in-practice\test_joke_df_nofactrating.csv', index_col=0)

test_joke_df_nofactrating = test_joke_df_nofactrating.sort_values(by=['UID', 'JID'])
test_pool = Pool(test_joke_df_nofactrating, group_id=test_joke_df_nofactrating['UID'], cat_features=cat_features)

In [17]:
predict = model.predict(test_pool)

test_joke_df_nofactrating['Rating'] = predict

display(test_joke_df_nofactrating['Rating'].to_frame().head(5))
test_joke_df_nofactrating['Rating'].to_frame().to_csv('catboost_v2.csv')

Unnamed: 0_level_0,Rating
InteractionID,Unnamed: 1_level_1
219361,-0.981968
83962,-1.759919
64748,-2.126065
83931,0.584312
170573,-1.509255


In [18]:
test_joke_df_nofactrating

Unnamed: 0_level_0,UID,JID,Rating
InteractionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
219361,1,11,-0.981968
83962,1,19,-1.759919
64748,1,25,-2.126065
83931,1,31,0.584312
170573,1,34,-1.509255
...,...,...,...
291554,24983,60,1.211056
21758,24983,61,4.190840
14611,24983,63,1.482737
259703,24983,65,4.190840
