In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from catboost import CatBoostRegressor, Pool
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [2]:
data = pd.read_csv('./train.csv')
data_test = pd.read_csv('./test.csv')

In [3]:
data_train, data_valid = train_test_split(data)

# Glove embedding

In [4]:
embeddings_dict = {}
with open("glove.6B.300d.txt", 'r') as f:
    for line in f:
        values = line.split()
        word = values[0]
        vector = np.asarray(values[1:], "float32")
        embeddings_dict[word] = vector

In [5]:
num_reserved = 2

# Create embedding layer which will mask out unknown words
embedding_vocab = list(embeddings_dict.keys())
embeddings_matrix = np.zeros((len(embeddings_dict) + num_reserved, 300))
embeddings_matrix[num_reserved:] = np.array(list(embeddings_dict.values()))
embedding_layer = keras.layers.Embedding(len(embeddings_matrix), 300, 
                                   trainable=False, 
                                   weights=[embeddings_matrix], 
                                   mask_zero=True)
avg_layer = keras.layers.GlobalAveragePooling1D()

In [6]:
vectorize_layer = TextVectorization(output_mode='int')
vectorize_layer.set_vocabulary(embedding_vocab)

embed_avg = keras.Sequential([keras.Input(shape=(1,), dtype=tf.string), 
                              vectorize_layer,
                              embedding_layer,
                              avg_layer])

In [7]:
text_train = tf.data.Dataset.from_tensor_slices(data_train['Review'])
text_valid = tf.data.Dataset.from_tensor_slices(data_valid['Review'])
text_test = tf.data.Dataset.from_tensor_slices(data_test['Review'])

In [8]:
X_text_train = embed_avg.predict(text_train.batch(1000))
X_text_valid = embed_avg.predict(text_valid.batch(1000))
X_text_test = embed_avg.predict(text_test.batch(1000))

In [9]:
del embedding_vocab, embeddings_matrix, embedding_layer, embed_avg, embeddings_dict

# Non-text data preparation

In [10]:
data_train.fillna({'Price': 0, 'Brand': 'NaN'}, inplace=True)
data_valid.fillna({'Price': 0, 'Brand': 'NaN'}, inplace=True)
data_test.fillna({'Price': 0, 'Brand': 'NaN'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


Now gather up all the features and put them into matrcies for training

In [11]:
X_train = np.hstack([data_train[['Price', 'Brand', 'Upvotes']], X_text_train])
X_valid = np.hstack([data_valid[['Price', 'Brand', 'Upvotes']], X_text_valid])
X_test = np.hstack([data_test[['Price', 'Brand', 'Upvotes']], X_text_test])
del X_text_train, X_text_valid, X_text_test

y_train = data_train['Rating']
y_valid = data_valid['Rating']
del data_train, data_valid, 

In [14]:
cat = CatBoostRegressor(iterations=2000, learning_rate=0.07)
cat.fit(X_train, y_train, eval_set=Pool(X_valid, y_valid, cat_features=[1]), cat_features=[1])

0:	learn: 1.5131738	test: 1.5100741	best: 1.5100741 (0)	total: 220ms	remaining: 7m 20s
1:	learn: 1.4840572	test: 1.4810656	best: 1.4810656 (1)	total: 419ms	remaining: 6m 58s
2:	learn: 1.4571591	test: 1.4541276	best: 1.4541276 (2)	total: 691ms	remaining: 7m 40s
3:	learn: 1.4324766	test: 1.4293169	best: 1.4293169 (3)	total: 886ms	remaining: 7m 22s
4:	learn: 1.4100856	test: 1.4069992	best: 1.4069992 (4)	total: 1.12s	remaining: 7m 27s
5:	learn: 1.3899191	test: 1.3869006	best: 1.3869006 (5)	total: 1.33s	remaining: 7m 21s
6:	learn: 1.3706184	test: 1.3675004	best: 1.3675004 (6)	total: 1.57s	remaining: 7m 28s
7:	learn: 1.3538418	test: 1.3505593	best: 1.3505593 (7)	total: 1.8s	remaining: 7m 28s
8:	learn: 1.3374206	test: 1.3340534	best: 1.3340534 (8)	total: 1.99s	remaining: 7m 20s
9:	learn: 1.3217255	test: 1.3181731	best: 1.3181731 (9)	total: 2.23s	remaining: 7m 22s
10:	learn: 1.3081812	test: 1.3048848	best: 1.3048848 (10)	total: 2.44s	remaining: 7m 20s
11:	learn: 1.2952598	test: 1.2921071	best:

93:	learn: 1.0560185	test: 1.0582730	best: 1.0582730 (93)	total: 19.9s	remaining: 6m 43s
94:	learn: 1.0552736	test: 1.0576034	best: 1.0576034 (94)	total: 20.1s	remaining: 6m 42s
95:	learn: 1.0544573	test: 1.0568316	best: 1.0568316 (95)	total: 20.3s	remaining: 6m 42s
96:	learn: 1.0535818	test: 1.0559853	best: 1.0559853 (96)	total: 20.5s	remaining: 6m 41s
97:	learn: 1.0529247	test: 1.0554159	best: 1.0554159 (97)	total: 20.7s	remaining: 6m 42s
98:	learn: 1.0521554	test: 1.0546043	best: 1.0546043 (98)	total: 20.9s	remaining: 6m 41s
99:	learn: 1.0514242	test: 1.0539191	best: 1.0539191 (99)	total: 21.1s	remaining: 6m 41s
100:	learn: 1.0507372	test: 1.0533003	best: 1.0533003 (100)	total: 21.3s	remaining: 6m 41s
101:	learn: 1.0499740	test: 1.0525129	best: 1.0525129 (101)	total: 21.6s	remaining: 6m 41s
102:	learn: 1.0491922	test: 1.0518043	best: 1.0518043 (102)	total: 21.7s	remaining: 6m 40s
103:	learn: 1.0484562	test: 1.0511019	best: 1.0511019 (103)	total: 21.9s	remaining: 6m 39s
104:	learn: 1

185:	learn: 1.0071796	test: 1.0129363	best: 1.0129363 (185)	total: 38.3s	remaining: 6m 13s
186:	learn: 1.0067713	test: 1.0125506	best: 1.0125506 (186)	total: 38.5s	remaining: 6m 13s
187:	learn: 1.0063109	test: 1.0121185	best: 1.0121185 (187)	total: 38.7s	remaining: 6m 13s
188:	learn: 1.0058461	test: 1.0116899	best: 1.0116899 (188)	total: 38.9s	remaining: 6m 12s
189:	learn: 1.0053841	test: 1.0112521	best: 1.0112521 (189)	total: 39.2s	remaining: 6m 13s
190:	learn: 1.0049835	test: 1.0108928	best: 1.0108928 (190)	total: 39.3s	remaining: 6m 12s
191:	learn: 1.0045446	test: 1.0104868	best: 1.0104868 (191)	total: 39.5s	remaining: 6m 12s
192:	learn: 1.0041039	test: 1.0100704	best: 1.0100704 (192)	total: 39.8s	remaining: 6m 12s
193:	learn: 1.0037359	test: 1.0097711	best: 1.0097711 (193)	total: 39.9s	remaining: 6m 11s
194:	learn: 1.0033422	test: 1.0094158	best: 1.0094158 (194)	total: 40.1s	remaining: 6m 11s
195:	learn: 1.0029577	test: 1.0090830	best: 1.0090830 (195)	total: 40.4s	remaining: 6m 11s

277:	learn: 0.9735611	test: 0.9834073	best: 0.9834073 (277)	total: 57.4s	remaining: 5m 55s
278:	learn: 0.9732432	test: 0.9831465	best: 0.9831465 (278)	total: 57.6s	remaining: 5m 55s
279:	learn: 0.9729205	test: 0.9828786	best: 0.9828786 (279)	total: 57.8s	remaining: 5m 55s
280:	learn: 0.9725903	test: 0.9825953	best: 0.9825953 (280)	total: 58s	remaining: 5m 54s
281:	learn: 0.9722473	test: 0.9823226	best: 0.9823226 (281)	total: 58.2s	remaining: 5m 54s
282:	learn: 0.9719438	test: 0.9820318	best: 0.9820318 (282)	total: 58.5s	remaining: 5m 54s
283:	learn: 0.9716657	test: 0.9817948	best: 0.9817948 (283)	total: 58.7s	remaining: 5m 54s
284:	learn: 0.9713802	test: 0.9815664	best: 0.9815664 (284)	total: 58.9s	remaining: 5m 54s
285:	learn: 0.9711135	test: 0.9813121	best: 0.9813121 (285)	total: 59.1s	remaining: 5m 54s
286:	learn: 0.9707997	test: 0.9810308	best: 0.9810308 (286)	total: 59.3s	remaining: 5m 53s
287:	learn: 0.9704996	test: 0.9807954	best: 0.9807954 (287)	total: 59.6s	remaining: 5m 54s
2

368:	learn: 0.9483488	test: 0.9625834	best: 0.9625834 (368)	total: 1m 17s	remaining: 5m 43s
369:	learn: 0.9481046	test: 0.9624219	best: 0.9624219 (369)	total: 1m 17s	remaining: 5m 43s
370:	learn: 0.9478723	test: 0.9622358	best: 0.9622358 (370)	total: 1m 18s	remaining: 5m 42s
371:	learn: 0.9476313	test: 0.9620210	best: 0.9620210 (371)	total: 1m 18s	remaining: 5m 42s
372:	learn: 0.9473644	test: 0.9618080	best: 0.9618080 (372)	total: 1m 18s	remaining: 5m 42s
373:	learn: 0.9471288	test: 0.9616061	best: 0.9616061 (373)	total: 1m 18s	remaining: 5m 41s
374:	learn: 0.9468893	test: 0.9614178	best: 0.9614178 (374)	total: 1m 18s	remaining: 5m 41s
375:	learn: 0.9466293	test: 0.9612315	best: 0.9612315 (375)	total: 1m 19s	remaining: 5m 41s
376:	learn: 0.9463753	test: 0.9610198	best: 0.9610198 (376)	total: 1m 19s	remaining: 5m 41s
377:	learn: 0.9461119	test: 0.9608206	best: 0.9608206 (377)	total: 1m 19s	remaining: 5m 41s
378:	learn: 0.9458797	test: 0.9606413	best: 0.9606413 (378)	total: 1m 19s	remain

459:	learn: 0.9276455	test: 0.9461634	best: 0.9461634 (459)	total: 1m 36s	remaining: 5m 21s
460:	learn: 0.9274185	test: 0.9459715	best: 0.9459715 (460)	total: 1m 36s	remaining: 5m 21s
461:	learn: 0.9271991	test: 0.9458042	best: 0.9458042 (461)	total: 1m 36s	remaining: 5m 21s
462:	learn: 0.9269736	test: 0.9456045	best: 0.9456045 (462)	total: 1m 36s	remaining: 5m 20s
463:	learn: 0.9267700	test: 0.9454602	best: 0.9454602 (463)	total: 1m 36s	remaining: 5m 20s
464:	learn: 0.9265823	test: 0.9452731	best: 0.9452731 (464)	total: 1m 37s	remaining: 5m 20s
465:	learn: 0.9263795	test: 0.9451174	best: 0.9451174 (465)	total: 1m 37s	remaining: 5m 20s
466:	learn: 0.9262100	test: 0.9449774	best: 0.9449774 (466)	total: 1m 37s	remaining: 5m 19s
467:	learn: 0.9260265	test: 0.9448506	best: 0.9448506 (467)	total: 1m 37s	remaining: 5m 19s
468:	learn: 0.9258438	test: 0.9446984	best: 0.9446984 (468)	total: 1m 37s	remaining: 5m 19s
469:	learn: 0.9256207	test: 0.9445030	best: 0.9445030 (469)	total: 1m 37s	remain

549:	learn: 0.9105068	test: 0.9328306	best: 0.9328306 (549)	total: 1m 54s	remaining: 5m 1s
550:	learn: 0.9103134	test: 0.9326625	best: 0.9326625 (550)	total: 1m 54s	remaining: 5m 1s
551:	learn: 0.9101326	test: 0.9325272	best: 0.9325272 (551)	total: 1m 54s	remaining: 5m
552:	learn: 0.9099499	test: 0.9323813	best: 0.9323813 (552)	total: 1m 54s	remaining: 5m
553:	learn: 0.9097581	test: 0.9322500	best: 0.9322500 (553)	total: 1m 55s	remaining: 5m
554:	learn: 0.9095721	test: 0.9320996	best: 0.9320996 (554)	total: 1m 55s	remaining: 5m
555:	learn: 0.9094133	test: 0.9319780	best: 0.9319780 (555)	total: 1m 55s	remaining: 4m 59s
556:	learn: 0.9092461	test: 0.9318401	best: 0.9318401 (556)	total: 1m 55s	remaining: 4m 59s
557:	learn: 0.9090377	test: 0.9316652	best: 0.9316652 (557)	total: 1m 55s	remaining: 4m 59s
558:	learn: 0.9088525	test: 0.9315153	best: 0.9315153 (558)	total: 1m 56s	remaining: 4m 59s
559:	learn: 0.9087194	test: 0.9313988	best: 0.9313988 (559)	total: 1m 56s	remaining: 4m 58s
560:	l

640:	learn: 0.8948635	test: 0.9211912	best: 0.9211912 (640)	total: 2m 12s	remaining: 4m 41s
641:	learn: 0.8946996	test: 0.9210804	best: 0.9210804 (641)	total: 2m 12s	remaining: 4m 41s
642:	learn: 0.8945567	test: 0.9209434	best: 0.9209434 (642)	total: 2m 13s	remaining: 4m 41s
643:	learn: 0.8943852	test: 0.9207997	best: 0.9207997 (643)	total: 2m 13s	remaining: 4m 40s
644:	learn: 0.8942518	test: 0.9206963	best: 0.9206963 (644)	total: 2m 13s	remaining: 4m 40s
645:	learn: 0.8940937	test: 0.9205570	best: 0.9205570 (645)	total: 2m 13s	remaining: 4m 40s
646:	learn: 0.8939386	test: 0.9204430	best: 0.9204430 (646)	total: 2m 13s	remaining: 4m 40s
647:	learn: 0.8937823	test: 0.9203536	best: 0.9203536 (647)	total: 2m 14s	remaining: 4m 39s
648:	learn: 0.8936104	test: 0.9202012	best: 0.9202012 (648)	total: 2m 14s	remaining: 4m 39s
649:	learn: 0.8934704	test: 0.9200970	best: 0.9200970 (649)	total: 2m 14s	remaining: 4m 39s
650:	learn: 0.8933117	test: 0.9199682	best: 0.9199682 (650)	total: 2m 14s	remain

730:	learn: 0.8809336	test: 0.9108570	best: 0.9108570 (730)	total: 2m 30s	remaining: 4m 21s
731:	learn: 0.8807872	test: 0.9107598	best: 0.9107598 (731)	total: 2m 30s	remaining: 4m 21s
732:	learn: 0.8806895	test: 0.9106850	best: 0.9106850 (732)	total: 2m 30s	remaining: 4m 20s
733:	learn: 0.8805491	test: 0.9105777	best: 0.9105777 (733)	total: 2m 31s	remaining: 4m 20s
734:	learn: 0.8803779	test: 0.9104406	best: 0.9104406 (734)	total: 2m 31s	remaining: 4m 20s
735:	learn: 0.8802558	test: 0.9103480	best: 0.9103480 (735)	total: 2m 31s	remaining: 4m 20s
736:	learn: 0.8801022	test: 0.9102201	best: 0.9102201 (736)	total: 2m 31s	remaining: 4m 19s
737:	learn: 0.8799617	test: 0.9101043	best: 0.9101043 (737)	total: 2m 31s	remaining: 4m 19s
738:	learn: 0.8798374	test: 0.9100039	best: 0.9100039 (738)	total: 2m 32s	remaining: 4m 19s
739:	learn: 0.8796821	test: 0.9098844	best: 0.9098844 (739)	total: 2m 32s	remaining: 4m 19s
740:	learn: 0.8795385	test: 0.9097935	best: 0.9097935 (740)	total: 2m 32s	remain

820:	learn: 0.8681313	test: 0.9015932	best: 0.9015932 (820)	total: 2m 48s	remaining: 4m 2s
821:	learn: 0.8679896	test: 0.9014983	best: 0.9014983 (821)	total: 2m 48s	remaining: 4m 1s
822:	learn: 0.8678613	test: 0.9013924	best: 0.9013924 (822)	total: 2m 48s	remaining: 4m 1s
823:	learn: 0.8677334	test: 0.9013074	best: 0.9013074 (823)	total: 2m 49s	remaining: 4m 1s
824:	learn: 0.8676060	test: 0.9012125	best: 0.9012125 (824)	total: 2m 49s	remaining: 4m 1s
825:	learn: 0.8674589	test: 0.9011115	best: 0.9011115 (825)	total: 2m 49s	remaining: 4m
826:	learn: 0.8673275	test: 0.9010238	best: 0.9010238 (826)	total: 2m 49s	remaining: 4m
827:	learn: 0.8672171	test: 0.9009630	best: 0.9009630 (827)	total: 2m 49s	remaining: 4m
828:	learn: 0.8670900	test: 0.9008585	best: 0.9008585 (828)	total: 2m 50s	remaining: 4m
829:	learn: 0.8669605	test: 0.9007881	best: 0.9007881 (829)	total: 2m 50s	remaining: 4m
830:	learn: 0.8668614	test: 0.9007269	best: 0.9007269 (830)	total: 2m 50s	remaining: 3m 59s
831:	learn: 0

911:	learn: 0.8563595	test: 0.8934012	best: 0.8934012 (911)	total: 3m 7s	remaining: 3m 43s
912:	learn: 0.8562266	test: 0.8933232	best: 0.8933232 (912)	total: 3m 7s	remaining: 3m 42s
913:	learn: 0.8561230	test: 0.8932557	best: 0.8932557 (913)	total: 3m 7s	remaining: 3m 42s
914:	learn: 0.8559889	test: 0.8931831	best: 0.8931831 (914)	total: 3m 7s	remaining: 3m 42s
915:	learn: 0.8558586	test: 0.8930590	best: 0.8930590 (915)	total: 3m 7s	remaining: 3m 42s
916:	learn: 0.8557377	test: 0.8929571	best: 0.8929571 (916)	total: 3m 8s	remaining: 3m 42s
917:	learn: 0.8556197	test: 0.8928677	best: 0.8928677 (917)	total: 3m 8s	remaining: 3m 41s
918:	learn: 0.8554786	test: 0.8927793	best: 0.8927793 (918)	total: 3m 8s	remaining: 3m 41s
919:	learn: 0.8553695	test: 0.8927186	best: 0.8927186 (919)	total: 3m 8s	remaining: 3m 41s
920:	learn: 0.8552579	test: 0.8926461	best: 0.8926461 (920)	total: 3m 8s	remaining: 3m 41s
921:	learn: 0.8551453	test: 0.8925662	best: 0.8925662 (921)	total: 3m 9s	remaining: 3m 41s

1001:	learn: 0.8455820	test: 0.8857801	best: 0.8857801 (1001)	total: 3m 27s	remaining: 3m 27s
1002:	learn: 0.8454622	test: 0.8857047	best: 0.8857047 (1002)	total: 3m 28s	remaining: 3m 27s
1003:	learn: 0.8453439	test: 0.8856211	best: 0.8856211 (1003)	total: 3m 28s	remaining: 3m 26s
1004:	learn: 0.8452253	test: 0.8855321	best: 0.8855321 (1004)	total: 3m 28s	remaining: 3m 26s
1005:	learn: 0.8451076	test: 0.8854688	best: 0.8854688 (1005)	total: 3m 29s	remaining: 3m 26s
1006:	learn: 0.8449777	test: 0.8853814	best: 0.8853814 (1006)	total: 3m 29s	remaining: 3m 26s
1007:	learn: 0.8448766	test: 0.8853114	best: 0.8853114 (1007)	total: 3m 29s	remaining: 3m 26s
1008:	learn: 0.8447545	test: 0.8852157	best: 0.8852157 (1008)	total: 3m 29s	remaining: 3m 25s
1009:	learn: 0.8446200	test: 0.8851500	best: 0.8851500 (1009)	total: 3m 29s	remaining: 3m 25s
1010:	learn: 0.8445110	test: 0.8850746	best: 0.8850746 (1010)	total: 3m 30s	remaining: 3m 25s
1011:	learn: 0.8443966	test: 0.8850027	best: 0.8850027 (1011

1089:	learn: 0.8354507	test: 0.8789038	best: 0.8789038 (1089)	total: 3m 46s	remaining: 3m 8s
1090:	learn: 0.8353469	test: 0.8788397	best: 0.8788397 (1090)	total: 3m 46s	remaining: 3m 8s
1091:	learn: 0.8352276	test: 0.8787569	best: 0.8787569 (1091)	total: 3m 46s	remaining: 3m 8s
1092:	learn: 0.8351235	test: 0.8786740	best: 0.8786740 (1092)	total: 3m 46s	remaining: 3m 8s
1093:	learn: 0.8350054	test: 0.8785924	best: 0.8785924 (1093)	total: 3m 47s	remaining: 3m 8s
1094:	learn: 0.8348887	test: 0.8785004	best: 0.8785004 (1094)	total: 3m 47s	remaining: 3m 7s
1095:	learn: 0.8347634	test: 0.8784167	best: 0.8784167 (1095)	total: 3m 47s	remaining: 3m 7s
1096:	learn: 0.8346330	test: 0.8783227	best: 0.8783227 (1096)	total: 3m 47s	remaining: 3m 7s
1097:	learn: 0.8345259	test: 0.8782662	best: 0.8782662 (1097)	total: 3m 47s	remaining: 3m 7s
1098:	learn: 0.8344207	test: 0.8782200	best: 0.8782200 (1098)	total: 3m 48s	remaining: 3m 7s
1099:	learn: 0.8343474	test: 0.8781869	best: 0.8781869 (1099)	total: 3

1178:	learn: 0.8256192	test: 0.8722059	best: 0.8722059 (1178)	total: 4m 3s	remaining: 2m 49s
1179:	learn: 0.8255313	test: 0.8721553	best: 0.8721553 (1179)	total: 4m 3s	remaining: 2m 49s
1180:	learn: 0.8254311	test: 0.8720762	best: 0.8720762 (1180)	total: 4m 4s	remaining: 2m 49s
1181:	learn: 0.8253433	test: 0.8720146	best: 0.8720146 (1181)	total: 4m 4s	remaining: 2m 49s
1182:	learn: 0.8252394	test: 0.8719424	best: 0.8719424 (1182)	total: 4m 4s	remaining: 2m 48s
1183:	learn: 0.8251486	test: 0.8718713	best: 0.8718713 (1183)	total: 4m 4s	remaining: 2m 48s
1184:	learn: 0.8250318	test: 0.8718014	best: 0.8718014 (1184)	total: 4m 4s	remaining: 2m 48s
1185:	learn: 0.8249150	test: 0.8717289	best: 0.8717289 (1185)	total: 4m 4s	remaining: 2m 48s
1186:	learn: 0.8247886	test: 0.8716405	best: 0.8716405 (1186)	total: 4m 5s	remaining: 2m 47s
1187:	learn: 0.8246892	test: 0.8715784	best: 0.8715784 (1187)	total: 4m 5s	remaining: 2m 47s
1188:	learn: 0.8245781	test: 0.8714873	best: 0.8714873 (1188)	total: 4

1266:	learn: 0.8165383	test: 0.8661695	best: 0.8661695 (1266)	total: 4m 19s	remaining: 2m 30s
1267:	learn: 0.8164209	test: 0.8660768	best: 0.8660768 (1267)	total: 4m 19s	remaining: 2m 29s
1268:	learn: 0.8163165	test: 0.8660210	best: 0.8660210 (1268)	total: 4m 19s	remaining: 2m 29s
1269:	learn: 0.8162364	test: 0.8659691	best: 0.8659691 (1269)	total: 4m 19s	remaining: 2m 29s
1270:	learn: 0.8161297	test: 0.8658991	best: 0.8658991 (1270)	total: 4m 20s	remaining: 2m 29s
1271:	learn: 0.8160268	test: 0.8658358	best: 0.8658358 (1271)	total: 4m 20s	remaining: 2m 28s
1272:	learn: 0.8159266	test: 0.8657583	best: 0.8657583 (1272)	total: 4m 20s	remaining: 2m 28s
1273:	learn: 0.8158203	test: 0.8656755	best: 0.8656755 (1273)	total: 4m 20s	remaining: 2m 28s
1274:	learn: 0.8157076	test: 0.8655833	best: 0.8655833 (1274)	total: 4m 20s	remaining: 2m 28s
1275:	learn: 0.8156000	test: 0.8655265	best: 0.8655265 (1275)	total: 4m 20s	remaining: 2m 28s
1276:	learn: 0.8155136	test: 0.8654315	best: 0.8654315 (1276

1354:	learn: 0.8080387	test: 0.8604627	best: 0.8604627 (1354)	total: 4m 35s	remaining: 2m 11s
1355:	learn: 0.8079661	test: 0.8604148	best: 0.8604148 (1355)	total: 4m 35s	remaining: 2m 10s
1356:	learn: 0.8078578	test: 0.8603411	best: 0.8603411 (1356)	total: 4m 35s	remaining: 2m 10s
1357:	learn: 0.8077621	test: 0.8602615	best: 0.8602615 (1357)	total: 4m 35s	remaining: 2m 10s
1358:	learn: 0.8076566	test: 0.8601931	best: 0.8601931 (1358)	total: 4m 36s	remaining: 2m 10s
1359:	learn: 0.8075533	test: 0.8601312	best: 0.8601312 (1359)	total: 4m 36s	remaining: 2m 9s
1360:	learn: 0.8074759	test: 0.8600762	best: 0.8600762 (1360)	total: 4m 36s	remaining: 2m 9s
1361:	learn: 0.8073678	test: 0.8599925	best: 0.8599925 (1361)	total: 4m 36s	remaining: 2m 9s
1362:	learn: 0.8072667	test: 0.8599425	best: 0.8599425 (1362)	total: 4m 36s	remaining: 2m 9s
1363:	learn: 0.8071638	test: 0.8598788	best: 0.8598788 (1363)	total: 4m 36s	remaining: 2m 9s
1364:	learn: 0.8070602	test: 0.8597802	best: 0.8597802 (1364)	tot

1443:	learn: 0.7995503	test: 0.8548940	best: 0.8548940 (1443)	total: 4m 51s	remaining: 1m 52s
1444:	learn: 0.7994839	test: 0.8548432	best: 0.8548432 (1444)	total: 4m 51s	remaining: 1m 52s
1445:	learn: 0.7994110	test: 0.8547782	best: 0.8547782 (1445)	total: 4m 52s	remaining: 1m 51s
1446:	learn: 0.7993330	test: 0.8547139	best: 0.8547139 (1446)	total: 4m 52s	remaining: 1m 51s
1447:	learn: 0.7992463	test: 0.8546711	best: 0.8546711 (1447)	total: 4m 52s	remaining: 1m 51s
1448:	learn: 0.7991570	test: 0.8546169	best: 0.8546169 (1448)	total: 4m 52s	remaining: 1m 51s
1449:	learn: 0.7990605	test: 0.8545438	best: 0.8545438 (1449)	total: 4m 52s	remaining: 1m 51s
1450:	learn: 0.7989659	test: 0.8545004	best: 0.8545004 (1450)	total: 4m 53s	remaining: 1m 50s
1451:	learn: 0.7988775	test: 0.8544405	best: 0.8544405 (1451)	total: 4m 53s	remaining: 1m 50s
1452:	learn: 0.7988035	test: 0.8543878	best: 0.8543878 (1452)	total: 4m 53s	remaining: 1m 50s
1453:	learn: 0.7987242	test: 0.8543473	best: 0.8543473 (1453

1531:	learn: 0.7917308	test: 0.8498903	best: 0.8498903 (1531)	total: 5m 10s	remaining: 1m 34s
1532:	learn: 0.7916661	test: 0.8498259	best: 0.8498259 (1532)	total: 5m 10s	remaining: 1m 34s
1533:	learn: 0.7915679	test: 0.8497729	best: 0.8497729 (1533)	total: 5m 11s	remaining: 1m 34s
1534:	learn: 0.7914711	test: 0.8497177	best: 0.8497177 (1534)	total: 5m 11s	remaining: 1m 34s
1535:	learn: 0.7913834	test: 0.8496722	best: 0.8496722 (1535)	total: 5m 11s	remaining: 1m 34s
1536:	learn: 0.7913058	test: 0.8496308	best: 0.8496308 (1536)	total: 5m 11s	remaining: 1m 33s
1537:	learn: 0.7912197	test: 0.8495727	best: 0.8495727 (1537)	total: 5m 12s	remaining: 1m 33s
1538:	learn: 0.7911379	test: 0.8495236	best: 0.8495236 (1538)	total: 5m 12s	remaining: 1m 33s
1539:	learn: 0.7910677	test: 0.8494764	best: 0.8494764 (1539)	total: 5m 12s	remaining: 1m 33s
1540:	learn: 0.7909809	test: 0.8494289	best: 0.8494289 (1540)	total: 5m 13s	remaining: 1m 33s
1541:	learn: 0.7908996	test: 0.8493754	best: 0.8493754 (1541

1620:	learn: 0.7840950	test: 0.8449953	best: 0.8449953 (1620)	total: 5m 31s	remaining: 1m 17s
1621:	learn: 0.7840273	test: 0.8449491	best: 0.8449491 (1621)	total: 5m 31s	remaining: 1m 17s
1622:	learn: 0.7839556	test: 0.8449110	best: 0.8449110 (1622)	total: 5m 31s	remaining: 1m 16s
1623:	learn: 0.7838692	test: 0.8448541	best: 0.8448541 (1623)	total: 5m 31s	remaining: 1m 16s
1624:	learn: 0.7837951	test: 0.8448131	best: 0.8448131 (1624)	total: 5m 31s	remaining: 1m 16s
1625:	learn: 0.7837037	test: 0.8447656	best: 0.8447656 (1625)	total: 5m 32s	remaining: 1m 16s
1626:	learn: 0.7836419	test: 0.8447315	best: 0.8447315 (1626)	total: 5m 32s	remaining: 1m 16s
1627:	learn: 0.7835482	test: 0.8446827	best: 0.8446827 (1627)	total: 5m 32s	remaining: 1m 15s
1628:	learn: 0.7834611	test: 0.8446153	best: 0.8446153 (1628)	total: 5m 32s	remaining: 1m 15s
1629:	learn: 0.7833753	test: 0.8445707	best: 0.8445707 (1629)	total: 5m 32s	remaining: 1m 15s
1630:	learn: 0.7833016	test: 0.8445479	best: 0.8445479 (1630

1709:	learn: 0.7767270	test: 0.8402213	best: 0.8402213 (1709)	total: 5m 48s	remaining: 59.1s
1710:	learn: 0.7766417	test: 0.8401563	best: 0.8401563 (1710)	total: 5m 48s	remaining: 58.9s
1711:	learn: 0.7765619	test: 0.8400944	best: 0.8400944 (1711)	total: 5m 49s	remaining: 58.7s
1712:	learn: 0.7764796	test: 0.8400313	best: 0.8400313 (1712)	total: 5m 49s	remaining: 58.5s
1713:	learn: 0.7763946	test: 0.8399677	best: 0.8399677 (1713)	total: 5m 49s	remaining: 58.3s
1714:	learn: 0.7763233	test: 0.8399263	best: 0.8399263 (1714)	total: 5m 49s	remaining: 58.1s
1715:	learn: 0.7762585	test: 0.8398837	best: 0.8398837 (1715)	total: 5m 49s	remaining: 57.9s
1716:	learn: 0.7761845	test: 0.8398464	best: 0.8398464 (1716)	total: 5m 50s	remaining: 57.7s
1717:	learn: 0.7760973	test: 0.8397912	best: 0.8397912 (1717)	total: 5m 50s	remaining: 57.5s
1718:	learn: 0.7760054	test: 0.8397251	best: 0.8397251 (1718)	total: 5m 50s	remaining: 57.3s
1719:	learn: 0.7759273	test: 0.8396934	best: 0.8396934 (1719)	total: 5

1798:	learn: 0.7695840	test: 0.8357026	best: 0.8357026 (1798)	total: 6m 7s	remaining: 41.1s
1799:	learn: 0.7695044	test: 0.8356595	best: 0.8356595 (1799)	total: 6m 8s	remaining: 40.9s
1800:	learn: 0.7694173	test: 0.8356041	best: 0.8356041 (1800)	total: 6m 8s	remaining: 40.7s
1801:	learn: 0.7693466	test: 0.8355548	best: 0.8355548 (1801)	total: 6m 8s	remaining: 40.5s
1802:	learn: 0.7692756	test: 0.8355190	best: 0.8355190 (1802)	total: 6m 8s	remaining: 40.3s
1803:	learn: 0.7692126	test: 0.8354730	best: 0.8354730 (1803)	total: 6m 9s	remaining: 40.1s
1804:	learn: 0.7691398	test: 0.8354386	best: 0.8354386 (1804)	total: 6m 9s	remaining: 39.9s
1805:	learn: 0.7690530	test: 0.8353718	best: 0.8353718 (1805)	total: 6m 9s	remaining: 39.7s
1806:	learn: 0.7689578	test: 0.8353186	best: 0.8353186 (1806)	total: 6m 9s	remaining: 39.5s
1807:	learn: 0.7688696	test: 0.8352637	best: 0.8352637 (1807)	total: 6m 9s	remaining: 39.3s
1808:	learn: 0.7688085	test: 0.8352232	best: 0.8352232 (1808)	total: 6m 10s	rema

1887:	learn: 0.7627278	test: 0.8315190	best: 0.8315190 (1887)	total: 6m 29s	remaining: 23.1s
1888:	learn: 0.7626509	test: 0.8314727	best: 0.8314727 (1888)	total: 6m 29s	remaining: 22.9s
1889:	learn: 0.7625785	test: 0.8314452	best: 0.8314452 (1889)	total: 6m 29s	remaining: 22.7s
1890:	learn: 0.7624985	test: 0.8313940	best: 0.8313940 (1890)	total: 6m 29s	remaining: 22.5s
1891:	learn: 0.7624054	test: 0.8313540	best: 0.8313540 (1891)	total: 6m 30s	remaining: 22.3s
1892:	learn: 0.7623148	test: 0.8313015	best: 0.8313015 (1892)	total: 6m 30s	remaining: 22.1s
1893:	learn: 0.7622427	test: 0.8312375	best: 0.8312375 (1893)	total: 6m 30s	remaining: 21.9s
1894:	learn: 0.7621574	test: 0.8311922	best: 0.8311922 (1894)	total: 6m 30s	remaining: 21.7s
1895:	learn: 0.7620960	test: 0.8311454	best: 0.8311454 (1895)	total: 6m 31s	remaining: 21.5s
1896:	learn: 0.7620196	test: 0.8310996	best: 0.8310996 (1896)	total: 6m 31s	remaining: 21.2s
1897:	learn: 0.7619352	test: 0.8310443	best: 0.8310443 (1897)	total: 6

1976:	learn: 0.7561822	test: 0.8274579	best: 0.8274579 (1976)	total: 6m 47s	remaining: 4.75s
1977:	learn: 0.7560963	test: 0.8273995	best: 0.8273995 (1977)	total: 6m 48s	remaining: 4.54s
1978:	learn: 0.7560185	test: 0.8273595	best: 0.8273595 (1978)	total: 6m 48s	remaining: 4.33s
1979:	learn: 0.7559463	test: 0.8273268	best: 0.8273268 (1979)	total: 6m 48s	remaining: 4.13s
1980:	learn: 0.7558636	test: 0.8272756	best: 0.8272756 (1980)	total: 6m 48s	remaining: 3.92s
1981:	learn: 0.7557755	test: 0.8272325	best: 0.8272325 (1981)	total: 6m 48s	remaining: 3.71s
1982:	learn: 0.7556955	test: 0.8271890	best: 0.8271890 (1982)	total: 6m 49s	remaining: 3.51s
1983:	learn: 0.7556047	test: 0.8271326	best: 0.8271326 (1983)	total: 6m 49s	remaining: 3.3s
1984:	learn: 0.7555516	test: 0.8270985	best: 0.8270985 (1984)	total: 6m 49s	remaining: 3.09s
1985:	learn: 0.7554766	test: 0.8270810	best: 0.8270810 (1985)	total: 6m 49s	remaining: 2.89s
1986:	learn: 0.7553881	test: 0.8270203	best: 0.8270203 (1986)	total: 6m

<catboost.core.CatBoostRegressor at 0x7f33f8eacb90>

In [15]:
np.savetxt('./submission.csv', cat.predict(X_test))

# Testing

In [17]:
ground_truth = np.loadtxt('./answers.csv')
preds = np.loadtxt('./submission.csv')
mean_squared_error(ground_truth, preds) ** 0.5

0.8289894487550301