In [2]:
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor

In [69]:
df1 = pd.read_csv('datasets/db_waw_flats.csv', parse_dates = ['today', 'publication_date'])
df2 = pd.read_csv('datasets/db_waw_flats_17_06.csv', parse_dates = ['today', 'publication_date'])
df3 = pd.read_csv('datasets/db_waw_flats_27_6.csv', parse_dates = ['today', 'publication_date'])
df4 = pd.read_csv('datasets/db_waw_flats_10_7.csv', parse_dates = ['today', 'publication_date'])

In [70]:
common_df = pd.concat([df1,df2,df3,df4], ignore_index=True)
common_df

Unnamed: 0,price,area,rooms,renovation,floor,market,elevator,street,district,balcony,terrace,garden,parking,central_heating,seller,blok,cena_m,today,publication_date
0,1152000.0,72.00,3.0,3,2,2,1,ul. gen. tadeusza pełczyńskiego,bemowo,1,0,0,1,0,2,1,16000.000000,2023-05-25,2023-05-14 19:36:44
1,1790000.0,115.41,4.0,3,2,2,1,ul. obrzeżna,mokotów,0,1,0,1,1,1,0,15509.921151,2023-05-25,2023-05-19 15:27:31
2,610000.0,54.20,3.0,3,2,2,1,ul. magiczna,białołęka,1,0,0,1,0,2,1,11254.612546,2023-05-25,2023-05-06 19:36:14
3,799000.0,61.10,3.0,3,2,2,1,ul. marywilska,białołęka,1,0,0,1,0,2,1,13076.923077,2023-05-25,2023-05-22 19:37:02
4,1040000.0,46.00,2.0,3,2,2,1,ul. żelazna,wola,1,0,0,0,1,2,0,22608.695652,2023-05-25,2023-05-06 19:33:30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36499,1479400.0,113.80,4.0,3,1,2,1,ul. meander,ursynów,1,0,0,1,1,1,1,13000.000000,2023-07-10,2015-10-27 20:02:18
36500,7520000.0,320.00,4.0,3,2,2,1,unknown,wola,1,1,0,1,1,1,0,23500.000000,2023-07-10,2015-06-23 20:17:44
36501,2700000.0,200.00,8.0,1,1,2,0,ul. łotewska,praga-południe,0,0,0,0,0,1,0,13500.000000,2023-07-10,2014-11-10 12:05:12
36502,600000.0,53.50,3.0,3,2,2,0,ul. komitetu obrony robotników,włochy,1,0,0,0,1,2,1,11214.953271,2023-07-10,2023-07-10 20:40:18


In [71]:
common_df.drop(columns=['today']).drop_duplicates(keep='last', inplace=True)

In [72]:
cols_x = ['district', 'area', 'rooms', 'renovation', 'floor', 'balcony', 'terrace',
          'garden', 'parking', 'central_heating', 'market', 'seller', 'blok', 'elevator']

cat_cols = ['district', 'renovation', 'floor', 'balcony', 'terrace', 'garden', 'parking',
            'central_heating', 'market', 'seller', 'blok', 'elevator']
col_y = ['price']

model_df = common_df[cols_x + col_y].dropna()

In [73]:
model_df['district']

0                bemowo
1               mokotów
2             białołęka
3             białołęka
4                  wola
              ...      
36499           ursynów
36500              wola
36501    praga-południe
36502            włochy
36503          żoliborz
Name: district, Length: 35332, dtype: object

In [74]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error
le = preprocessing.LabelEncoder()
le.fit(model_df['district'])
model_df['district'] = le.transform(model_df['district'])

X_train, X_test, y_train, y_test = train_test_split(model_df[cols_x], model_df['price'], test_size=0.25, random_state=10)

In [75]:
from sklearn.tree import DecisionTreeRegressor
est = DecisionTreeRegressor(random_state=0, max_depth=12)
ada_model = AdaBoostRegressor(estimator=est, random_state=0, n_estimators=50, learning_rate=0.05, loss='square')
ada_model.fit(X_train, y_train)
ada_preds = ada_model.predict(X_test)

In [76]:
print('percentage error', mean_absolute_percentage_error(y_test, ada_preds))
print('absolute error', mean_absolute_error(y_test, ada_preds))
print('root squared error', mean_squared_error(y_test, ada_preds, squared=False))

percentage error 0.11043793642934216
absolute error 104032.38897184428
root squared error 197500.5229055571


In [77]:
from catboost import Pool, CatBoostRegressor
train_pool = Pool(X_train, y_train, cat_features=cat_cols)
test_pool = Pool(X_test, cat_features=cat_cols)
cb_model = CatBoostRegressor(iterations=3000, learning_rate=0.04, l2_leaf_reg=1, depth=8)
cb_model.fit(train_pool)
cb_preds = cb_model.predict(test_pool)

0:	learn: 1172523.7388317	total: 167ms	remaining: 8m 21s
1:	learn: 1140425.2304694	total: 218ms	remaining: 5m 27s
2:	learn: 1109468.9808084	total: 309ms	remaining: 5m 8s
3:	learn: 1080720.1841882	total: 378ms	remaining: 4m 43s
4:	learn: 1053657.4864845	total: 452ms	remaining: 4m 30s
5:	learn: 1027120.8875018	total: 501ms	remaining: 4m 9s
6:	learn: 1002986.5925228	total: 530ms	remaining: 3m 46s
7:	learn: 979830.7616174	total: 551ms	remaining: 3m 25s
8:	learn: 956420.0842048	total: 587ms	remaining: 3m 15s
9:	learn: 934257.2641234	total: 606ms	remaining: 3m 1s
10:	learn: 913410.4645010	total: 621ms	remaining: 2m 48s
11:	learn: 893417.0677879	total: 637ms	remaining: 2m 38s
12:	learn: 874221.4504954	total: 647ms	remaining: 2m 28s
13:	learn: 855867.7773209	total: 657ms	remaining: 2m 20s
14:	learn: 838820.5958227	total: 665ms	remaining: 2m 12s
15:	learn: 821011.8122445	total: 674ms	remaining: 2m 5s
16:	learn: 805175.0508276	total: 683ms	remaining: 1m 59s
17:	learn: 789100.2226793	total: 690ms

161:	learn: 390371.1303963	total: 1.81s	remaining: 31.7s
162:	learn: 390160.3620576	total: 1.82s	remaining: 31.7s
163:	learn: 389668.7549668	total: 1.83s	remaining: 31.6s
164:	learn: 388665.9052342	total: 1.83s	remaining: 31.5s
165:	learn: 387595.8159481	total: 1.84s	remaining: 31.4s
166:	learn: 387059.4591250	total: 1.85s	remaining: 31.4s
167:	learn: 386414.1390309	total: 1.86s	remaining: 31.3s
168:	learn: 386111.5788978	total: 1.86s	remaining: 31.2s
169:	learn: 385350.0290302	total: 1.88s	remaining: 31.2s
170:	learn: 385030.1633120	total: 1.88s	remaining: 31.2s
171:	learn: 383767.4004590	total: 1.89s	remaining: 31.1s
172:	learn: 383203.6300670	total: 1.9s	remaining: 31s
173:	learn: 382373.5452035	total: 1.91s	remaining: 30.9s
174:	learn: 382029.5169119	total: 1.91s	remaining: 30.9s
175:	learn: 381730.2603207	total: 1.92s	remaining: 30.8s
176:	learn: 380927.6449937	total: 1.93s	remaining: 30.7s
177:	learn: 380728.3805950	total: 1.94s	remaining: 30.7s
178:	learn: 380272.6184327	total: 

320:	learn: 322891.6935915	total: 3.03s	remaining: 25.3s
321:	learn: 322689.8164971	total: 3.04s	remaining: 25.3s
322:	learn: 322377.2616967	total: 3.05s	remaining: 25.3s
323:	learn: 322146.7684898	total: 3.06s	remaining: 25.3s
324:	learn: 321832.2542780	total: 3.07s	remaining: 25.3s
325:	learn: 321149.1134352	total: 3.08s	remaining: 25.2s
326:	learn: 320950.9014463	total: 3.08s	remaining: 25.2s
327:	learn: 320309.7116838	total: 3.09s	remaining: 25.2s
328:	learn: 320244.7572114	total: 3.1s	remaining: 25.2s
329:	learn: 319787.1545916	total: 3.11s	remaining: 25.2s
330:	learn: 319542.9130496	total: 3.12s	remaining: 25.1s
331:	learn: 319054.9667455	total: 3.13s	remaining: 25.1s
332:	learn: 318844.9127283	total: 3.14s	remaining: 25.1s
333:	learn: 318696.0473539	total: 3.14s	remaining: 25.1s
334:	learn: 318338.5529664	total: 3.15s	remaining: 25.1s
335:	learn: 318122.2336583	total: 3.16s	remaining: 25s
336:	learn: 317879.5797411	total: 3.17s	remaining: 25s
337:	learn: 317536.7177901	total: 3.

474:	learn: 274967.8691361	total: 4.25s	remaining: 22.6s
475:	learn: 274859.5046682	total: 4.26s	remaining: 22.6s
476:	learn: 274765.5106865	total: 4.27s	remaining: 22.6s
477:	learn: 274473.3649761	total: 4.28s	remaining: 22.6s
478:	learn: 274391.6205927	total: 4.28s	remaining: 22.5s
479:	learn: 274313.3554057	total: 4.29s	remaining: 22.5s
480:	learn: 273840.2380432	total: 4.3s	remaining: 22.5s
481:	learn: 273669.0040973	total: 4.31s	remaining: 22.5s
482:	learn: 273616.2710982	total: 4.32s	remaining: 22.5s
483:	learn: 273313.1354924	total: 4.32s	remaining: 22.5s
484:	learn: 272940.7737711	total: 4.33s	remaining: 22.5s
485:	learn: 272549.9458321	total: 4.34s	remaining: 22.4s
486:	learn: 272413.5438029	total: 4.34s	remaining: 22.4s
487:	learn: 272266.5355468	total: 4.35s	remaining: 22.4s
488:	learn: 272087.1829147	total: 4.36s	remaining: 22.4s
489:	learn: 271938.5969498	total: 4.37s	remaining: 22.4s
490:	learn: 271881.4965591	total: 4.39s	remaining: 22.4s
491:	learn: 271757.6270265	total

629:	learn: 250354.8622532	total: 5.46s	remaining: 20.5s
630:	learn: 250248.5449556	total: 5.47s	remaining: 20.5s
631:	learn: 250191.5904267	total: 5.48s	remaining: 20.5s
632:	learn: 250118.1393856	total: 5.49s	remaining: 20.5s
633:	learn: 249970.2495142	total: 5.49s	remaining: 20.5s
634:	learn: 249885.3558629	total: 5.5s	remaining: 20.5s
635:	learn: 249770.3826782	total: 5.51s	remaining: 20.5s
636:	learn: 249581.4417586	total: 5.52s	remaining: 20.5s
637:	learn: 249455.0916387	total: 5.52s	remaining: 20.4s
638:	learn: 249409.6858032	total: 5.53s	remaining: 20.4s
639:	learn: 249264.5035425	total: 5.54s	remaining: 20.4s
640:	learn: 248920.4997499	total: 5.54s	remaining: 20.4s
641:	learn: 248842.5407986	total: 5.55s	remaining: 20.4s
642:	learn: 248756.0515437	total: 5.56s	remaining: 20.4s
643:	learn: 248692.8868122	total: 5.57s	remaining: 20.4s
644:	learn: 248598.6585935	total: 5.57s	remaining: 20.4s
645:	learn: 248538.7128261	total: 5.58s	remaining: 20.3s
646:	learn: 248448.5072412	total

782:	learn: 232877.4060886	total: 6.67s	remaining: 18.9s
783:	learn: 232853.7195673	total: 6.68s	remaining: 18.9s
784:	learn: 232787.5506811	total: 6.69s	remaining: 18.9s
785:	learn: 232779.1457991	total: 6.7s	remaining: 18.9s
786:	learn: 232710.4351902	total: 6.71s	remaining: 18.9s
787:	learn: 232651.8282518	total: 6.72s	remaining: 18.9s
788:	learn: 232574.2775226	total: 6.72s	remaining: 18.8s
789:	learn: 232524.2387186	total: 6.73s	remaining: 18.8s
790:	learn: 232455.9886251	total: 6.74s	remaining: 18.8s
791:	learn: 232374.5634631	total: 6.75s	remaining: 18.8s
792:	learn: 232304.8321767	total: 6.75s	remaining: 18.8s
793:	learn: 232279.9156097	total: 6.76s	remaining: 18.8s
794:	learn: 232029.9674673	total: 6.77s	remaining: 18.8s
795:	learn: 231880.2049786	total: 6.78s	remaining: 18.8s
796:	learn: 231793.6912819	total: 6.79s	remaining: 18.8s
797:	learn: 231743.4424906	total: 6.8s	remaining: 18.8s
798:	learn: 231572.5148137	total: 6.81s	remaining: 18.8s
799:	learn: 231471.9688551	total:

932:	learn: 220275.5881888	total: 7.88s	remaining: 17.4s
933:	learn: 220226.4057671	total: 7.88s	remaining: 17.4s
934:	learn: 220196.2514503	total: 7.9s	remaining: 17.4s
935:	learn: 219948.5495319	total: 7.91s	remaining: 17.4s
936:	learn: 219885.2280227	total: 7.92s	remaining: 17.4s
937:	learn: 219788.3543846	total: 7.92s	remaining: 17.4s
938:	learn: 219729.5952704	total: 7.93s	remaining: 17.4s
939:	learn: 219700.4872944	total: 7.94s	remaining: 17.4s
940:	learn: 219552.3034466	total: 7.95s	remaining: 17.4s
941:	learn: 219408.4922165	total: 7.95s	remaining: 17.4s
942:	learn: 219391.4053898	total: 7.96s	remaining: 17.4s
943:	learn: 219257.2183597	total: 7.97s	remaining: 17.4s
944:	learn: 219197.1812033	total: 7.98s	remaining: 17.3s
945:	learn: 219072.5637097	total: 7.98s	remaining: 17.3s
946:	learn: 219012.0828550	total: 7.99s	remaining: 17.3s
947:	learn: 218940.3442374	total: 8s	remaining: 17.3s
948:	learn: 218923.9847841	total: 8.01s	remaining: 17.3s
949:	learn: 218904.0047877	total: 8

1085:	learn: 209172.0731625	total: 9.1s	remaining: 16s
1086:	learn: 209073.6575484	total: 9.11s	remaining: 16s
1087:	learn: 209047.0813467	total: 9.12s	remaining: 16s
1088:	learn: 209028.6198585	total: 9.12s	remaining: 16s
1089:	learn: 208868.7757162	total: 9.13s	remaining: 16s
1090:	learn: 208820.9983312	total: 9.14s	remaining: 16s
1091:	learn: 208780.2760767	total: 9.14s	remaining: 16s
1092:	learn: 208638.5403226	total: 9.15s	remaining: 16s
1093:	learn: 208572.1307577	total: 9.16s	remaining: 16s
1094:	learn: 208492.3378585	total: 9.16s	remaining: 15.9s
1095:	learn: 208458.3841467	total: 9.17s	remaining: 15.9s
1096:	learn: 208442.3633390	total: 9.18s	remaining: 15.9s
1097:	learn: 208358.6866427	total: 9.19s	remaining: 15.9s
1098:	learn: 208316.2825130	total: 9.21s	remaining: 15.9s
1099:	learn: 208226.8153476	total: 9.22s	remaining: 15.9s
1100:	learn: 208213.3990593	total: 9.23s	remaining: 15.9s
1101:	learn: 208183.5991079	total: 9.23s	remaining: 15.9s
1102:	learn: 208161.0923641	total

1235:	learn: 198998.3298333	total: 10.3s	remaining: 14.7s
1236:	learn: 198949.8315216	total: 10.3s	remaining: 14.7s
1237:	learn: 198854.4308302	total: 10.3s	remaining: 14.7s
1238:	learn: 198834.5062257	total: 10.3s	remaining: 14.7s
1239:	learn: 198822.3375515	total: 10.3s	remaining: 14.7s
1240:	learn: 198769.7604160	total: 10.3s	remaining: 14.7s
1241:	learn: 198727.2168673	total: 10.4s	remaining: 14.7s
1242:	learn: 198676.6855973	total: 10.4s	remaining: 14.6s
1243:	learn: 198542.4498551	total: 10.4s	remaining: 14.6s
1244:	learn: 198536.6016576	total: 10.4s	remaining: 14.6s
1245:	learn: 198527.2071650	total: 10.4s	remaining: 14.6s
1246:	learn: 198510.8623295	total: 10.4s	remaining: 14.6s
1247:	learn: 198304.1932577	total: 10.4s	remaining: 14.6s
1248:	learn: 198291.3861312	total: 10.4s	remaining: 14.6s
1249:	learn: 198233.1584286	total: 10.4s	remaining: 14.6s
1250:	learn: 198185.9455008	total: 10.4s	remaining: 14.6s
1251:	learn: 198153.4344281	total: 10.4s	remaining: 14.6s
1252:	learn: 1

1381:	learn: 191912.2713669	total: 11.5s	remaining: 13.5s
1382:	learn: 191823.4162507	total: 11.5s	remaining: 13.5s
1383:	learn: 191772.2196506	total: 11.5s	remaining: 13.5s
1384:	learn: 191645.7287551	total: 11.5s	remaining: 13.5s
1385:	learn: 191570.0026546	total: 11.6s	remaining: 13.5s
1386:	learn: 191520.8267305	total: 11.6s	remaining: 13.4s
1387:	learn: 191495.3436764	total: 11.6s	remaining: 13.4s
1388:	learn: 191405.5542490	total: 11.6s	remaining: 13.4s
1389:	learn: 191342.0577121	total: 11.6s	remaining: 13.4s
1390:	learn: 191290.9796529	total: 11.6s	remaining: 13.4s
1391:	learn: 191265.9489623	total: 11.6s	remaining: 13.4s
1392:	learn: 191217.0679933	total: 11.6s	remaining: 13.4s
1393:	learn: 191181.3274227	total: 11.6s	remaining: 13.4s
1394:	learn: 191090.4399695	total: 11.6s	remaining: 13.4s
1395:	learn: 191066.3552694	total: 11.6s	remaining: 13.4s
1396:	learn: 191019.7037186	total: 11.6s	remaining: 13.4s
1397:	learn: 190947.7731014	total: 11.6s	remaining: 13.3s
1398:	learn: 1

1531:	learn: 184266.3655513	total: 12.7s	remaining: 12.2s
1532:	learn: 184253.6216293	total: 12.7s	remaining: 12.2s
1533:	learn: 184173.2933499	total: 12.7s	remaining: 12.2s
1534:	learn: 184153.5939437	total: 12.8s	remaining: 12.2s
1535:	learn: 184131.4021484	total: 12.8s	remaining: 12.2s
1536:	learn: 184068.3186156	total: 12.8s	remaining: 12.2s
1537:	learn: 184058.3420905	total: 12.8s	remaining: 12.1s
1538:	learn: 184032.8769022	total: 12.8s	remaining: 12.1s
1539:	learn: 183965.6227926	total: 12.8s	remaining: 12.1s
1540:	learn: 183912.3616724	total: 12.8s	remaining: 12.1s
1541:	learn: 183901.7554591	total: 12.8s	remaining: 12.1s
1542:	learn: 183864.8072926	total: 12.8s	remaining: 12.1s
1543:	learn: 183848.7858221	total: 12.8s	remaining: 12.1s
1544:	learn: 183828.8317875	total: 12.8s	remaining: 12.1s
1545:	learn: 183815.5713808	total: 12.8s	remaining: 12.1s
1546:	learn: 183787.4458557	total: 12.8s	remaining: 12.1s
1547:	learn: 183772.8762853	total: 12.9s	remaining: 12.1s
1548:	learn: 1

1683:	learn: 178117.1141496	total: 13.9s	remaining: 10.9s
1684:	learn: 178063.0432237	total: 13.9s	remaining: 10.9s
1685:	learn: 178010.5712986	total: 14s	remaining: 10.9s
1686:	learn: 177978.1881558	total: 14s	remaining: 10.9s
1687:	learn: 177935.0760363	total: 14s	remaining: 10.9s
1688:	learn: 177899.7507583	total: 14s	remaining: 10.9s
1689:	learn: 177887.5574943	total: 14s	remaining: 10.8s
1690:	learn: 177876.7922613	total: 14s	remaining: 10.8s
1691:	learn: 177854.2773222	total: 14s	remaining: 10.8s
1692:	learn: 177848.8466482	total: 14s	remaining: 10.8s
1693:	learn: 177842.1717991	total: 14s	remaining: 10.8s
1694:	learn: 177823.3198674	total: 14s	remaining: 10.8s
1695:	learn: 177802.2400632	total: 14s	remaining: 10.8s
1696:	learn: 177757.8006205	total: 14s	remaining: 10.8s
1697:	learn: 177718.8179706	total: 14s	remaining: 10.8s
1698:	learn: 177695.0840112	total: 14.1s	remaining: 10.8s
1699:	learn: 177623.5523421	total: 14.1s	remaining: 10.8s
1700:	learn: 177600.3546437	total: 14.1s

1832:	learn: 172305.7445405	total: 15.1s	remaining: 9.64s
1833:	learn: 172254.8143245	total: 15.2s	remaining: 9.63s
1834:	learn: 172234.7334782	total: 15.2s	remaining: 9.63s
1835:	learn: 172216.9460538	total: 15.2s	remaining: 9.62s
1836:	learn: 172159.4019153	total: 15.2s	remaining: 9.61s
1837:	learn: 172154.8313847	total: 15.2s	remaining: 9.6s
1838:	learn: 172151.6214700	total: 15.2s	remaining: 9.59s
1839:	learn: 172109.0967244	total: 15.2s	remaining: 9.58s
1840:	learn: 172090.3581372	total: 15.2s	remaining: 9.57s
1841:	learn: 172071.1066639	total: 15.2s	remaining: 9.56s
1842:	learn: 172013.0908486	total: 15.2s	remaining: 9.56s
1843:	learn: 171997.5657701	total: 15.2s	remaining: 9.55s
1844:	learn: 171666.9176302	total: 15.2s	remaining: 9.55s
1845:	learn: 171601.0081391	total: 15.3s	remaining: 9.54s
1846:	learn: 171583.7325281	total: 15.3s	remaining: 9.53s
1847:	learn: 171547.7573395	total: 15.3s	remaining: 9.52s
1848:	learn: 171524.4927230	total: 15.3s	remaining: 9.51s
1849:	learn: 17

1984:	learn: 165588.1387951	total: 16.4s	remaining: 8.37s
1985:	learn: 165557.6001123	total: 16.4s	remaining: 8.36s
1986:	learn: 165548.5666749	total: 16.4s	remaining: 8.35s
1987:	learn: 165517.9117195	total: 16.4s	remaining: 8.34s
1988:	learn: 165490.7137942	total: 16.4s	remaining: 8.34s
1989:	learn: 165442.6644758	total: 16.4s	remaining: 8.33s
1990:	learn: 165411.5655094	total: 16.4s	remaining: 8.32s
1991:	learn: 165370.6129911	total: 16.4s	remaining: 8.31s
1992:	learn: 165319.7404976	total: 16.4s	remaining: 8.3s
1993:	learn: 165307.9196761	total: 16.4s	remaining: 8.29s
1994:	learn: 165297.1628891	total: 16.4s	remaining: 8.28s
1995:	learn: 165258.1479390	total: 16.5s	remaining: 8.28s
1996:	learn: 165218.4773016	total: 16.5s	remaining: 8.27s
1997:	learn: 165211.4989397	total: 16.5s	remaining: 8.26s
1998:	learn: 165163.8124633	total: 16.5s	remaining: 8.25s
1999:	learn: 165135.7408465	total: 16.5s	remaining: 8.24s
2000:	learn: 165115.8843920	total: 16.5s	remaining: 8.23s
2001:	learn: 16

2135:	learn: 160751.1448477	total: 17.6s	remaining: 7.12s
2136:	learn: 160717.3847689	total: 17.6s	remaining: 7.11s
2137:	learn: 160671.1589542	total: 17.6s	remaining: 7.1s
2138:	learn: 160637.8045477	total: 17.6s	remaining: 7.09s
2139:	learn: 160634.9935136	total: 17.6s	remaining: 7.08s
2140:	learn: 160612.2614802	total: 17.6s	remaining: 7.08s
2141:	learn: 160379.3789705	total: 17.6s	remaining: 7.07s
2142:	learn: 160283.7488532	total: 17.6s	remaining: 7.06s
2143:	learn: 160224.0096301	total: 17.7s	remaining: 7.05s
2144:	learn: 160206.2947984	total: 17.7s	remaining: 7.04s
2145:	learn: 160195.3738598	total: 17.7s	remaining: 7.03s
2146:	learn: 160174.3593928	total: 17.7s	remaining: 7.02s
2147:	learn: 160160.2030378	total: 17.7s	remaining: 7.02s
2148:	learn: 160148.8847615	total: 17.7s	remaining: 7.01s
2149:	learn: 160128.6831041	total: 17.7s	remaining: 7s
2150:	learn: 160100.9063094	total: 17.7s	remaining: 6.99s
2151:	learn: 160094.1062233	total: 17.7s	remaining: 6.98s
2152:	learn: 16006

2284:	learn: 156535.6689897	total: 18.8s	remaining: 5.88s
2285:	learn: 156475.1189056	total: 18.8s	remaining: 5.88s
2286:	learn: 156463.9577219	total: 18.8s	remaining: 5.87s
2287:	learn: 156454.2092980	total: 18.8s	remaining: 5.86s
2288:	learn: 156396.0388249	total: 18.8s	remaining: 5.85s
2289:	learn: 156360.7179105	total: 18.8s	remaining: 5.84s
2290:	learn: 156348.9129749	total: 18.9s	remaining: 5.83s
2291:	learn: 156338.7782364	total: 18.9s	remaining: 5.83s
2292:	learn: 156211.5235382	total: 18.9s	remaining: 5.82s
2293:	learn: 156188.0443224	total: 18.9s	remaining: 5.81s
2294:	learn: 156134.6730537	total: 18.9s	remaining: 5.8s
2295:	learn: 156107.1348843	total: 18.9s	remaining: 5.79s
2296:	learn: 156074.6536371	total: 18.9s	remaining: 5.78s
2297:	learn: 156047.1078560	total: 18.9s	remaining: 5.78s
2298:	learn: 156009.0885717	total: 18.9s	remaining: 5.77s
2299:	learn: 155988.4769356	total: 18.9s	remaining: 5.76s
2300:	learn: 155951.2548898	total: 18.9s	remaining: 5.75s
2301:	learn: 15

2433:	learn: 152083.8761217	total: 20s	remaining: 4.65s
2434:	learn: 152048.3712323	total: 20s	remaining: 4.65s
2435:	learn: 152036.7612553	total: 20s	remaining: 4.64s
2436:	learn: 152033.0842096	total: 20s	remaining: 4.63s
2437:	learn: 151997.0541078	total: 20.1s	remaining: 4.62s
2438:	learn: 151983.7944213	total: 20.1s	remaining: 4.61s
2439:	learn: 151977.9853279	total: 20.1s	remaining: 4.61s
2440:	learn: 151956.6345705	total: 20.1s	remaining: 4.6s
2441:	learn: 151920.4824080	total: 20.1s	remaining: 4.59s
2442:	learn: 151868.6260095	total: 20.1s	remaining: 4.58s
2443:	learn: 151860.9329832	total: 20.1s	remaining: 4.57s
2444:	learn: 151855.9768138	total: 20.1s	remaining: 4.56s
2445:	learn: 151836.0509094	total: 20.1s	remaining: 4.55s
2446:	learn: 151822.1972426	total: 20.1s	remaining: 4.55s
2447:	learn: 151790.0437146	total: 20.1s	remaining: 4.54s
2448:	learn: 151763.7488686	total: 20.1s	remaining: 4.53s
2449:	learn: 151737.4781517	total: 20.1s	remaining: 4.52s
2450:	learn: 151726.117

2586:	learn: 147983.1187384	total: 21.2s	remaining: 3.39s
2587:	learn: 147953.6936113	total: 21.2s	remaining: 3.38s
2588:	learn: 147940.8789550	total: 21.3s	remaining: 3.37s
2589:	learn: 147921.1147924	total: 21.3s	remaining: 3.37s
2590:	learn: 147869.9149184	total: 21.3s	remaining: 3.36s
2591:	learn: 147841.8861568	total: 21.3s	remaining: 3.35s
2592:	learn: 147834.4613326	total: 21.3s	remaining: 3.34s
2593:	learn: 147800.2072632	total: 21.3s	remaining: 3.33s
2594:	learn: 147738.3733271	total: 21.3s	remaining: 3.32s
2595:	learn: 147703.6678376	total: 21.3s	remaining: 3.31s
2596:	learn: 147693.7276808	total: 21.3s	remaining: 3.31s
2597:	learn: 147679.9058100	total: 21.3s	remaining: 3.3s
2598:	learn: 147646.8208096	total: 21.3s	remaining: 3.29s
2599:	learn: 147626.8638388	total: 21.3s	remaining: 3.28s
2600:	learn: 147612.2819288	total: 21.3s	remaining: 3.27s
2601:	learn: 147595.5169692	total: 21.4s	remaining: 3.27s
2602:	learn: 147587.7548451	total: 21.4s	remaining: 3.26s
2603:	learn: 14

2733:	learn: 144615.1968248	total: 22.4s	remaining: 2.18s
2734:	learn: 144610.8879642	total: 22.4s	remaining: 2.17s
2735:	learn: 144587.5549760	total: 22.5s	remaining: 2.17s
2736:	learn: 144574.4015762	total: 22.5s	remaining: 2.16s
2737:	learn: 144530.4804209	total: 22.5s	remaining: 2.15s
2738:	learn: 144513.7792336	total: 22.5s	remaining: 2.14s
2739:	learn: 144487.3045301	total: 22.5s	remaining: 2.13s
2740:	learn: 144453.3882138	total: 22.5s	remaining: 2.13s
2741:	learn: 144439.9587878	total: 22.5s	remaining: 2.12s
2742:	learn: 144428.0477348	total: 22.5s	remaining: 2.11s
2743:	learn: 144403.4504440	total: 22.5s	remaining: 2.1s
2744:	learn: 144381.2171962	total: 22.5s	remaining: 2.09s
2745:	learn: 144375.5645411	total: 22.5s	remaining: 2.08s
2746:	learn: 144367.1471179	total: 22.5s	remaining: 2.08s
2747:	learn: 144357.1470978	total: 22.6s	remaining: 2.07s
2748:	learn: 144351.6446524	total: 22.6s	remaining: 2.06s
2749:	learn: 144344.4237931	total: 22.6s	remaining: 2.05s
2750:	learn: 14

2884:	learn: 141808.7406081	total: 23.7s	remaining: 943ms
2885:	learn: 141797.3465110	total: 23.7s	remaining: 935ms
2886:	learn: 141789.1926634	total: 23.7s	remaining: 927ms
2887:	learn: 141755.2388945	total: 23.7s	remaining: 919ms
2888:	learn: 141748.4868411	total: 23.7s	remaining: 911ms
2889:	learn: 141714.7174429	total: 23.7s	remaining: 902ms
2890:	learn: 141701.0275945	total: 23.7s	remaining: 894ms
2891:	learn: 141686.3567611	total: 23.7s	remaining: 886ms
2892:	learn: 141657.0863867	total: 23.7s	remaining: 878ms
2893:	learn: 141639.9164660	total: 23.7s	remaining: 870ms
2894:	learn: 141351.1070743	total: 23.7s	remaining: 861ms
2895:	learn: 141314.4793606	total: 23.8s	remaining: 853ms
2896:	learn: 141278.9382683	total: 23.8s	remaining: 845ms
2897:	learn: 141265.3131015	total: 23.8s	remaining: 837ms
2898:	learn: 141236.4898449	total: 23.8s	remaining: 829ms
2899:	learn: 141216.5119111	total: 23.8s	remaining: 820ms
2900:	learn: 141185.1207108	total: 23.8s	remaining: 812ms
2901:	learn: 1

In [78]:
print('percentage error', mean_absolute_percentage_error(y_test, cb_preds))
print('absolute error', mean_absolute_error(y_test, cb_preds))
print('root squared error', mean_squared_error(y_test, cb_preds, squared=False))

percentage error 0.10536735042562807
absolute error 111980.25146985726
root squared error 328522.7438454997


In [79]:
lr_df = pd.DataFrame(data=ada_preds.reshape(-1,1), columns=['ada_preds'])
lr_df['cb_preds'] = cb_preds
lr_df['y'] = y_test.values
train = lr_df[:4500]
test = lr_df[4500:]

In [80]:
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(train.drop(columns=['y']), train['y'])
lr_preds = lr.predict(test.drop(columns=['y']))

In [81]:
print('percentage error', mean_absolute_percentage_error(test['y'], lr_preds))
print('absolute error', mean_absolute_error(test['y'], lr_preds))
print('root squared error', mean_squared_error(test['y'], lr_preds, squared=False))

percentage error 0.10296023534310411
absolute error 103440.89818658422
root squared error 194582.93144854618


In [83]:
lr.fit(lr_df.drop(columns=['y']), lr_df['y'])

In [84]:
import pickle
filename = 'adaboost_regressor.pkl'
pickle.dump(ada_model, open(filename, 'wb'))
pickle.dump(le, open('district_encode', 'wb'))
filename = 'linear_regressor.pkl'
pickle.dump(lr, open(filename, 'wb'))
cb_model.save_model('cb_model.cbm')

In [86]:
load_model = pickle.load(open('district_encode', 'rb'))
load_model.transform(['praga-południe'])

array([5])