In [39]:
import pandas as pd
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import math
from sklearn.metrics import mean_absolute_percentage_error

In [40]:
def CustomMetrics(y_true, y_pred):
    THRESHOLD = 0.15
    NEGATIVE_WEIGHT = 1.1
    scores = []
    for true, pred in zip(y_true, y_pred):
        deviation = (pred - true) / np.maximum(1e-8, true)
        if np.abs(deviation) <= THRESHOLD:
            scores.append(0)
        elif deviation <= - 4 * THRESHOLD:
            scores.append(9 * NEGATIVE_WEIGHT)
        elif deviation < -THRESHOLD:
            scores.append(NEGATIVE_WEIGHT * ((deviation / THRESHOLD) + 1) ** 2)
        elif deviation < 4 * THRESHOLD:
            scores.append(((deviation / THRESHOLD) - 1) ** 2)
        else:
            scores.append(9)
    return np.array(scores).mean()

In [64]:
#Load the dataset
data = pd.read_csv("../data/train.csv", low_memory = False)
#data = data[data['per_square_meter_price'] < 453032]
#data = data[data['per_square_meter_price'] > 150000].reset_index(drop = True)
data = data[data['per_square_meter_price'] < 500000]
data = data[data['per_square_meter_price'] > 100000].reset_index(drop = True)
data.info()
df_test = pd.read_csv("../data/test.csv", low_memory = False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75218 entries, 0 to 75217
Data columns (total 77 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   city                                 75218 non-null  object 
 1   floor                                38576 non-null  object 
 2   id                                   75218 non-null  object 
 3   lat                                  75218 non-null  float64
 4   lng                                  75218 non-null  float64
 5   osm_amenity_points_in_0.001          75218 non-null  int64  
 6   osm_amenity_points_in_0.005          75218 non-null  int64  
 7   osm_amenity_points_in_0.0075         75218 non-null  int64  
 8   osm_amenity_points_in_0.01           75218 non-null  int64  
 9   osm_building_points_in_0.001         75218 non-null  int64  
 10  osm_building_points_in_0.005         75218 non-null  int64  
 11  osm_building_points_in_0.007

In [65]:
target = 'per_square_meter_price'
cat_features = ['region', 'city', 'realty_type', 'street', 'osm_city_nearest_name']
num_features = ['lat', 'lng', 'osm_amenity_points_in_0.001',
       'osm_amenity_points_in_0.005', 'osm_amenity_points_in_0.0075',
       'osm_amenity_points_in_0.01', 'osm_building_points_in_0.001',
       'osm_building_points_in_0.005', 'osm_building_points_in_0.0075',
       'osm_building_points_in_0.01', 'osm_catering_points_in_0.001',
       'osm_catering_points_in_0.005', 'osm_catering_points_in_0.0075',
       'osm_catering_points_in_0.01', 'osm_city_closest_dist',
       'osm_city_nearest_population',
       'osm_crossing_closest_dist', 'osm_crossing_points_in_0.001',
       'osm_crossing_points_in_0.005', 'osm_crossing_points_in_0.0075',
       'osm_crossing_points_in_0.01', 'osm_culture_points_in_0.001',
       'osm_culture_points_in_0.005', 'osm_culture_points_in_0.0075',
       'osm_culture_points_in_0.01', 'osm_finance_points_in_0.001',
       'osm_finance_points_in_0.005', 'osm_finance_points_in_0.0075',
       'osm_finance_points_in_0.01', 'osm_healthcare_points_in_0.005',
       'osm_healthcare_points_in_0.0075', 'osm_healthcare_points_in_0.01',
       'osm_historic_points_in_0.005', 'osm_historic_points_in_0.0075',
       'osm_historic_points_in_0.01', 'osm_hotels_points_in_0.005',
       'osm_hotels_points_in_0.0075', 'osm_hotels_points_in_0.01',
       'osm_leisure_points_in_0.005', 'osm_leisure_points_in_0.0075',
       'osm_leisure_points_in_0.01', 'osm_offices_points_in_0.001',
       'osm_offices_points_in_0.005', 'osm_offices_points_in_0.0075',
       'osm_offices_points_in_0.01', 'osm_shops_points_in_0.001',
       'osm_shops_points_in_0.005', 'osm_shops_points_in_0.0075',
       'osm_shops_points_in_0.01', 'osm_subway_closest_dist',
       'osm_train_stop_closest_dist', 'osm_train_stop_points_in_0.005',
       'osm_train_stop_points_in_0.0075', 'osm_train_stop_points_in_0.01',
       'osm_transport_stop_closest_dist', 'osm_transport_stop_points_in_0.005',
       'osm_transport_stop_points_in_0.0075',
       'osm_transport_stop_points_in_0.01',
       'reform_count_of_houses_1000', 'reform_count_of_houses_500',
       'reform_house_population_1000', 'reform_house_population_500',
       'reform_mean_floor_count_1000', 'reform_mean_floor_count_500',
       'reform_mean_year_building_1000', 'reform_mean_year_building_500','total_square',
        'price_type'
]

data_processed = data[[target] + cat_features + num_features]
data_processed = data_processed.dropna(subset = ['street'])
df_test_processed = df_test[cat_features + num_features + ['id']]
data_processed.info()
data_processed['realty_type'].value_counts()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74999 entries, 0 to 75217
Data columns (total 74 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   per_square_meter_price               74999 non-null  float64
 1   region                               74999 non-null  object 
 2   city                                 74999 non-null  object 
 3   realty_type                          74999 non-null  int64  
 4   street                               74999 non-null  object 
 5   osm_city_nearest_name                74999 non-null  object 
 6   lat                                  74999 non-null  float64
 7   lng                                  74999 non-null  float64
 8   osm_amenity_points_in_0.001          74999 non-null  int64  
 9   osm_amenity_points_in_0.005          74999 non-null  int64  
 10  osm_amenity_points_in_0.0075         74999 non-null  int64  
 11  osm_amenity_points_in_0.01  

10     35567
110    22891
100    16541
Name: realty_type, dtype: int64

In [66]:
categorical_columns = []
cat_dims =  {}
for col in cat_features:
    print(col, data_processed[col].nunique())
    l_enc = LabelEncoder()
    l_enc.fit(np.concatenate((data_processed[col].values, df_test[col].values)))
    print(type(data_processed[col].values))
    #data_processed[col] = data_processed[col].fillna("VV_likely")
    data_processed[col] = l_enc.transform(data_processed[col].values)
    df_test[col] = l_enc.transform(df_test[col].values)
    categorical_columns.append(col)
    cat_dims[col] = len(l_enc.classes_)
    
#разделим данные по признаку price_type
'''
data_processed_0 = data_processed[data_processed['price_type'] == 0]
data_processed_1 = data_processed[data_processed['price_type'] == 1]
price_type_part = 0.5
data_processed_1_train, data_test = train_test_split(data_processed_1, train_size = price_type_part)
data_train = pd.concat([data_processed_1_train, data_processed_0], axis = 0)
'''
data_processed_1 = data_processed[data_processed['price_type'] == 1]
data_train, data_test = train_test_split(data_processed_1, train_size = 0.9)
    
for col in num_features:
    tmp_df = data_processed_1.groupby(by=['realty_type']).mean().reset_index()
    d = {key:value for key, value in zip(list(tmp_df['realty_type']), list(tmp_df[col]))}
    data_processed_1[col] = data_processed_1[col].fillna(data_processed_1['realty_type'].map(d))
    data_train[col] = data_train[col].fillna(data_train['realty_type'].map(d))
    data_test[col] = data_test[col].fillna(data_test['realty_type'].map(d))
    df_test[col] = df_test[col].fillna(df_test['realty_type'].map(d))
    #val = data_train[col].mean()
    #elems = []
    #for elem, prop_type in zip(list(data_train[col]), list(data_train['realty_type'])):
    #    if elem == np.nan:
    #        print(1)
    #        tmp = list(tmp_df[tmp_df['realty_type'] == prop_type][col])[0]
    #        elems.append(tmp)
    #   else:
    #        elems.append(elem)
    #data_train[col] = elems
    #elems = []
    #for elem, prop_type in zip(list(data_test[col]), list(data_test['realty_type'])):
    #    if elem == np.nan:
    #        tmp = list(tmp_df[tmp_df['realty_type'] == prop_type][col])[0]
    #        elems.append(tmp)
    #    else:
    #        elems.append(elem)
    #data_test[col] = elems
    
    #data_train.fillna(tmp_df, inplace = True)
    #data_test.fillna(tmp_df, inplace = True)
    #data_train[col] = data_train[col].fillna(val)
    #data_test[col] = data_test[col].fillna(val)
    
data_processed_1 = data_processed_1.drop(columns = ['price_type']).reset_index(drop = True)
data_train = data_train.drop(columns = ['price_type']).reset_index(drop = True)
data_test = data_test.drop(columns = ['price_type']).reset_index(drop = True)
df_test = df_test.drop(columns = ['price_type']).reset_index(drop = True)
data_test.info()

region 49
<class 'numpy.ndarray'>
city 715
<class 'numpy.ndarray'>
realty_type 3
<class 'numpy.ndarray'>
street 8064
<class 'numpy.ndarray'>
osm_city_nearest_name 147
<class 'numpy.ndarray'>


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_processed_1[col] = data_processed_1[col].fillna(data_processed_1['realty_type'].map(d))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_train[col] = data_train[col].fillna(data_train['realty_type'].map(d))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test[col] = data_test[col].filln

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 73 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   per_square_meter_price               52 non-null     float64
 1   region                               52 non-null     int64  
 2   city                                 52 non-null     int64  
 3   realty_type                          52 non-null     int64  
 4   street                               52 non-null     int64  
 5   osm_city_nearest_name                52 non-null     int64  
 6   lat                                  52 non-null     float64
 7   lng                                  52 non-null     float64
 8   osm_amenity_points_in_0.001          52 non-null     int64  
 9   osm_amenity_points_in_0.005          52 non-null     int64  
 10  osm_amenity_points_in_0.0075         52 non-null     int64  
 11  osm_amenity_points_in_0.01        

In [67]:
if "Set" not in data_train.columns:
    data_train["Set"] = np.random.choice(["train", "valid"],\
                                             p =[.9, .1], size=(data_train.shape[0],))
train_id = data_train[data_train.Set=="train"].index
#test_id = data_train[data_train.Set=="test"].index
val_id = data_train[data_train.Set=="valid"].index

In [68]:
unused_feat = ['Set']

features = [ col for col in data_train.columns if col not in unused_feat+[target]] 

cat_idxs = [ i for i, f in enumerate(features) if f in cat_features]

cat_dimen = [ cat_dims[f] for i, f in enumerate(features) if f in cat_features]

cat_emb_dim = [int(math.sqrt(val)) for val in cat_dimen]

In [46]:
clf = TabNetRegressor(cat_dims=cat_dimen, cat_emb_dim=cat_emb_dim, cat_idxs=cat_idxs)
data_train

Device used : cuda


Unnamed: 0,per_square_meter_price,region,city,realty_type,street,osm_city_nearest_name,lat,lng,osm_amenity_points_in_0.001,osm_amenity_points_in_0.005,...,reform_count_of_houses_1000,reform_count_of_houses_500,reform_house_population_1000,reform_house_population_500,reform_mean_floor_count_1000,reform_mean_floor_count_500,reform_mean_year_building_1000,reform_mean_year_building_500,total_square,Set
0,109448.667403,19,286,0,7159,58,56.058530,92.914987,0,27,...,124,40,4761.0,1546.0,10.699187,10.300000,2003.138211,2002.275000,82.888172,train
1,136159.398436,19,286,2,8422,58,56.006442,92.848994,4,30,...,153,62,2870.0,996.0,6.595890,5.423729,1970.810811,1964.233333,54.685906,train
2,118953.181961,35,541,1,6511,111,59.936710,30.356578,8,341,...,382,119,6710.0,2478.0,4.645070,4.814159,1883.247887,1883.893805,51.432000,train
3,143143.131592,35,541,1,6617,111,59.931694,30.366825,18,335,...,343,73,6194.0,1173.0,4.793210,4.761194,1889.885449,1893.761194,2673.862139,train
4,218068.927126,35,541,1,2195,111,59.932472,30.351921,19,331,...,379,101,7563.0,1763.0,4.855191,4.886598,1876.082192,1878.762887,117.265675,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,106545.776425,21,541,1,6357,111,59.929746,30.378098,9,147,...,352,128,5722.0,1915.0,4.927711,4.909091,1905.792169,1906.661157,462.345873,train
461,118202.628431,32,83,2,3854,20,43.112960,131.918839,10,70,...,94,34,1071.0,374.0,6.097222,6.333333,1955.775281,1949.343750,636.221047,train
462,431794.684981,35,541,2,7283,111,59.936832,30.363586,12,302,...,396,122,7042.0,1866.0,4.716216,4.771930,1885.875676,1888.140351,102.525579,train
463,421288.349919,35,541,1,2196,111,59.932724,30.348995,18,280,...,325,63,6345.0,1230.0,4.820847,4.964912,1874.254902,1874.789474,239.757401,train


In [47]:
X_train = data_processed_1[features].to_numpy()
y_train = data_processed_1[target].to_numpy().reshape(-1, 1)

X_valid = data_train[features].values[val_id]
y_valid = data_train[target].values[val_id].reshape(-1, 1)

X_test = data_test[features].to_numpy()
y_test = data_test[target].to_numpy().reshape(-1, 1)

In [48]:
max_epochs = 1000

In [49]:
clf.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    eval_name=['train', 'valid'],
    eval_metric=['rmsle', 'mae', 'rmse', 'mse'],
    max_epochs=max_epochs,
    patience=50,
    batch_size=64, virtual_batch_size=16,
    num_workers=8,
    drop_last=False
)

epoch 0  | loss: 36852192000.49516| train_rmsle: 104.66344| train_mae: 171361.48734| train_rmse: 191964.84058| train_mse: 36850500017.76911| valid_rmsle: 107.02748| valid_mae: 174851.96167| valid_rmse: 192339.97467| valid_mse: 36994665856.32138|  0:00:01s
epoch 1  | loss: 36850713067.2031| train_rmsle: 90.44319| train_mae: 171354.01822| train_rmse: 191957.65315| train_mse: 36847740601.79582| valid_rmsle: 92.59812| valid_mae: 174844.99973| valid_rmse: 192332.16657| valid_mse: 36991662298.01974|  0:00:01s
epoch 2  | loss: 36848902761.96518| train_rmsle: 80.63804| train_mae: 171343.86476| train_rmse: 191948.46929| train_mse: 36844214863.36642| valid_rmsle: 80.80959| valid_mae: 174832.84991| valid_rmse: 192323.05339| valid_mse: 36988156866.12805|  0:00:02s
epoch 3  | loss: 36846209099.26499| train_rmsle: 71.52948| train_mae: 171328.57499| train_rmse: 191933.37638| train_mse: 36838420969.62062| valid_rmsle: 72.51814| valid_mae: 174819.41111| valid_rmse: 192308.96469| valid_mse: 36982737898.

epoch 33 | loss: 35940973094.62282| train_rmsle: 21.17044| train_mae: 168781.07167| train_rmse: 189323.38177| train_mse: 35843342884.94538| valid_rmsle: 18.89784| valid_mae: 172117.5491| valid_rmse: 189557.63164| valid_mse: 35932095714.39282|  0:00:30s
epoch 34 | loss: 35900953954.53772| train_rmsle: 21.99482| train_mae: 168474.15764| train_rmse: 188888.86133| train_mse: 35679001936.20761| valid_rmsle: 20.83058| valid_mae: 171907.94658| valid_rmse: 189212.03028| valid_mse: 35801192401.25041|  0:00:31s
epoch 35 | loss: 35856887447.52031| train_rmsle: 21.61556| train_mae: 168391.07388| train_rmse: 188802.8262| train_mse: 35646507180.41692| valid_rmsle: 21.1462 | valid_mae: 171863.7849| valid_rmse: 189143.83824| valid_mse: 35775391544.7347|  0:00:32s
epoch 36 | loss: 35818250291.4971| train_rmsle: 22.10765| train_mae: 168458.88715| train_rmse: 188843.26761| train_mse: 35661779722.07302| valid_rmsle: 22.22239| valid_mae: 171946.18254| valid_rmse: 189172.2099| valid_mse: 35786124998.58106| 

epoch 66 | loss: 34153340654.66924| train_rmsle: 14.14312| train_mae: 164664.73748| train_rmse: 184655.85202| train_mse: 34097783684.41356| valid_rmsle: 14.4407 | valid_mae: 168289.39049| valid_rmse: 185228.87571| valid_mse: 34309736395.08734|  0:01:03s
epoch 67 | loss: 34101959551.25725| train_rmsle: 13.89164| train_mae: 164314.5914| train_rmse: 184166.23261| train_mse: 33917201233.07527| valid_rmsle: 14.07911| valid_mae: 167952.51162| valid_rmse: 184833.38162| valid_mse: 34163378961.19906|  0:01:04s
epoch 68 | loss: 34025158006.3443| train_rmsle: 13.79535| train_mae: 164225.0282| train_rmse: 184056.22745| train_mse: 33876694861.32346| valid_rmsle: 13.50543| valid_mae: 167696.23896| valid_rmse: 184615.22708| valid_mse: 34082782068.09272|  0:01:05s
epoch 69 | loss: 33980506832.95938| train_rmsle: 13.84257| train_mae: 164261.19089| train_rmse: 184156.0961| train_mse: 33913467731.95512| valid_rmsle: 13.39475| valid_mae: 167706.74158| valid_rmse: 184729.56266| valid_mse: 34125011319.25273

epoch 99 | loss: 31732344479.44294| train_rmsle: 8.54118 | train_mae: 159580.06602| train_rmse: 178495.5375| train_mse: 31860656906.8232| valid_rmsle: 8.55628 | valid_mae: 162526.81789| valid_rmse: 178583.09895| valid_mse: 31891923232.18682|  0:01:36s
epoch 100| loss: 31641795411.68278| train_rmsle: 9.70401 | train_mae: 160156.58316| train_rmse: 179032.71184| train_mse: 32052711909.42492| valid_rmsle: 9.55402 | valid_mae: 163059.35784| valid_rmse: 179048.94336| valid_mse: 32058524119.52216|  0:01:37s
epoch 101| loss: 31555910010.30561| train_rmsle: 9.40089 | train_mae: 158867.09448| train_rmse: 177630.4293| train_mse: 31552569413.18866| valid_rmsle: 9.26332 | valid_mae: 161746.06984| valid_rmse: 177620.98383| valid_mse: 31549213896.78143|  0:01:39s
epoch 102| loss: 31520182422.52998| train_rmsle: 8.97419 | train_mae: 158653.1177| train_rmse: 177228.73691| train_mse: 31410025185.90734| valid_rmsle: 8.81347 | valid_mae: 161487.62146| valid_rmse: 177304.42154| valid_mse: 31436857899.14665

epoch 132| loss: 29092715979.51258| train_rmsle: 6.23644 | train_mae: 151543.52687| train_rmse: 169002.27119| train_mse: 28561767667.01409| valid_rmsle: 6.69558 | valid_mae: 154666.29092| valid_rmse: 169451.25448| valid_mse: 28713727646.2551|  0:02:08s
epoch 133| loss: 28962860588.5648| train_rmsle: 6.51104 | train_mae: 153212.9283| train_rmse: 170861.44955| train_mse: 29193634943.18958| valid_rmsle: 7.07306 | valid_mae: 156758.21372| valid_rmse: 172140.94365| valid_mse: 29632504482.09167|  0:02:10s
epoch 134| loss: 28903871107.71374| train_rmsle: 7.22675 | train_mae: 154225.09336| train_rmse: 171934.1331| train_mse: 29561346124.00278| valid_rmsle: 7.38875 | valid_mae: 157340.98892| valid_rmse: 172903.6881| valid_mse: 29895685359.73084|  0:02:11s
epoch 135| loss: 28762844526.42166| train_rmsle: 6.98085 | train_mae: 153159.8475| train_rmse: 170024.0736| train_mse: 28908185604.41632| valid_rmsle: 6.66945 | valid_mae: 155447.97811| valid_rmse: 169968.57743| valid_mse: 28889317314.8593|  0

epoch 165| loss: 26093411092.30174| train_rmsle: 5.16841 | train_mae: 145625.84809| train_rmse: 162181.87514| train_mse: 26302960624.2569| valid_rmsle: 4.70848 | valid_mae: 147866.81501| valid_rmse: 161929.64501| valid_mse: 26221209933.09828|  0:02:44s
epoch 166| loss: 25940797499.41972| train_rmsle: 5.13735 | train_mae: 144717.14106| train_rmse: 160386.49464| train_mse: 25723827662.85968| valid_rmsle: 4.59762 | valid_mae: 146784.74048| valid_rmse: 160734.48367| valid_mse: 25835574241.4689|  0:02:45s
epoch 167| loss: 25766460138.70793| train_rmsle: 4.6622  | train_mae: 143839.04342| train_rmse: 159752.69643| train_mse: 25520924016.504| valid_rmsle: 4.42178 | valid_mae: 146396.32043| valid_rmse: 160575.66945| valid_mse: 25784545618.69634|  0:02:46s
epoch 168| loss: 25753635008.12379| train_rmsle: 4.85737 | train_mae: 144694.93533| train_rmse: 161195.55683| train_mse: 25984007541.49614| valid_rmsle: 4.59004 | valid_mae: 147305.95443| valid_rmse: 162016.98524| valid_mse: 26249503506.89604

epoch 198| loss: 22823864545.79497| train_rmsle: 3.58637 | train_mae: 132823.57014| train_rmse: 146663.92605| train_mse: 21510307205.08029| valid_rmsle: 3.27033 | valid_mae: 134550.08351| valid_rmse: 147283.34809| valid_mse: 21692384624.67594|  0:03:17s
epoch 199| loss: 22725377667.71374| train_rmsle: 3.8678  | train_mae: 135780.81133| train_rmse: 150139.24555| train_mse: 22541793054.79801| valid_rmsle: 3.78355 | valid_mae: 138820.11125| valid_rmse: 152011.00049| valid_mse: 23107344271.06528|  0:03:18s
epoch 200| loss: 22623093876.8588| train_rmsle: 3.72388 | train_mae: 133294.89861| train_rmse: 146454.56015| train_mse: 21448938188.36182| valid_rmsle: 3.9786  | valid_mae: 136600.77038| valid_rmse: 148500.55468| valid_mse: 22052414740.20036|  0:03:19s
epoch 201| loss: 22498732253.83366| train_rmsle: 3.52045 | train_mae: 132347.29389| train_rmse: 146929.18538| train_mse: 21588185516.5225| valid_rmsle: 3.50616 | valid_mae: 135509.12903| valid_rmse: 148648.29083| valid_mse: 22096314366.215

epoch 231| loss: 19326447338.70793| train_rmsle: 2.37383 | train_mae: 124287.78293| train_rmse: 137413.48364| train_mse: 18882465485.59968| valid_rmsle: 2.30865 | valid_mae: 126191.05893| valid_rmse: 138296.52644| valid_mse: 19125929226.00904|  0:03:51s
epoch 232| loss: 19308551183.84526| train_rmsle: 2.39355 | train_mae: 124621.74545| train_rmse: 138831.96136| train_mse: 19274313494.55854| valid_rmsle: 2.2889  | valid_mae: 126881.66433| valid_rmse: 140290.32932| valid_mse: 19681376499.39307|  0:03:52s
epoch 233| loss: 19049444591.65957| train_rmsle: 2.36871 | train_mae: 126564.63279| train_rmse: 140223.49324| train_mse: 19662628057.50084| valid_rmsle: 2.45684 | valid_mae: 129358.66674| valid_rmse: 142211.4101| valid_mse: 20224085161.43469|  0:03:53s
epoch 234| loss: 19351319332.147| train_rmsle: 2.86623 | train_mae: 131508.96744| train_rmse: 145862.94563| train_mse: 21275998906.49861| valid_rmsle: 2.81011 | valid_mae: 133597.33286| valid_rmse: 146948.1401| valid_mse: 21593755879.1132|

epoch 264| loss: 16048372022.96325| train_rmsle: 1.79155 | train_mae: 109352.77697| train_rmse: 120264.05692| train_mse: 14463443388.016| valid_rmsle: 1.71634 | valid_mae: 111249.02415| valid_rmse: 121666.50095| valid_mse: 14802737454.35981|  0:04:22s
epoch 265| loss: 15890629138.81625| train_rmsle: 1.90145 | train_mae: 113046.97165| train_rmse: 124923.25917| train_mse: 15605820681.09534| valid_rmsle: 1.80705 | valid_mae: 114755.51544| valid_rmse: 125742.65195| valid_mse: 15811214518.44437|  0:04:22s
epoch 266| loss: 15797161045.16828| train_rmsle: 1.93789 | train_mae: 113858.91433| train_rmse: 125611.91075| train_mse: 15778352121.6907| valid_rmsle: 2.05107 | valid_mae: 116355.39459| valid_rmse: 127054.72212| valid_mse: 16142902411.96868|  0:04:23s
epoch 267| loss: 15845286942.70019| train_rmsle: 1.87797 | train_mae: 112138.74375| train_rmse: 124390.03185| train_mse: 15472880022.44717| valid_rmsle: 1.95252 | valid_mae: 115663.17157| valid_rmse: 127404.16064| valid_mse: 16231820149.3611

epoch 297| loss: 12838891634.87814| train_rmsle: 1.37906 | train_mae: 105541.17519| train_rmse: 116109.58917| train_mse: 13481436697.63648| valid_rmsle: 1.41702 | valid_mae: 107811.76233| valid_rmse: 118713.90614| valid_mse: 14092991511.3034|  0:04:55s
epoch 298| loss: 12631134291.18762| train_rmsle: 1.13271 | train_mae: 102572.70522| train_rmse: 114071.88815| train_mse: 13012395666.70982| valid_rmsle: 1.10119 | valid_mae: 103673.63486| valid_rmse: 114908.98544| valid_mse: 13204074934.91377|  0:04:56s
epoch 299| loss: 12552708298.02708| train_rmsle: 0.96522 | train_mae: 95386.08634| train_rmse: 105642.08342| train_mse: 11160249789.93904| valid_rmsle: 0.9589  | valid_mae: 97474.2697| valid_rmse: 107776.95942| valid_mse: 11615872982.77842|  0:04:57s
epoch 300| loss: 12412211449.56286| train_rmsle: 1.09198 | train_mae: 91599.71682| train_rmse: 102077.65997| train_mse: 10419848664.93708| valid_rmsle: 1.04142 | valid_mae: 93287.80664| valid_rmse: 104339.87867| valid_mse: 10886810281.48695| 

epoch 330| loss: 9922411546.73888| train_rmsle: 0.65933 | train_mae: 74805.23126| train_rmse: 82285.03195| train_mse: 6770826482.46026| valid_rmsle: 0.5239  | valid_mae: 74388.88867| valid_rmse: 83316.8958| valid_mse: 6941705126.07515|  0:05:25s
epoch 331| loss: 9888481244.34816| train_rmsle: 0.6064  | train_mae: 77247.51119| train_rmse: 88228.13753| train_mse: 7784204251.32729| valid_rmsle: 0.45752 | valid_mae: 77995.37599| valid_rmse: 90164.61591| valid_mse: 8129657961.9116|  0:05:26s
epoch 332| loss: 9708508135.24178| train_rmsle: 0.59029 | train_mae: 77414.85738| train_rmse: 86852.5897| train_mse: 7543372336.76214| valid_rmsle: 0.52876 | valid_mae: 79533.99932| valid_rmse: 90692.61298| valid_mse: 8225150049.8023|  0:05:27s
epoch 333| loss: 9310666943.13346| train_rmsle: 0.65595 | train_mae: 80935.76239| train_rmse: 90902.92634| train_mse: 8263342016.83524| valid_rmsle: 0.56895 | valid_mae: 82249.62836| valid_rmse: 93862.75725| valid_mse: 8810217199.42899|  0:05:28s
epoch 334| loss:

epoch 364| loss: 7365410103.45841| train_rmsle: 0.46163 | train_mae: 69276.85233| train_rmse: 77049.30769| train_mse: 5936595815.02792| valid_rmsle: 0.4368  | valid_mae: 70977.13614| valid_rmse: 79966.8905| valid_mse: 6394703576.40988|  0:05:57s
epoch 365| loss: 7238418110.14313| train_rmsle: 0.4473  | train_mae: 73213.06553| train_rmse: 86283.73638| train_mse: 7444883163.3437| valid_rmsle: 0.45297 | valid_mae: 75317.94326| valid_rmse: 89295.93132| valid_mse: 7973763350.57137|  0:05:58s
epoch 366| loss: 7137482661.88008| train_rmsle: 0.37202 | train_mae: 63377.74169| train_rmse: 71785.36322| train_mse: 5153138373.18028| valid_rmsle: 0.37835 | valid_mae: 64252.78877| valid_rmse: 73556.78254| valid_mse: 5410600257.74764|  0:05:59s
epoch 367| loss: 6845933885.89555| train_rmsle: 0.43592 | train_mae: 73610.97632| train_rmse: 85703.73657| train_mse: 7345130462.67839| valid_rmsle: 0.4613  | valid_mae: 76119.43817| valid_rmse: 90163.22284| valid_mse: 8129406753.78067|  0:06:00s
epoch 368| los

epoch 398| loss: 5077995646.76209| train_rmsle: 0.18525 | train_mae: 55788.63933| train_rmse: 68959.80468| train_mse: 4755454660.96929| valid_rmsle: 0.20377 | valid_mae: 58154.04345| valid_rmse: 73074.8823| valid_mse: 5339938423.44059|  0:06:29s
epoch 399| loss: 5107435688.3559| train_rmsle: 0.18419 | train_mae: 54885.37404| train_rmse: 64952.61388| train_mse: 4218842049.5867| valid_rmsle: 0.20765 | valid_mae: 58204.39565| valid_rmse: 70628.89648| valid_mse: 4988441018.54172|  0:06:30s
epoch 400| loss: 5178539497.22244| train_rmsle: 0.17537 | train_mae: 55943.13498| train_rmse: 70245.4164| train_mse: 4934418525.76355| valid_rmsle: 0.20483 | valid_mae: 59090.0624| valid_rmse: 74909.8191| valid_mse: 5611480997.78264|  0:06:31s
epoch 401| loss: 4932874873.81044| train_rmsle: 0.22123 | train_mae: 58445.93994| train_rmse: 65626.15417| train_mse: 4306792111.63518| valid_rmsle: 0.24578 | valid_mae: 62302.00791| valid_rmse: 72608.08068| valid_mse: 5271933379.68983|  0:06:32s
epoch 402| loss: 4

epoch 432| loss: 3407345295.59768| train_rmsle: 0.14893 | train_mae: 49112.99321| train_rmse: 59287.10018| train_mse: 3514960248.26808| valid_rmsle: 0.13847 | valid_mae: 50129.13188| valid_rmse: 62755.73468| valid_mse: 3938282235.54779|  0:07:00s
epoch 433| loss: 3849369625.74855| train_rmsle: 0.11756 | train_mae: 43310.21559| train_rmse: 57350.285| train_mse: 3289055189.0204| valid_rmsle: 0.11613 | valid_mae: 45677.25418| valid_rmse: 62653.55435| valid_mse: 3925467872.58169|  0:07:01s
epoch 434| loss: 3422457440.0619| train_rmsle: 0.10513 | train_mae: 35290.92268| train_rmse: 43125.92489| train_mse: 1859845397.70349| valid_rmsle: 0.0947  | valid_mae: 36780.34956| valid_rmse: 49437.04849| valid_mse: 2444021763.4441|  0:07:02s
epoch 435| loss: 3297527203.15667| train_rmsle: 0.10755 | train_mae: 36624.4834| train_rmse: 47959.158| train_mse: 2300080835.73806| valid_rmsle: 0.09958 | valid_mae: 39023.84978| valid_rmse: 54065.79271| valid_mse: 2923109941.03486|  0:07:03s
epoch 436| loss: 312

epoch 466| loss: 2477743248.58801| train_rmsle: 0.04127 | train_mae: 25293.10042| train_rmse: 39278.21952| train_mse: 1542778528.98117| valid_rmsle: 0.04745 | valid_mae: 27965.54145| valid_rmse: 43585.6168| valid_mse: 1899705991.99975|  0:07:30s
epoch 467| loss: 2409470044.10058| train_rmsle: 0.03919 | train_mae: 25035.9988| train_rmse: 32443.94681| train_mse: 1052609684.73392| valid_rmsle: 0.03277 | valid_mae: 26764.68133| valid_rmse: 36546.79526| valid_mse: 1335668244.02502|  0:07:31s
epoch 468| loss: 2075084961.17602| train_rmsle: 0.03744 | train_mae: 25377.62425| train_rmse: 38544.5615| train_mse: 1485683221.04954| valid_rmsle: 0.03101 | valid_mae: 27413.17092| valid_rmse: 42048.06437| valid_mse: 1768039717.00643|  0:07:32s
epoch 469| loss: 1967373503.13346| train_rmsle: 0.03554 | train_mae: 22435.93212| train_rmse: 30251.29531| train_mse: 915140867.99081| valid_rmsle: 0.02552 | valid_mae: 24098.23949| valid_rmse: 34907.11345| valid_mse: 1218506569.518|  0:07:33s
epoch 470| loss: 2

epoch 500| loss: 1560470230.15861| train_rmsle: 0.02051 | train_mae: 18335.16649| train_rmse: 27282.38751| train_mse: 744328668.49468| valid_rmsle: 0.02639 | valid_mae: 22518.18298| valid_rmse: 36322.16994| valid_mse: 1319300029.31168|  0:08:03s
epoch 501| loss: 1432443154.32108| train_rmsle: 0.0223  | train_mae: 19736.82219| train_rmse: 30269.6937| train_mse: 916254356.89004| valid_rmsle: 0.02634 | valid_mae: 22513.21516| valid_rmse: 36982.7772| valid_mse: 1367725809.53506|  0:08:04s
epoch 502| loss: 1691146069.66344| train_rmsle: 0.03397 | train_mae: 26299.00131| train_rmse: 38745.80269| train_mse: 1501237226.34034| valid_rmsle: 0.04027 | valid_mae: 28893.78643| valid_rmse: 45612.52229| valid_mse: 2080502189.86652|  0:08:05s
epoch 503| loss: 1587964324.88975| train_rmsle: 0.03925 | train_mae: 27951.37462| train_rmse: 36438.95175| train_mse: 1327797204.99821| valid_rmsle: 0.0434  | valid_mae: 31007.08051| valid_rmse: 43260.71594| valid_mse: 1871489543.74054|  0:08:06s
epoch 504| loss:

epoch 534| loss: 1225431041.98066| train_rmsle: 0.01202 | train_mae: 14075.75778| train_rmse: 21085.21179| train_mse: 444586156.1408| valid_rmsle: 0.01332 | valid_mae: 15796.98269| valid_rmse: 26234.53284| valid_mse: 688250713.59139|  0:08:37s
epoch 535| loss: 1402682064.95938| train_rmsle: 0.01047 | train_mae: 13016.84923| train_rmse: 21138.97093| train_mse: 446856092.03855| valid_rmsle: 0.01276 | valid_mae: 15528.68659| valid_rmse: 26921.36075| valid_mse: 724759664.60003|  0:08:38s
epoch 536| loss: 1722798044.34816| train_rmsle: 0.02027 | train_mae: 17599.13026| train_rmse: 29171.48832| train_mse: 850975730.64217| valid_rmsle: 0.02112 | valid_mae: 19672.48954| valid_rmse: 34558.81818| valid_mse: 1194311914.12615|  0:08:39s
epoch 537| loss: 1462909620.23985| train_rmsle: 0.02143 | train_mae: 18874.91936| train_rmse: 30548.58186| train_mse: 933215853.83229| valid_rmsle: 0.02285 | valid_mae: 21039.47083| valid_rmse: 35984.87078| valid_mse: 1294910924.77234|  0:08:40s
epoch 538| loss: 12

In [50]:
preds = clf.predict(X_test)

y_true = y_test

test_score = mean_absolute_percentage_error(y_pred=preds, y_true=y_true)
print("Test MAPE:", test_score)
print("Test CUSTOM SCORE", CustomMetrics(y_true, 0.95*preds))

Test MAPE: 0.09093584496682086
Test CUSTOM SCORE [0.25551674]


In [72]:
id_score = np.array(list(df_test['id'])).reshape(-1, 1)
X_score = df_test[features].to_numpy()
y_score = clf.predict(X_score)*0.94
print(id_score)

result = pd.DataFrame(data = np.concatenate((id_score, y_score), axis = 1),\
                      columns = ['id', 'per_square_meter_price'])
result.to_csv("puplic_prediction.csv", index = False)

[['COL_289284']
 ['COL_289305']
 ['COL_289318']
 ...
 ['COL_455261']
 ['COL_455381']
 ['COL_455397']]
