In [39]:
import pandas as pd
import numpy as np
import torch
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import math
from sklearn.metrics import mean_absolute_percentage_error

In [40]:
def CustomMetrics(y_true, y_pred):
    THRESHOLD = 0.15
    NEGATIVE_WEIGHT = 1.1
    scores = []
    for true, pred in zip(y_true, y_pred):
        deviation = (pred - true) / np.maximum(1e-8, true)
        if np.abs(deviation) <= THRESHOLD:
            scores.append(0)
        elif deviation <= - 4 * THRESHOLD:
            scores.append(9 * NEGATIVE_WEIGHT)
        elif deviation < -THRESHOLD:
            scores.append(NEGATIVE_WEIGHT * ((deviation / THRESHOLD) + 1) ** 2)
        elif deviation < 4 * THRESHOLD:
            scores.append(((deviation / THRESHOLD) - 1) ** 2)
        else:
            scores.append(9)
    return np.array(scores).mean()

In [64]:
#Load the dataset
data = pd.read_csv("../data/train.csv", low_memory = False)
#data = data[data['per_square_meter_price'] < 453032]
#data = data[data['per_square_meter_price'] > 150000].reset_index(drop = True)
data = data[data['per_square_meter_price'] < 500000]
data = data[data['per_square_meter_price'] > 100000].reset_index(drop = True)
data.info()
df_test = pd.read_csv("../data/test.csv", low_memory = False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75218 entries, 0 to 75217
Data columns (total 77 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   city                                 75218 non-null  object 
 1   floor                                38576 non-null  object 
 2   id                                   75218 non-null  object 
 3   lat                                  75218 non-null  float64
 4   lng                                  75218 non-null  float64
 5   osm_amenity_points_in_0.001          75218 non-null  int64  
 6   osm_amenity_points_in_0.005          75218 non-null  int64  
 7   osm_amenity_points_in_0.0075         75218 non-null  int64  
 8   osm_amenity_points_in_0.01           75218 non-null  int64  
 9   osm_building_points_in_0.001         75218 non-null  int64  
 10  osm_building_points_in_0.005         75218 non-null  int64  
 11  osm_building_points_in_0.007

In [65]:
target = 'per_square_meter_price'
cat_features = ['region', 'city', 'realty_type', 'street', 'osm_city_nearest_name']
num_features = ['lat', 'lng', 'osm_amenity_points_in_0.001',
       'osm_amenity_points_in_0.005', 'osm_amenity_points_in_0.0075',
       'osm_amenity_points_in_0.01', 'osm_building_points_in_0.001',
       'osm_building_points_in_0.005', 'osm_building_points_in_0.0075',
       'osm_building_points_in_0.01', 'osm_catering_points_in_0.001',
       'osm_catering_points_in_0.005', 'osm_catering_points_in_0.0075',
       'osm_catering_points_in_0.01', 'osm_city_closest_dist',
       'osm_city_nearest_population',
       'osm_crossing_closest_dist', 'osm_crossing_points_in_0.001',
       'osm_crossing_points_in_0.005', 'osm_crossing_points_in_0.0075',
       'osm_crossing_points_in_0.01', 'osm_culture_points_in_0.001',
       'osm_culture_points_in_0.005', 'osm_culture_points_in_0.0075',
       'osm_culture_points_in_0.01', 'osm_finance_points_in_0.001',
       'osm_finance_points_in_0.005', 'osm_finance_points_in_0.0075',
       'osm_finance_points_in_0.01', 'osm_healthcare_points_in_0.005',
       'osm_healthcare_points_in_0.0075', 'osm_healthcare_points_in_0.01',
       'osm_historic_points_in_0.005', 'osm_historic_points_in_0.0075',
       'osm_historic_points_in_0.01', 'osm_hotels_points_in_0.005',
       'osm_hotels_points_in_0.0075', 'osm_hotels_points_in_0.01',
       'osm_leisure_points_in_0.005', 'osm_leisure_points_in_0.0075',
       'osm_leisure_points_in_0.01', 'osm_offices_points_in_0.001',
       'osm_offices_points_in_0.005', 'osm_offices_points_in_0.0075',
       'osm_offices_points_in_0.01', 'osm_shops_points_in_0.001',
       'osm_shops_points_in_0.005', 'osm_shops_points_in_0.0075',
       'osm_shops_points_in_0.01', 'osm_subway_closest_dist',
       'osm_train_stop_closest_dist', 'osm_train_stop_points_in_0.005',
       'osm_train_stop_points_in_0.0075', 'osm_train_stop_points_in_0.01',
       'osm_transport_stop_closest_dist', 'osm_transport_stop_points_in_0.005',
       'osm_transport_stop_points_in_0.0075',
       'osm_transport_stop_points_in_0.01',
       'reform_count_of_houses_1000', 'reform_count_of_houses_500',
       'reform_house_population_1000', 'reform_house_population_500',
       'reform_mean_floor_count_1000', 'reform_mean_floor_count_500',
       'reform_mean_year_building_1000', 'reform_mean_year_building_500','total_square',
        'price_type'
]

data_processed = data[[target] + cat_features + num_features]
data_processed = data_processed.dropna(subset = ['street'])
df_test_processed = df_test[cat_features + num_features + ['id']]
data_processed.info()
data_processed['realty_type'].value_counts()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74999 entries, 0 to 75217
Data columns (total 74 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   per_square_meter_price               74999 non-null  float64
 1   region                               74999 non-null  object 
 2   city                                 74999 non-null  object 
 3   realty_type                          74999 non-null  int64  
 4   street                               74999 non-null  object 
 5   osm_city_nearest_name                74999 non-null  object 
 6   lat                                  74999 non-null  float64
 7   lng                                  74999 non-null  float64
 8   osm_amenity_points_in_0.001          74999 non-null  int64  
 9   osm_amenity_points_in_0.005          74999 non-null  int64  
 10  osm_amenity_points_in_0.0075         74999 non-null  int64  
 11  osm_amenity_points_in_0.01  

10     35567
110    22891
100    16541
Name: realty_type, dtype: int64

In [66]:
categorical_columns = []
cat_dims =  {}
for col in cat_features:
    print(col, data_processed[col].nunique())
    l_enc = LabelEncoder()
    l_enc.fit(np.concatenate((data_processed[col].values, df_test[col].values)))
    print(type(data_processed[col].values))
    #data_processed[col] = data_processed[col].fillna("VV_likely")
    data_processed[col] = l_enc.transform(data_processed[col].values)
    df_test[col] = l_enc.transform(df_test[col].values)
    categorical_columns.append(col)
    cat_dims[col] = len(l_enc.classes_)
    
#разделим данные по признаку price_type
'''
data_processed_0 = data_processed[data_processed['price_type'] == 0]
data_processed_1 = data_processed[data_processed['price_type'] == 1]
price_type_part = 0.5
data_processed_1_train, data_test = train_test_split(data_processed_1, train_size = price_type_part)
data_train = pd.concat([data_processed_1_train, data_processed_0], axis = 0)
'''
data_processed_1 = data_processed[data_processed['price_type'] == 1]
data_train, data_test = train_test_split(data_processed_1, train_size = 0.9)
    
for col in num_features:
    tmp_df = data_processed_1.groupby(by=['realty_type']).mean().reset_index()
    d = {key:value for key, value in zip(list(tmp_df['realty_type']), list(tmp_df[col]))}
    data_processed_1[col] = data_processed_1[col].fillna(data_processed_1['realty_type'].map(d))
    data_train[col] = data_train[col].fillna(data_train['realty_type'].map(d))
    data_test[col] = data_test[col].fillna(data_test['realty_type'].map(d))
    df_test[col] = df_test[col].fillna(df_test['realty_type'].map(d))
    #val = data_train[col].mean()
    #elems = []
    #for elem, prop_type in zip(list(data_train[col]), list(data_train['realty_type'])):
    #    if elem == np.nan:
    #        print(1)
    #        tmp = list(tmp_df[tmp_df['realty_type'] == prop_type][col])[0]
    #        elems.append(tmp)
    #   else:
    #        elems.append(elem)
    #data_train[col] = elems
    #elems = []
    #for elem, prop_type in zip(list(data_test[col]), list(data_test['realty_type'])):
    #    if elem == np.nan:
    #        tmp = list(tmp_df[tmp_df['realty_type'] == prop_type][col])[0]
    #        elems.append(tmp)
    #    else:
    #        elems.append(elem)
    #data_test[col] = elems
    
    #data_train.fillna(tmp_df, inplace = True)
    #data_test.fillna(tmp_df, inplace = True)
    #data_train[col] = data_train[col].fillna(val)
    #data_test[col] = data_test[col].fillna(val)
    
data_processed_1 = data_processed_1.drop(columns = ['price_type']).reset_index(drop = True)
data_train = data_train.drop(columns = ['price_type']).reset_index(drop = True)
data_test = data_test.drop(columns = ['price_type']).reset_index(drop = True)
df_test = df_test.drop(columns = ['price_type']).reset_index(drop = True)
data_test.info()

region 49
<class 'numpy.ndarray'>
city 715
<class 'numpy.ndarray'>
realty_type 3
<class 'numpy.ndarray'>
street 8064
<class 'numpy.ndarray'>
osm_city_nearest_name 147
<class 'numpy.ndarray'>


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_processed_1[col] = data_processed_1[col].fillna(data_processed_1['realty_type'].map(d))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_train[col] = data_train[col].fillna(data_train['realty_type'].map(d))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test[col] = data_test[col].filln

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 73 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   per_square_meter_price               52 non-null     float64
 1   region                               52 non-null     int64  
 2   city                                 52 non-null     int64  
 3   realty_type                          52 non-null     int64  
 4   street                               52 non-null     int64  
 5   osm_city_nearest_name                52 non-null     int64  
 6   lat                                  52 non-null     float64
 7   lng                                  52 non-null     float64
 8   osm_amenity_points_in_0.001          52 non-null     int64  
 9   osm_amenity_points_in_0.005          52 non-null     int64  
 10  osm_amenity_points_in_0.0075         52 non-null     int64  
 11  osm_amenity_points_in_0.01        

In [67]:
if "Set" not in data_train.columns:
    data_train["Set"] = np.random.choice(["train", "valid"],\
                                             p =[.9, .1], size=(data_train.shape[0],))
train_id = data_train[data_train.Set=="train"].index
#test_id = data_train[data_train.Set=="test"].index
val_id = data_train[data_train.Set=="valid"].index

In [68]:
unused_feat = ['Set']

features = [ col for col in data_train.columns if col not in unused_feat+[target]] 

cat_idxs = [ i for i, f in enumerate(features) if f in cat_features]

cat_dimen = [ cat_dims[f] for i, f in enumerate(features) if f in cat_features]

cat_emb_dim = [int(math.sqrt(val)) for val in cat_dimen]

In [87]:
clf = TabNetRegressor(cat_dims=cat_dimen, cat_emb_dim=cat_emb_dim, cat_idxs=cat_idxs)
data_train

Device used : cuda


Unnamed: 0,per_square_meter_price,region,city,realty_type,street,osm_city_nearest_name,lat,lng,osm_amenity_points_in_0.001,osm_amenity_points_in_0.005,...,reform_count_of_houses_1000,reform_count_of_houses_500,reform_house_population_1000,reform_house_population_500,reform_mean_floor_count_1000,reform_mean_floor_count_500,reform_mean_year_building_1000,reform_mean_year_building_500,total_square,Set
0,102930.096674,19,286,0,6122,58,56.013256,92.883047,4,45,...,128,66,2314.0,1104.0,6.455285,5.906250,1971.385246,1969.634921,281.414289,train
1,195741.172773,35,541,1,2191,111,59.933533,30.355501,10,322,...,405,109,7380.0,2004.0,4.819121,4.839623,1879.194805,1875.641509,62.352748,train
2,112652.605164,25,524,2,5370,62,55.751263,37.874082,19,109,...,91,36,4336.0,1473.0,15.527473,14.388889,1994.000000,1995.416667,108.262032,train
3,187312.473685,35,541,1,2196,111,59.931900,30.351117,22,346,...,404,111,7967.0,1905.0,4.864103,4.850467,1877.602067,1878.121495,117.007691,train
4,431794.684981,35,541,2,7283,111,59.936832,30.363586,12,302,...,396,122,7042.0,1866.0,4.716216,4.771930,1885.875676,1888.140351,102.525579,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,129166.358063,35,541,1,7598,111,59.929863,30.317603,10,354,...,240,87,4871.0,2018.0,4.556650,4.649351,1853.029557,1836.064935,54.371743,valid
461,107316.343336,35,541,0,2191,111,59.927207,30.381734,10,129,...,241,75,3720.0,1013.0,4.986842,5.000000,1908.219298,1904.637681,154.049229,train
462,103644.542794,19,286,0,4253,58,56.016285,92.869379,0,20,...,90,25,1426.0,269.0,5.590909,4.826087,1966.897727,1969.434783,68.821761,train
463,154877.643110,35,541,1,2196,111,59.932194,30.352927,19,342,...,397,108,7788.0,1887.0,4.850785,4.855769,1877.229551,1877.980769,110.926275,train


In [88]:
X_train = data_processed_1[features].to_numpy()
y_train = data_processed_1[target].to_numpy().reshape(-1, 1)

X_valid = data_train[features].values[val_id]
y_valid = data_train[target].values[val_id].reshape(-1, 1)

X_test = data_test[features].to_numpy()
y_test = data_test[target].to_numpy().reshape(-1, 1)

In [89]:
max_epochs = 450

In [90]:
clf.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_train, y_train), (X_valid, y_valid)],
    eval_name=['train', 'valid'],
    eval_metric=['rmsle', 'mae', 'rmse', 'mse'],
    max_epochs=max_epochs,
    patience=50,
    batch_size=64, virtual_batch_size=16,
    num_workers=8,
    drop_last=False
)

epoch 0  | loss: 36852192000.49516| train_rmsle: 104.66344| train_mae: 171361.48734| train_rmse: 191964.84058| train_mse: 36850500017.76911| valid_rmsle: 105.62674| valid_mae: 186292.85068| valid_rmse: 214815.0478| valid_mse: 46145504762.1299|  0:00:00s
epoch 1  | loss: 36850713067.2031| train_rmsle: 90.44319| train_mae: 171354.01822| train_rmse: 191957.65315| train_mse: 36847740601.79582| valid_rmsle: 91.04976| valid_mae: 186285.11464| valid_rmse: 214806.14769| valid_mse: 46141681084.44725|  0:00:01s
epoch 2  | loss: 36848902761.96518| train_rmsle: 80.63804| train_mae: 171343.86476| train_rmse: 191948.46929| train_mse: 36844214863.36642| valid_rmsle: 80.98252| valid_mae: 186275.47988| valid_rmse: 214798.69386| valid_mse: 46138478884.7662|  0:00:02s
epoch 3  | loss: 36846209099.26499| train_rmsle: 71.52948| train_mae: 171328.57499| train_rmse: 191933.37638| train_mse: 36838420969.62062| valid_rmsle: 72.5    | valid_mae: 186260.60091| valid_rmse: 214786.30157| valid_mse: 46133155340.904

epoch 33 | loss: 35940973094.62282| train_rmsle: 21.17044| train_mae: 168781.07167| train_rmse: 189323.38177| train_mse: 35843342884.94538| valid_rmsle: 23.379  | valid_mae: 183781.41987| valid_rmse: 212113.59048| valid_mse: 44992175264.364|  0:00:26s
epoch 34 | loss: 35900953954.53772| train_rmsle: 21.99482| train_mae: 168474.15764| train_rmse: 188888.86133| train_mse: 35679001936.20761| valid_rmsle: 23.535  | valid_mae: 183502.04735| valid_rmse: 211684.70058| valid_mse: 44810412460.13112|  0:00:27s
epoch 35 | loss: 35856887447.52031| train_rmsle: 21.61556| train_mae: 168391.07388| train_rmse: 188802.8262| train_mse: 35646507180.41692| valid_rmsle: 23.18107| valid_mae: 183399.96771| valid_rmse: 211570.19546| valid_mse: 44761947605.41598|  0:00:28s
epoch 36 | loss: 35818250291.4971| train_rmsle: 22.10765| train_mae: 168458.88715| train_rmse: 188843.26761| train_mse: 35661779722.07302| valid_rmsle: 22.87842| valid_mae: 183392.48873| valid_rmse: 211537.7751| valid_mse: 44748230294.04416|

epoch 66 | loss: 34153340654.66924| train_rmsle: 14.14312| train_mae: 164664.73748| train_rmse: 184655.85202| train_mse: 34097783684.41356| valid_rmsle: 14.56018| valid_mae: 179139.13029| valid_rmse: 206678.97913| valid_mse: 42716200413.20216|  0:00:52s
epoch 67 | loss: 34101959551.25725| train_rmsle: 13.89164| train_mae: 164314.5914| train_rmse: 184166.23261| train_mse: 33917201233.07527| valid_rmsle: 14.36063| valid_mae: 178767.30083| valid_rmse: 206192.61703| valid_mse: 42515395317.7869|  0:00:53s
epoch 68 | loss: 34025158006.3443| train_rmsle: 13.79535| train_mae: 164225.0282| train_rmse: 184056.22745| train_mse: 33876694861.32346| valid_rmsle: 13.86419| valid_mae: 178482.69495| valid_rmse: 205835.83887| valid_mse: 42368392563.34842|  0:00:54s
epoch 69 | loss: 33980506832.95938| train_rmsle: 13.84257| train_mae: 164261.19089| train_rmse: 184156.0961| train_mse: 33913467731.95512| valid_rmsle: 13.82729| valid_mae: 178364.79714| valid_rmse: 205649.45971| valid_mse: 42291700280.5827| 

epoch 99 | loss: 31732344479.44294| train_rmsle: 8.54118 | train_mae: 159580.06602| train_rmse: 178495.5375| train_mse: 31860656906.8232| valid_rmsle: 8.70383 | valid_mae: 173068.53854| valid_rmse: 198930.63508| valid_mse: 39573397573.89469|  0:01:18s
epoch 100| loss: 31641795411.68278| train_rmsle: 9.70401 | train_mae: 160156.58316| train_rmse: 179032.71184| train_mse: 32052711909.42492| valid_rmsle: 9.80914 | valid_mae: 173605.56932| valid_rmse: 199442.72414| valid_mse: 39777400211.03993|  0:01:19s
epoch 101| loss: 31555910010.30561| train_rmsle: 9.40089 | train_mae: 158867.09448| train_rmse: 177630.4293| train_mse: 31552569413.18866| valid_rmsle: 9.19367 | valid_mae: 172308.83593| valid_rmse: 198129.3744| valid_mse: 39255248999.11922|  0:01:20s
epoch 102| loss: 31520182422.52998| train_rmsle: 8.97419 | train_mae: 158653.1177| train_rmse: 177228.73691| train_mse: 31410025185.90734| valid_rmsle: 8.92553 | valid_mae: 172053.59715| valid_rmse: 197679.2004| valid_mse: 39077066272.72504| 

epoch 132| loss: 29092715979.51258| train_rmsle: 6.23644 | train_mae: 151543.52687| train_rmse: 169002.27119| train_mse: 28561767667.01409| valid_rmsle: 6.37343 | valid_mae: 164221.06509| valid_rmse: 188296.38149| valid_mse: 35455527282.13356|  0:01:45s
epoch 133| loss: 28962860588.5648| train_rmsle: 6.51104 | train_mae: 153212.9283| train_rmse: 170861.44955| train_mse: 29193634943.18958| valid_rmsle: 6.46298 | valid_mae: 165596.47525| valid_rmse: 189954.65082| valid_mse: 36082769369.6723|  0:01:46s
epoch 134| loss: 28903871107.71374| train_rmsle: 7.22675 | train_mae: 154225.09336| train_rmse: 171934.1331| train_mse: 29561346124.00278| valid_rmsle: 7.16643 | valid_mae: 166876.7185| valid_rmse: 191297.68727| valid_mse: 36594805155.4034|  0:01:47s
epoch 135| loss: 28762844526.42166| train_rmsle: 6.98085 | train_mae: 153159.8475| train_rmse: 170024.0736| train_mse: 28908185604.41632| valid_rmsle: 6.45738 | valid_mae: 164995.8249| valid_rmse: 188519.56669| valid_mse: 35539627023.99134|  0:

epoch 165| loss: 26093411092.30174| train_rmsle: 5.16841 | train_mae: 145625.84809| train_rmse: 162181.87514| train_mse: 26302960624.2569| valid_rmsle: 4.92987 | valid_mae: 157508.71661| valid_rmse: 180583.49272| valid_mse: 32610397842.61646|  0:02:12s
epoch 166| loss: 25940797499.41972| train_rmsle: 5.13735 | train_mae: 144717.14106| train_rmse: 160386.49464| train_mse: 25723827662.85968| valid_rmsle: 4.84283 | valid_mae: 156207.9978| valid_rmse: 178229.49378| valid_mse: 31765752452.4643|  0:02:12s
epoch 167| loss: 25766460138.70793| train_rmsle: 4.6622  | train_mae: 143839.04342| train_rmse: 159752.69643| train_mse: 25520924016.504| valid_rmsle: 4.54827 | valid_mae: 155267.57715| valid_rmse: 177522.36314| valid_mse: 31514189413.3265|  0:02:13s
epoch 168| loss: 25753635008.12379| train_rmsle: 4.85737 | train_mae: 144694.93533| train_rmse: 161195.55683| train_mse: 25984007541.49614| valid_rmsle: 4.71956 | valid_mae: 156217.085| valid_rmse: 179057.3687| valid_mse: 32061541284.0866|  0:0

epoch 198| loss: 22823864545.79497| train_rmsle: 3.58637 | train_mae: 132823.57014| train_rmse: 146663.92605| train_mse: 21510307205.08029| valid_rmsle: 3.67256 | valid_mae: 143628.40872| valid_rmse: 163286.25447| valid_mse: 26662400900.1789|  0:02:38s
epoch 199| loss: 22725377667.71374| train_rmsle: 3.8678  | train_mae: 135780.81133| train_rmse: 150139.24555| train_mse: 22541793054.79801| valid_rmsle: 3.72799 | valid_mae: 146170.49636| valid_rmse: 166563.29673| valid_mse: 27743331816.16538|  0:02:39s
epoch 200| loss: 22623093876.8588| train_rmsle: 3.72388 | train_mae: 133294.89861| train_rmse: 146454.56015| train_mse: 21448938188.36182| valid_rmsle: 3.56754 | valid_mae: 143057.29859| valid_rmse: 161839.16717| valid_mse: 26191916030.22424|  0:02:39s
epoch 201| loss: 22498732253.83366| train_rmsle: 3.52045 | train_mae: 132347.29389| train_rmse: 146929.18538| train_mse: 21588185516.5225| valid_rmsle: 3.28005 | valid_mae: 142890.07077| valid_rmse: 163720.30788| valid_mse: 26804339212.3120

epoch 231| loss: 19326447338.70793| train_rmsle: 2.37383 | train_mae: 124287.78293| train_rmse: 137413.48364| train_mse: 18882465485.59968| valid_rmsle: 1.96185 | valid_mae: 132385.71999| valid_rmse: 150662.54568| valid_mse: 22699202670.6756|  0:03:04s
epoch 232| loss: 19308551183.84526| train_rmsle: 2.39355 | train_mae: 124621.74545| train_rmse: 138831.96136| train_mse: 19274313494.55854| valid_rmsle: 1.88951 | valid_mae: 132601.10193| valid_rmse: 152150.76243| valid_mse: 23149854508.53801|  0:03:05s
epoch 233| loss: 19049444591.65957| train_rmsle: 2.36871 | train_mae: 126564.63279| train_rmse: 140223.49324| train_mse: 19662628057.50084| valid_rmsle: 1.91805 | valid_mae: 134646.72398| valid_rmse: 153635.88996| valid_mse: 23603986684.2674|  0:03:06s
epoch 234| loss: 19351319332.147| train_rmsle: 2.86623 | train_mae: 131508.96744| train_rmse: 145862.94563| train_mse: 21275998906.49861| valid_rmsle: 2.33464 | valid_mae: 139411.30008| valid_rmse: 158952.33169| valid_mse: 25265843749.89154

epoch 264| loss: 16048372022.96325| train_rmsle: 1.79155 | train_mae: 109352.77697| train_rmse: 120264.05692| train_mse: 14463443388.016| valid_rmsle: 1.31537 | valid_mae: 114049.42104| valid_rmse: 129287.7521| valid_mse: 16715322844.13071|  0:03:31s
epoch 265| loss: 15890629138.81625| train_rmsle: 1.90145 | train_mae: 113046.97165| train_rmse: 124923.25917| train_mse: 15605820681.09534| valid_rmsle: 1.52868 | valid_mae: 119502.02391| valid_rmse: 135332.3684| valid_mse: 18314849935.44992|  0:03:32s
epoch 266| loss: 15797161045.16828| train_rmsle: 1.93789 | train_mae: 113858.91433| train_rmse: 125611.91075| train_mse: 15778352121.6907| valid_rmsle: 1.5705  | valid_mae: 120405.56656| valid_rmse: 135970.5965| valid_mse: 18488003111.36326|  0:03:32s
epoch 267| loss: 15845286942.70019| train_rmsle: 1.87797 | train_mae: 112138.74375| train_rmse: 124390.03185| train_mse: 15472880022.44717| valid_rmsle: 1.42084 | valid_mae: 116598.88754| valid_rmse: 132921.83047| valid_mse: 17668213015.53298| 

epoch 297| loss: 12838891634.87814| train_rmsle: 1.37906 | train_mae: 105541.17519| train_rmse: 116109.58917| train_mse: 13481436697.63648| valid_rmsle: 1.42436 | valid_mae: 112833.63849| valid_rmse: 126191.79621| valid_mse: 15924369429.80553|  0:03:57s
epoch 298| loss: 12631134291.18762| train_rmsle: 1.13271 | train_mae: 102572.70522| train_rmse: 114071.88815| train_mse: 13012395666.70982| valid_rmsle: 1.27998 | valid_mae: 111354.23497| valid_rmse: 125902.54509| valid_mse: 15851450858.89518|  0:03:58s
epoch 299| loss: 12552708298.02708| train_rmsle: 0.96522 | train_mae: 95386.08634| train_rmse: 105642.08342| train_mse: 11160249789.93904| valid_rmsle: 1.09895 | valid_mae: 102675.43896| valid_rmse: 115674.97404| valid_mse: 13380699618.27902|  0:03:58s
epoch 300| loss: 12412211449.56286| train_rmsle: 1.09198 | train_mae: 91599.71682| train_rmse: 102077.65997| train_mse: 10419848664.93708| valid_rmsle: 1.21925 | valid_mae: 98839.16158| valid_rmse: 111660.27083| valid_mse: 12468016082.3363

epoch 330| loss: 9922411546.73888| train_rmsle: 0.65933 | train_mae: 74805.23126| train_rmse: 82285.03195| train_mse: 6770826482.46026| valid_rmsle: 0.61718 | valid_mae: 78840.61992| valid_rmse: 87896.72679| valid_mse: 7725834579.99722|  0:04:23s
epoch 331| loss: 9888481244.34816| train_rmsle: 0.6064  | train_mae: 77247.51119| train_rmse: 88228.13753| train_mse: 7784204251.32729| valid_rmsle: 0.62522 | valid_mae: 83697.23801| valid_rmse: 96859.97222| valid_mse: 9381854218.76481|  0:04:24s
epoch 332| loss: 9708508135.24178| train_rmsle: 0.59029 | train_mae: 77414.85738| train_rmse: 86852.5897| train_mse: 7543372336.76214| valid_rmsle: 0.54573 | valid_mae: 81679.14551| valid_rmse: 92326.54708| valid_mse: 8524191296.2723|  0:04:25s
epoch 333| loss: 9310666943.13346| train_rmsle: 0.65595 | train_mae: 80935.76239| train_rmse: 90902.92634| train_mse: 8263342016.83524| valid_rmsle: 0.59435 | valid_mae: 85707.63216| valid_rmse: 97309.76071| valid_mse: 9469189528.99705|  0:04:26s
epoch 334| los

epoch 364| loss: 7365410103.45841| train_rmsle: 0.46163 | train_mae: 69276.85233| train_rmse: 77049.30769| train_mse: 5936595815.02792| valid_rmsle: 0.35103 | valid_mae: 71278.44199| valid_rmse: 80336.58968| valid_mse: 6453967642.1077|  0:04:50s
epoch 365| loss: 7238418110.14313| train_rmsle: 0.4473  | train_mae: 73213.06553| train_rmse: 86283.73638| train_mse: 7444883163.3437| valid_rmsle: 0.30596 | valid_mae: 76046.6726| valid_rmse: 93169.71324| valid_mse: 8680595465.37995|  0:04:51s
epoch 366| loss: 7137482661.88008| train_rmsle: 0.37202 | train_mae: 63377.74169| train_rmse: 71785.36322| train_mse: 5153138373.18028| valid_rmsle: 0.25316 | valid_mae: 64905.60228| valid_rmse: 76096.8272| valid_mse: 5790727109.41182|  0:04:52s
epoch 367| loss: 6845933885.89555| train_rmsle: 0.43592 | train_mae: 73610.97632| train_rmse: 85703.73657| train_mse: 7345130462.67839| valid_rmsle: 0.32326 | valid_mae: 77074.03364| valid_rmse: 91656.97164| valid_mse: 8401000450.63268|  0:04:53s
epoch 368| loss:

epoch 398| loss: 5077995646.76209| train_rmsle: 0.18525 | train_mae: 55788.63933| train_rmse: 68959.80468| train_mse: 4755454660.96929| valid_rmsle: 0.17546 | valid_mae: 60684.3206| valid_rmse: 76234.23133| valid_mse: 5811658026.95927|  0:05:17s
epoch 399| loss: 5107435688.3559| train_rmsle: 0.18419 | train_mae: 54885.37404| train_rmse: 64952.61388| train_mse: 4218842049.5867| valid_rmsle: 0.13479 | valid_mae: 57078.46137| valid_rmse: 68570.12875| valid_mse: 4701862556.8275|  0:05:18s
epoch 400| loss: 5178539497.22244| train_rmsle: 0.17537 | train_mae: 55943.13498| train_rmse: 70245.4164| train_mse: 4934418525.76355| valid_rmsle: 0.13861 | valid_mae: 60635.30888| valid_rmse: 77988.93848| valid_mse: 6082274525.85878|  0:05:18s
epoch 401| loss: 4932874873.81044| train_rmsle: 0.22123 | train_mae: 58445.93994| train_rmse: 65626.15417| train_mse: 4306792111.63518| valid_rmsle: 0.20655 | valid_mae: 61044.51237| valid_rmse: 68937.37618| valid_mse: 4752361834.01441|  0:05:19s
epoch 402| loss: 

epoch 432| loss: 3407345295.59768| train_rmsle: 0.14893 | train_mae: 49112.99321| train_rmse: 59287.10018| train_mse: 3514960248.26808| valid_rmsle: 0.13754 | valid_mae: 52690.77922| valid_rmse: 63958.63009| valid_mse: 4090706362.67808|  0:05:43s
epoch 433| loss: 3849369625.74855| train_rmsle: 0.11756 | train_mae: 43310.21559| train_rmse: 57350.285| train_mse: 3289055189.0204| valid_rmsle: 0.10458 | valid_mae: 46898.26801| valid_rmse: 61902.81675| valid_mse: 3831958721.8149|  0:05:44s
epoch 434| loss: 3422457440.0619| train_rmsle: 0.10513 | train_mae: 35290.92268| train_rmse: 43125.92489| train_mse: 1859845397.70349| valid_rmsle: 0.0887  | valid_mae: 35497.35426| valid_rmse: 41623.09596| valid_mse: 1732482117.2401|  0:05:45s
epoch 435| loss: 3297527203.15667| train_rmsle: 0.10755 | train_mae: 36624.4834| train_rmse: 47959.158| train_mse: 2300080835.73806| valid_rmsle: 0.09337 | valid_mae: 39608.31651| valid_rmse: 51191.46874| valid_mse: 2620566471.60848|  0:05:46s
epoch 436| loss: 3120

In [91]:
preds = clf.predict(X_test)

y_true = y_test

test_score = mean_absolute_percentage_error(y_pred=preds, y_true=y_true)
print("Test MAPE:", test_score)
print("Test CUSTOM SCORE", CustomMetrics(y_true, 0.95*preds))

Test MAPE: 0.2058983558500036
Test CUSTOM SCORE [0.99313893]


In [86]:
id_score = np.array(list(df_test['id'])).reshape(-1, 1)
X_score = df_test[features].to_numpy()
y_score = clf.predict(X_score)*0.94
print(id_score)

result = pd.DataFrame(data = np.concatenate((id_score, y_score), axis = 1),\
                      columns = ['id', 'per_square_meter_price'])
result.to_csv("puplic_prediction.csv", index = False)

[['COL_289284']
 ['COL_289305']
 ['COL_289318']
 ...
 ['COL_455261']
 ['COL_455381']
 ['COL_455397']]
