In [1]:
# In[1]: PART 1. IMPORT AND FUNCTIONS
#region
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer  
from sklearn.preprocessing import OneHotEncoder      
from statistics import mean
from sklearn.model_selection import KFold   
import joblib
# In[2]: PART 2. GET THE DATA 
raw_data = pd.read_csv('/kaggle/input/vn-housing-dataset/VN_housing_dataset_processed1.csv')

# In[3]: PART 3. DISCOVER THE DATA 
#region
# 3.1 Quick view of the data
print('\n____________ Dataset info ____________')
print(raw_data.info())              
print('\n____________ Statistics of numeric features ____________')
print(raw_data.describe()) 


____________ Dataset info ____________
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80773 entries, 0 to 80772
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Quận             80772 non-null  object 
 1   Huyện            80733 non-null  object 
 2   Loại hình nhà ở  80744 non-null  object 
 3   Giấy tờ pháp lý  52430 non-null  object 
 4   Số tầng          35667 non-null  float64
 5   Số phòng ngủ     80735 non-null  float64
 6   Diện tích        80773 non-null  float64
 7   Dài (m)          19294 non-null  float64
 8   Rộng (m)         34526 non-null  float64
 9   Giá (triệu/m2)   80773 non-null  float64
dtypes: float64(6), object(4)
memory usage: 6.2+ MB
None

____________ Statistics of numeric features ____________
            Số tầng  Số phòng ngủ     Diện tích        Dài (m)       Rộng (m)  \
count  35667.000000  80735.000000  80773.000000   19294.000000   34526.000000   
mean       4.433426   

In [2]:
# View unique values of categorical features
print(raw_data['Quận'].unique())
print(raw_data['Huyện'].unique())
print(raw_data['Loại hình nhà ở'].unique())
print(raw_data['Giấy tờ pháp lý'].unique())

['Quận Cầu Giấy' 'Quận Thanh Xuân' 'Quận Hai Bà Trưng' 'Quận Tây Hồ'
 'Quận Đống Đa' 'Quận Hà Đông' 'Huyện Thanh Trì' 'Quận Hoàng Mai'
 'Quận Long Biên' 'Quận Nam Từ Liêm' 'Quận Ba Đình' 'Huyện Hoài Đức'
 'Quận Bắc Từ Liêm' 'Huyện Đan Phượng' 'Huyện Thanh Oai' 'Huyện Sóc Sơn'
 'Huyện Gia Lâm' 'Huyện Chương Mỹ' 'Quận Hoàn Kiếm' 'Huyện Đông Anh'
 'Huyện Thường Tín' 'Thị xã Sơn Tây' 'Huyện Mê Linh' 'Huyện Thạch Thất'
 'Huyện Quốc Oai' 'Huyện Phúc Thọ' 'Huyện Phú Xuyên' 'Huyện Ba Vì' nan
 'Huyện Mỹ Đức']
['Phường Nghĩa Đô' 'Phường Kim Giang' 'Phường Minh Khai'
 'Phường Thụy Khuê' 'Phường Trung Liệt' 'Phường Đống Mác' 'Phường Xuân La'
 'Phường Văn Quán' 'Thị trấn Văn Điển' 'Phường Định Công' 'Phường Bồ Đề'
 'Phường Quang Trung' 'Phường Thanh Lương' 'Phường Khương Trung'
 'Phường Gia Thụy' 'Phường Khương Đình' 'Phường Phương Canh'
 'Phường Tương Mai' 'Phường La Khê' 'Phường Mễ Trì' 'Phường Khương Mai'
 'Phường Láng Hạ' 'Phường Quan Hoa' 'Phường Tây Mỗ' 'Phường Ngọc Khánh'
 'Phường Đại Mỗ' 'X

In [3]:
# In[4]: PART 4. PREPARE THE DATA 
# 4.2 Split training-test sets
from sklearn.model_selection import train_test_split

# 4.4 Define pipelines for processing data. 
# Define ColumnSelector: a transformer for choosing columns:
class ColumnSelector(BaseEstimator, TransformerMixin):
    def __init__(self, feature_names):
        self.feature_names = feature_names
    def fit(self, dataframe, labels=None):
        return self
    def transform(self, dataframe):
        return dataframe[self.feature_names].values    
cat_feat_names = ['Quận', 'Huyện', 'Loại hình nhà ở', 'Giấy tờ pháp lý'] 
num_feat_names = ['Số tầng', 'Số phòng ngủ', 'Diện tích', 'Dài (m)', 'Rộng (m)'] 

# Pipeline for categorical features:
cat_pipeline = Pipeline([
    ('selector', ColumnSelector(cat_feat_names)),
    ('imputer', SimpleImputer(missing_values=np.nan, strategy="constant", fill_value = "NO INFO", copy=True)),
    ('cat_encoder', OneHotEncoder(handle_unknown="ignore")) ])    

# Pipeline for numerical features:
num_pipeline = Pipeline([
    ('selector', ColumnSelector(num_feat_names)),
    ('imputer', SimpleImputer(missing_values=np.nan, strategy="median", copy=True)),  
    ('std_scaler', StandardScaler(with_mean=True, with_std=True, copy=True)) ])  
  
# Combine features transformed by two above pipelines:
full_pipeline = FeatureUnion(transformer_list=[
    ("num_pipeline", num_pipeline),
    ("cat_pipeline", cat_pipeline) ])  

# Define the target column (adjust 'target_col' to match the actual label column in your dataset)
target_col = "Giá (triệu/m2)"  # Change this to the actual target variable

# Split features (X) and target labels (y)
X = raw_data.drop(columns=[target_col])  # Features
y = raw_data[target_col]  # Labels

# Create train and test sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply preprocessing pipeline
X_train_processed = full_pipeline.fit_transform(X_train)
X_test_processed = full_pipeline.transform(X_test)
print('\n____________ Processed feature values ____________')
print(X_train_processed[[0, 1, 2],:].toarray())
print(X_train_processed.shape)
# print('We have %d numeric feature + 1 added features + 35 cols of onehotvector for categorical features.' %(len(num_feat_names)))
joblib.dump(full_pipeline, r'/kaggle/working/full_pipeline.pkl')


____________ Processed feature values ____________
[[ 0.22792003 -0.612713   -0.24432111 -0.00628668 -0.00708703  0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          1.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.       

['/kaggle/working/full_pipeline.pkl']

In [4]:
# In[5]: PART 5. TRAIN AND EVALUATE MODELS 
#region
# 5.1 Try LinearRegression model
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train_processed, y_train)
print('\n____________ LinearRegression ____________')
print('Learned parameters: ', model.coef_)


____________ LinearRegression ____________
Learned parameters:  [ 3.11802432e+00  7.93330291e+00 -9.15711421e-01 -1.03494865e-01
  1.70674879e-01 -4.67527378e+01 -4.04974805e+01 -1.10753955e+01
 -1.30549128e+01 -5.11631831e+01 -3.73547071e+01 -1.90570066e+01
  8.41356549e+00 -5.14783639e+01 -4.13037429e+01  2.84492872e+01
 -3.25562020e+01 -6.15644830e+01 -4.63715643e+01 -1.54624404e+01
 -2.10630674e+00  2.00522537e+01  5.53493287e+01  4.64221083e+01
  2.18891153e+02  3.35857066e+00  6.25914290e-01 -4.38697418e+00
  6.10529513e+00  5.55978420e+01  6.38373582e+01  3.59473565e+01
 -6.88645324e+01 -2.14320309e+01 -4.10474098e+01 -1.12957042e+01
  1.58596111e+02 -1.09721714e+01 -1.89094781e+01 -1.32409335e+01
  2.64146571e+01 -1.93051140e+02  1.59115131e+01  2.09997440e+01
 -8.56783206e+00  5.50649549e+01 -3.55588205e+00 -3.60797457e+00
 -7.55936863e+01 -6.18839466e+01 -6.16103512e-01 -8.22812421e+00
 -9.06462003e+00 -5.70910604e+00  2.67059852e+01 -1.66999362e+01
  1.80086750e+01  6.20354

In [5]:
# Compute R2 score and root mean squared error
def r2score_and_rmse(model, train_data, labels): 
    r2score = model.score(train_data, labels)
    from sklearn.metrics import mean_squared_error
    prediction = model.predict(train_data)
    mse = mean_squared_error(labels, prediction)
    rmse = np.sqrt(mse)
    return r2score, rmse      
r2score, rmse = r2score_and_rmse(model, X_train_processed, y_train)
print('\nR2 score (on training data, best=1):', r2score)
print("Root Mean Square Error: ", rmse.round(decimals=1))


R2 score (on training data, best=1): 0.331349434853627
Root Mean Square Error:  50.1


In [6]:
# Predict labels for some training instances:
print("\nInput data: \n", X_train.iloc[0:9])
print("\nPredictions: ", model.predict(X_train_processed[0:9]).round(decimals=1))
print("Labels:      ", list(y_train[0:9]))

# Store models to files, to compare latter:
def store_model(model, model_name = ""):
    if model_name == "": 
        model_name = type(model).__name__
    joblib.dump(model,'/kaggle/working/' + model_name + '_model.pkl')
def load_model(model_name):
    model = joblib.load('/kaggle/working/' + model_name + '_model.pkl')
    return model
store_model(model)


Input data: 
                     Quận                Huyện        Loại hình nhà ở  \
57914       Quận Đống Đa  Phường Quốc Tử Giám  Nhà mặt phố, mặt tiền   
47549   Quận Nam Từ Liêm     Phường Mỹ Đình 1           Nhà ngõ, hẻm   
13623       Quận Hà Đông      Phường Vạn Phúc           Nhà ngõ, hẻm   
2262      Quận Hoàng Mai      Phường Mai Động           Nhà ngõ, hẻm   
4389        Quận Đống Đa     Phường Ô Chợ Dừa           Nhà ngõ, hẻm   
61604  Quận Hai Bà Trưng      Phường Vĩnh Tuy           Nhà ngõ, hẻm   
77455    Quận Thanh Xuân    Phường Nhân Chính  Nhà mặt phố, mặt tiền   
54998    Quận Thanh Xuân   Phường Khương Đình           Nhà ngõ, hẻm   
45546     Quận Hoàng Mai      Phường Lĩnh Nam           Nhà ngõ, hẻm   

      Giấy tờ pháp lý  Số tầng  Số phòng ngủ  Diện tích  Dài (m)  Rộng (m)  
57914        Đã có sổ      NaN           3.0       35.0      NaN       5.0  
47549        Đã có sổ      NaN           3.0       52.0     11.0       5.0  
13623        Đã có sổ      4.0   

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


In [7]:
#%% 5.2 Try DecisionTreeRegressor model
# Training:
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()
model.fit(X_train_processed, y_train)
# Compute R2 score and root mean squared error:
print('\n____________ DecisionTreeRegressor ____________')
r2score, rmse = r2score_and_rmse(model, X_train_processed, y_train)
print('\nR2 score (on training data, best=1):', r2score)
print("Root Mean Square Error: ", rmse.round(decimals=1))
store_model(model)
# Predict labels for some training instances:
print("\nPredictions: ", model.predict(X_train_processed[0:9]).round(decimals=1))
print("Labels:      ", list(y_train[0:9]))


____________ DecisionTreeRegressor ____________

R2 score (on training data, best=1): 0.9345191317243118
Root Mean Square Error:  15.7

Predictions:  [351.4  94.2  69.7  84.4  75.8  87.2 464.3  78.1  75.1]
Labels:       [351.43, 94.23, 69.7, 84.44, 75.76, 93.75, 464.29, 78.57, 71.21]


In [8]:
#%% 5.3 Try RandomForestRegressor model
# Training:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor(n_estimators = 5) 
model.fit(X_train_processed, y_train)
# Compute R2 score and root mean squared error:
print('\n____________ RandomForestRegressor ____________')
r2score, rmse = r2score_and_rmse(model, X_train_processed, y_train)
print('\nR2 score (on training data, best=1):', r2score)
print("Root Mean Square Error: ", rmse.round(decimals=1))
store_model(model)      
# Predict labels for some training instances:
print("\nPredictions: ", model.predict(X_train_processed[0:9]).round(decimals=1))
print("Labels:      ", list(y_train[0:9]))


____________ RandomForestRegressor ____________

R2 score (on training data, best=1): 0.8174206349146402
Root Mean Square Error:  26.2

Predictions:  [224.1  87.1  71.5  86.7 121.2  85.4 464.3  78.3  74.6]
Labels:       [351.43, 94.23, 69.7, 84.44, 75.76, 93.75, 464.29, 78.57, 71.21]


In [9]:
import optuna
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
# full_pipeline = joblib.load(r'/kaggle/working/full_pipeline.pkl')
# processed_test_set = full_pipeline.transform(test_set)
def objective(trial):
    params = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "n_estimators": trial.suggest_int("n_estimators", 100, 500),
        "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0)
    }
    model = XGBRegressor(**params)
    model.fit(X_train_processed, y_train)
    y_pred = model.predict(X_test_processed)
    return mean_squared_error(y_test, y_pred, squared=False)

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

print("Best parameters:", study.best_params)


[I 2025-02-19 14:51:50,550] A new study created in memory with name: no-name-e17c8e56-4445-4b6d-8482-4cb324143476
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0)
[I 2025-02-19 14:51:51,282] Trial 0 finished with value: 48.19443905786588 and parameters: {'learning_rate': 0.09604580786524547, 'max_depth': 7, 'n_estimators': 119, 'subsample': 0.6684344387123351, 'colsample_bytree': 0.9562698324610981}. Best is trial 0 with value: 48.19443905786588.
  "subsample": trial.suggest_uniform("subsample", 0.5, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.5, 1.0)
[I 2025-02-19 14:51:52,411] Trial 1 finished with value: 49.786766219050726 and parameters: {'learning_rate': 0.017404036425146936, 'max_depth': 6, 'n_estimators': 237, 'subsample': 0.5557699250555741, 'colsample_bytree': 0.964088929966191}. Best is trial 0 with value: 48.19443905786588.
  "subsample": trial.suggest_uniform("s

Best parameters: {'learning_rate': 0.0673303483482099, 'max_depth': 10, 'n_estimators': 409, 'subsample': 0.6913465657131193, 'colsample_bytree': 0.5227833901705242}


In [10]:
# Define the model
# xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6, random_state=42)
xgb_model = XGBRegressor(learning_rate=0.049456198506249625, max_depth=10, n_estimators=456, subsample=0.7489727392768031, colsample_bytree=0.5084291551626028, random_state=42)
xgb_model_1 = XGBRegressor(learning_rate=0.1324273781277113, max_depth=9, n_estimators=187, subsample=0.9565974763846247, colsample_bytree=0.6172761056188125, random_state=42)
# xgb_model = XGBRegressor(learning_rate=0.04925959031633168, max_depth=9, n_estimators=451, subsample=0.7489727392768031, colsample_bytree=0.5084291551626028, random_state=42)
# 'learning_rate': 0.049456198506249625, 'max_depth': 10, 'n_estimators': 456, 'subsample': 0.7489727392768031, 'colsample_bytree': 0.5084291551626028
# 'learning_rate': 0.04925959031633168, 'max_depth': 9, 'n_estimators': 451, 'subsample': 0.7231522113107695, 'colsample_bytree': 0.5324314811897863
# Train the model
xgb_model.fit(X_train_processed, y_train)
# xgb_model_1.fit(X_train_processed, y_train)
r2score, rmse = r2score_and_rmse(xgb_model, X_train_processed, y_train)
print('\nR2 score (on training data, best=1):', r2score)
print("Root Mean Square Error: ", rmse.round(decimals=1))
store_model(model, model_name="XGBoost_tryagain")
# Predict labels for some training instances:
print("\nPredictions: ", xgb_model.predict(X_train_processed[0:9]).round(decimals=1))
print("Labels:      ", list(y_train[0:9]))

# # Make predictions
# y_pred = xgb_model.predict(X_test)

# # Evaluate performance
# rmse = mean_squared_error(y_test, y_pred, squared=False)
# print("RMSE:", rmse)



R2 score (on training data, best=1): 0.6541787963168783
Root Mean Square Error:  36.0

Predictions:  [175.2  89.5  80.4  77.1 114.4  94.2 233.5  79.7  72.9]
Labels:       [351.43, 94.23, 69.7, 84.44, 75.76, 93.75, 464.29, 78.57, 71.21]


In [11]:
#%% 5.5 EVALUATE MODELS
from sklearn.model_selection import cross_val_score
print('\n____________ K-fold cross validation ____________')
run_new_evaluation = True
if run_new_evaluation:
    cv = KFold(n_splits=5,shuffle=True,random_state=37) 

    # Evaluate LinearRegression:
    model_name = "LinearRegression" 
    model = LinearRegression()             
    nmse_scores = cross_val_score(model, X_train_processed, y_train, cv=cv, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-nmse_scores)
    joblib.dump(rmse_scores,'/kaggle/working/' + model_name + '_rmse.pkl')
    print("LinearRegression rmse: ", rmse_scores.round(decimals=1))
    print("Avg. rmse: ", mean(rmse_scores.round(decimals=1)),'\n')

    # Evaluate DecisionTreeRegressor:
    model_name = "DecisionTreeRegressor" 
    model = DecisionTreeRegressor()
    nmse_scores = cross_val_score(model, X_train_processed, y_train, cv=cv, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-nmse_scores)
    joblib.dump(rmse_scores,'/kaggle/working/' + model_name + '_rmse.pkl')
    print("DecisionTreeRegressor rmse: ", rmse_scores.round(decimals=1))
    print("Avg. rmse: ", mean(rmse_scores.round(decimals=1)),'\n')

    # Evaluate RandomForestRegressor:
    model_name = "RandomForestRegressor" 
    model = RandomForestRegressor(n_estimators = 5)
    nmse_scores = cross_val_score(model, X_train_processed, y_train, cv=cv, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-nmse_scores)
    joblib.dump(rmse_scores,'/kaggle/working/' + model_name + '_rmse.pkl')
    print("RandomForestRegressor rmse: ", rmse_scores.round(decimals=1))
    print("Avg. rmse: ", mean(rmse_scores.round(decimals=1)),'\n')

    # Evaluate XGBoost
    model_name = "XGBoost" 
    model = XGBRegressor(learning_rate=0.049456198506249625, max_depth=10, n_estimators=456, subsample=0.7489727392768031, colsample_bytree=0.5084291551626028, random_state=42)
    nmse_scores = cross_val_score(model, X_train_processed, y_train, cv=cv, scoring='neg_mean_squared_error')
    rmse_scores = np.sqrt(-nmse_scores)
    joblib.dump(rmse_scores,'/kaggle/working/' + model_name + '_rmse.pkl')
    print("XGBoost rmse: ", rmse_scores.round(decimals=1))
    print("Avg. rmse: ", mean(rmse_scores.round(decimals=1)),'\n')


____________ K-fold cross validation ____________
LinearRegression rmse:  [46.5 51.7 51.  52.8 51.6]
Avg. rmse:  50.72 

DecisionTreeRegressor rmse:  [59.6 60.4 62.  60.4 58.9]
Avg. rmse:  60.26 

RandomForestRegressor rmse:  [46.9 50.7 51.1 51.9 50.7]
Avg. rmse:  50.26 

XGBoost rmse:  [40.9 47.1 45.7 46.6 45. ]
Avg. rmse:  45.06 



In [12]:
# # In[7]: PART 7. ANALYZE AND TEST THE BEST MODEL
# #region:
# # 7.1 Pick the best model (random forest):
# search = joblib.load('/kaggle/working/RandomForestRegressor_randsearch.pkl')
# best_model = search.best_estimator_
# # best_model = joblib.load('/kaggle/working/RandomForestRegressor_model.pkl')
# # 7.2 Analyse the solution to get more insights about the data:
# # NOTE: ONLY for rand forest
# print('\n____________ ANALYZE AND TEST YOUR SOLUTION ____________')
# print('SOLUTION: ' , best_model)
# store_model(best_model, model_name="SOLUTION")


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/working/RandomForestRegressor_randsearch.pkl'

In [13]:
#%% 7.3 Run on test data:
# full_pipeline = joblib.load(r'/kaggle/working/full_pipeline.pkl')
best_model = joblib.load(r'/kaggle/working/XGBoost_tryagain_model.pkl')
best_model.fit(X_train_processed, y_train)
# processed_test_set = full_pipeline.transform(test_set)  
# Compute R2 score and root mean squared error:
r2score, rmse = r2score_and_rmse(best_model, X_test_processed, y_test)
print('\nPerformance on test data:')
print('R2 score (on test data, best=1):', r2score)
print("Root Mean Square Error: ", rmse.round(decimals=1))
# Predict labels for some test instances:
print("\nTest data: \n", X_test.iloc[0:9])
print("\nProcessed_test_set: \n", X_test_processed[0:9])
print("\nPredictions: ", best_model.predict(X_test_processed[0:9]).round(decimals=1))
print("Labels:      ", list(y_test[0:9]),'\n')

#endregion


Performance on test data:
R2 score (on test data, best=1): 0.32790747796145026
Root Mean Square Error:  51.6

Test data: 
                     Quận               Huyện        Loại hình nhà ở  \
6658      Quận Hoàng Mai      Phường Đại Kim           Nhà ngõ, hẻm   
54854       Quận Hà Đông     Phường Vạn Phúc           Nhà ngõ, hẻm   
5013      Quận Hoàng Mai   Phường Thịnh Liệt           Nhà ngõ, hẻm   
13466        Quận Tây Hồ    Phường Thụy Khuê           Nhà ngõ, hẻm   
18698   Quận Bắc Từ Liêm    Phường Cổ Nhuế 2  Nhà mặt phố, mặt tiền   
76040     Quận Hoàng Mai     Phường Giáp Bát  Nhà mặt phố, mặt tiền   
2054      Quận Hoàng Mai    Phường Tương Mai           Nhà ngõ, hẻm   
8703   Quận Hai Bà Trưng    Phường Quỳnh Mai           Nhà ngõ, hẻm   
75895     Huyện Hoài Đức  Thị trấn Trạm Trôi           Nhà biệt thự   

      Giấy tờ pháp lý  Số tầng  Số phòng ngủ  Diện tích  Dài (m)  Rộng (m)  
6658              NaN      NaN           3.0       33.0      NaN       8.0  
54854      

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
