In [None]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from blocksnet.analysis.land_use.prediction import SpatialClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)

In [None]:
# Load the data.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
with open('data/cleaned_blocks.pkl', 'rb') as f:
    gdf = pickle.load(f)

# Class balance of the full dataset, most frequent class first. It is printed
# explicitly because a bare expression in the middle of a cell is evaluated
# and then discarded without being shown.
print(gdf['land_use'].value_counts())

# Optional: keep a single city for a quick test run.
gdf = gdf[gdf.city == 'Saint Petersburg']

# Hold out 15% of the rows, stratified by land use, with a fixed seed.
# NOTE(review): a stratified split needs at least two rows per class; after
# the city filter a rare land_use value could violate that — confirm.
train_gdf, test_gdf = train_test_split(
    gdf,
    test_size=0.15,
    random_state=42,
    stratify=gdf['land_use'],
)

print(train_gdf.land_use.unique(), train_gdf.shape)
print(test_gdf.land_use.unique(), test_gdf.shape)

In [None]:
# 1. Initialisation and training
# Keyword arguments shared by the estimators that accept both a seed and a
# parallelism setting.
BASE_PARAMS = dict(random_state=42, n_jobs=-1)

# Hyperparameters per base model, keyed by a short alias.
# NOTE(review): the "cb" entry has no matching estimator in the list built
# right after this table (presumably CatBoost) — confirm whether it is still
# needed.
MODEL_PARAMS = {
    "rf": dict(
        n_estimators=200,
        max_depth=7,
        class_weight="balanced",
        **BASE_PARAMS,
    ),
    "xgb": dict(
        n_estimators=200,
        max_depth=7,
        learning_rate=0.05,
        scale_pos_weight=1,
        **BASE_PARAMS,
    ),
    "lgb": dict(
        n_estimators=200,
        max_depth=7,
        learning_rate=0.05,
        class_weight="balanced",
        **BASE_PARAMS,
    ),
    "cb": dict(
        iterations=200,
        depth=7,
        learning_rate=0.05,
        thread_count=-1,
        auto_class_weights="Balanced",
        random_seed=42,
    ),
    "hgb": dict(
        max_iter=200,
        max_depth=7,
        learning_rate=0.05,
        random_state=42,
    ),
}
# Base learners for the ensemble: alias -> estimator class. Each class is
# instantiated with the hyperparameters stored under the same alias in
# MODEL_PARAMS, in the order listed here.
base_model_classes = {
    "rf": RandomForestClassifier,
    "xgb": XGBClassifier,
    "lgb": LGBMClassifier,
    "hgb": HistGradientBoostingClassifier,
}
estimators = [
    (alias, model_cls(**MODEL_PARAMS[alias]))
    for alias, model_cls in base_model_classes.items()
]

# Settings passed alongside the estimators: soft voting on all cores.
voting_params = dict(voting="soft", n_jobs=-1)

# NOTE(review): 1000 and 5 are passed positionally; their meaning is defined
# by SpatialClassifier's signature, which is not visible here — confirm and
# consider naming them.
classifier = SpatialClassifier(estimators, voting_params, 1000, 5)
classifier.train(train_gdf, target_col='land_use_code')

In [None]:
# 2. Prediction on the test data
# Run the trained classifier on the held-out split. The two results are kept
# separately: `predict` presumably returns one label per row and
# `predict_proba` the per-class probabilities — confirm against
# SpatialClassifier.
test_predictions = classifier.predict(test_gdf)
test_probabilities = classifier.predict_proba(test_gdf)

In [None]:
# 3. Save the training data
train_data_path = 'data/results/train_data.geojson'
classifier.save_train_data(train_data_path)

# 4. Save the test data together with its predictions and probabilities
test_predictions_path = 'data/results/test_predictions.geojson'
classifier.save_predictions_to_geojson(test_gdf, test_predictions, test_probabilities, test_predictions_path)

# 5. Save the mistakes (computed by the classifier from the test data and its predictions)
mistakes_path = 'data/results/mistakes.geojson'
classifier.save_mistakes(test_gdf, test_predictions, mistakes_path)