In [None]:
import os

import lightgbm as lgbm
import numpy as np
import pandas as pd
import yaml
from tqdm import tqdm as tqdm

import main
import utils

## Loading Config File

In [2]:
# Read the run configuration (task, bucket paths, years, output path, ...) into `cfg`.
# The file is resolved relative to the notebook's working directory.
with open('config.yaml') as config_file:
    cfg = yaml.load(config_file, Loader=yaml.FullLoader)

## Loading features and labels

In [None]:

# --- Task-dependent settings -------------------------------------------------
# cfg["task"] selects which set of bucket paths to read and how many entries
# back in the label listing the "previous" label raster is taken from.
task = cfg["task"]
if task == 1:
    bucket_paths = cfg["bucket_paths_next_year"]
    prev_offset = 1
elif task == 5:
    bucket_paths = cfg["bucket_paths_five_year"]
    prev_offset = 5
else:
    # Previously an unknown task left `bucket_paths` undefined and the cell
    # failed further down with a confusing NameError.
    raise ValueError(f'Unsupported cfg["task"]={task!r}; expected 1 or 5')

# The custom-feature listing is indexed 5 positions behind the feature listing
# for both tasks. In the recorded run this lined the custom file up with the
# same date as the feature file.
# NOTE(review): presumably the custom listing starts 5 entries later - confirm.
CUSTOM_OFFSET = 5

# Band of the feature raster that is left out of the model inputs.
# NOTE(review): the meaning of band 10 is not visible in this notebook - confirm.
DROPPED_BAND = 10

# --- Locate the rasters ------------------------------------------------------
labels = utils.files_in_dir(bucket_paths["labels"], "label.tif")
features = utils.files_in_dir(bucket_paths["features"], "urban_feat.tif")
custom_features = utils.files_in_dir(bucket_paths["customs"], "urban_custom_feat.tif")
test_index = main.return_indexes(features, cfg["pred_year"])
train_index = main.return_indexes(features, cfg["train_year"])

train_index_prev = train_index - prev_offset
test_index_prev = test_index - prev_offset
train_index_custom = train_index - CUSTOM_OFFSET
test_index_custom = test_index - CUSTOM_OFFSET

# A negative index would silently wrap around to the end of the listing and
# load a raster from the wrong date, so fail loudly instead.
for _name, _idx in (
    ("train_index_prev", train_index_prev),
    ("test_index_prev", test_index_prev),
    ("train_index_custom", train_index_custom),
    ("test_index_custom", test_index_custom),
):
    if _idx < 0:
        raise IndexError(
            f"{_name}={_idx} is negative: the configured year is too early "
            f"for task {task} (not enough earlier entries in the file listing)"
        )

print("train ",features[train_index],labels[train_index_prev],custom_features[train_index_custom],labels[train_index])
print("test",features[test_index],labels[test_index_prev],custom_features[test_index_custom],labels[test_index])

# --- Load the rasters --------------------------------------------------------
# NOTE(review): train_prev_features / test_prev_features are not referenced by
# any other cell visible in this notebook; they are still loaded so existing
# notebook state is unchanged. Drop them if nothing else needs them.
train_prev_labels = utils.load_tiff(labels[train_index_prev])
train_prev_features = utils.load_tiff(features[train_index_prev])
train_custom_features = utils.load_tiff(custom_features[train_index_custom])
train_features = utils.load_tiff(features[train_index])
train_labels = utils.load_tiff(labels[train_index])

test_prev_features = utils.load_tiff(features[test_index_prev])
test_prev_labels = utils.load_tiff(labels[test_index_prev])
test_custom_features = utils.load_tiff(custom_features[test_index_custom])
test_features = utils.load_tiff(features[test_index])
test_labels = utils.load_tiff(labels[test_index])


def _stack_inputs(feature_raster, prev_label_raster, custom_raster):
    """Stack the model input bands along axis 0.

    The result is every band of ``feature_raster.data`` except
    ``DROPPED_BAND``, followed by the bands of ``prev_label_raster.data`` and
    then those of ``custom_raster.data``.
    """
    bands = feature_raster.data
    return np.concatenate(
        (
            bands[:DROPPED_BAND, :, :],
            bands[DROPPED_BAND + 1:, :, :],
            prev_label_raster.data,
            custom_raster.data,
        ),
        axis=0,
    )


train_feature = _stack_inputs(train_features, train_prev_labels, train_custom_features)
test_feature = _stack_inputs(test_features, test_prev_labels, test_custom_features)
train_label = train_labels.data
test_label = test_labels.data









train  gs://earth-engine-seminar/urbanization/data/export_16012025/2022-01-01/urban_feat.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2021-01-01/urban_label.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2022-01-01/urban_custom_feat.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2022-01-01/urban_label.tif
test gs://earth-engine-seminar/urbanization/data/export_16012025/2023-01-01/urban_feat.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2022-01-01/urban_label.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2023-01-01/urban_custom_feat.tif gs://earth-engine-seminar/urbanization/data/export_07022025/2023-01-01/urban_label.tif


## Data Pre-processing

In [5]:

# Turn the stacked rasters into train / validation / test samples.
# `test_mask` is kept because the saving cell uses it to place the predictions
# back onto the raster grid.
(
    x_train, y_train,
    x_val, y_val,
    x_test, y_test,
    test_mask,
) = main.pre_process(
    train_feature,
    train_label,
    test_feature,
    test_label,
    cfg["block_coverage"],
    cfg["total_blocks"],
    cfg["test_size"],
)

## Training

In [6]:
# Class weights are computed from the training labels and handed to LightGBM.
s_weights = main.multiclass_temoral_class_weights(y_train)

model = lgbm.LGBMClassifier(
    objective="multiclass",
    num_class=4,
    class_weight=s_weights,
)

# NOTE(review): [6, 8, 9, 13] looks like a list of feature column indices that
# main.train treats specially (e.g. as categorical) - confirm against main.train.
best_model = main.train(
    model,
    x_train,
    y_train,
    x_val,
    y_val,
    [6, 8, 9, 13],
)



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.177078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2800
[LightGBM] [Info] Number of data points in the train set: 2273956, number of used features: 18
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294
[LightGBM] [Info] Start training from score -1.386294


In [7]:
# Sanity check: print the shape of each split, in the order
# train, test, validation (features first, then labels).
for split_array in (x_train, y_train, x_test, y_test, x_val, y_val):
    print(split_array.shape)

(2273956, 18)
(2273956,)
(3314341, 18)
(3314341,)
(940500, 18)
(940500,)


## Prediction

In [8]:
# Score the held-out test samples. `report` is returned as well but is not
# printed here.
test_pred, cm, report, auc_roc = main.predict(best_model, x_test, y_test)

# One value per line: predictions, confusion matrix, then the AUC-ROC score.
print(test_pred, cm, auc_roc, sep="\n")



[3. 3. 3. ... 2. 3. 3.]
[[   7684       0       0      38]
 [      0    2101       0      17]
 [      0       0    6445      57]
 [     14      19      28 3297938]]
0.9998042999671155


## Saving Predictions

In [None]:

# Reuse the label raster's profile so the exported prediction follows it.
test_profile = test_labels.profile
height = test_labels.metadata["height"]
width = test_labels.metadata["width"]
shape = (height, width)

# Scatter the per-sample predictions back onto the flattened raster grid;
# positions not selected by the mask stay NaN.
# NOTE(review): the recorded output shows a warning raised from the write call
# inside utils.export_tiff - possibly NaN being cast to the profile's dtype.
# Confirm the profile dtype / nodata value.
valid_pred_mask = test_mask.flatten()
pred_array = np.full(height * width, np.nan)
pred_array[valid_pred_mask] = test_pred
pred_array = pred_array.reshape((1, height, width))

output_file = os.path.join(cfg["output_path"], f'{cfg["pred_year"]}.tiff')
utils.export_tiff(output_file, test_profile, pred_array, ['urban'])


  dataset.write(data)  # Write the array to the first band


