In [1]:
from osgeo import gdal
import numpy as np
from sklearn.cluster import DBSCAN

# Directory holding the input raster (and where the result is written later).
data_path = '0_data/'


# Open the denoised outlier raster and pull out everything later cells need:
# the pixel array, its dimensions, and its georeferencing.
obs_outliers = gdal.Open(data_path + 'obs_outliers_denoise.tif')
if obs_outliers is None:
    # Unless gdal.UseExceptions() is enabled, gdal.Open signals failure by
    # returning None; fail here with a clear message instead of an
    # AttributeError on the next line.
    raise RuntimeError('Could not open raster: ' + data_path + 'obs_outliers_denoise.tif')
obs_outliers_band = obs_outliers.GetRasterBand(1)
# Read from band 1 explicitly so the array is always 2-D (rows, cols);
# reading from the dataset would return a 3-D array for a multi-band file.
obs_outliers_data = obs_outliers_band.ReadAsArray()
row_length = obs_outliers.RasterYSize
col_length = obs_outliers.RasterXSize
# Geotransform and projection are kept so the output can reuse them.
tif_geotrans = obs_outliers.GetGeoTransform()
tif_proj = obs_outliers.GetProjection()

# Collect the (row, col) index of every pixel whose value is > 0.
# np.argwhere walks the array in row-major order, i.e. the same order a
# nested row/col loop would produce, without a Python-level loop per pixel.
# For a 2-D raster the result has shape (N, 2), including when N == 0.
obs_out = np.argwhere(obs_outliers_data > 0)

In [2]:
# DBSCAN parameters. The points are (row, col) pixel indices, so eps is a
# distance in pixel units.
# NOTE(review): the rationale for 4.4 * 2 and 35 is not visible here -- confirm.
eps = 4.4 * 2
min_samples = 35

clusters = DBSCAN(eps=eps, min_samples=min_samples).fit(obs_out)

# Label raster with the same shape as the input:
#   0      -> background pixel, or a point DBSCAN marked as noise (label -1)
#   k >= 1 -> pixel belonging to DBSCAN cluster k - 1
# Labels are integers and the raster is later written to an Int32 band, so
# allocate it as int32 rather than the float64 default.
obs_out_clusters = np.zeros((row_length, col_length), dtype=np.int32)
# Each (row, col) pair occurs once in obs_out, so one fancy-index assignment
# replaces the per-point loop.
obs_out_clusters[obs_out[:, 0], obs_out[:, 1]] = clusters.labels_ + 1
    

In [3]:
# Save the cluster label raster as a single-band Int32 GeoTIFF that reuses the
# geotransform and projection read from the input raster.
driver = gdal.GetDriverByName('GTiff')
obs_out_clusters_dataset = driver.Create(data_path + 'obs_clusters.tif', col_length, row_length, 1, gdal.GDT_Int32)
if obs_out_clusters_dataset is None:
    # Unless gdal.UseExceptions() is enabled, Create signals failure (e.g. an
    # unwritable path) by returning None; report it explicitly.
    raise RuntimeError('Could not create raster: ' + data_path + 'obs_clusters.tif')
obs_out_clusters_dataset.SetGeoTransform(tif_geotrans)
obs_out_clusters_dataset.SetProjection(tif_proj)
obs_out_clusters_dataset.GetRasterBand(1).WriteArray(obs_out_clusters)
# Dropping the last reference closes the dataset, which flushes it to disk.
del obs_out_clusters_dataset