In [None]:
import geopandas as gpd
import pandas as pd
from glob import glob
from shapely.geometry import Polygon, MultiPolygon, shape, Point, box
from tqdm import tqdm
import geohash
# Accumulator list: the file-loop cell further down appends one frame per accepted
# input file to it and then concatenates the list into `gdf`.
data = []

from colormap import rgb2hex


# Land-cover legend. Each entry carries the class label, a 'selected' flag that
# starts as False, and an 'rgba' string of four comma-separated integers.
# The order is significant: elsewhere in this file the 1-based position of an
# entry is used as its class id.
_class = [
    {'class': label, 'selected': False, 'rgba': colour}
    for label, colour in (
        ('PASTURE', '237, 222, 142, 77'),
        ('FOREST FORMATION', '50, 166, 94, 77'),
        ('AGRICULTURE', '233, 116, 237, 77'),
        ('AGROPEC', '255, 255, 178, 77'),
        ('FOREST PLANT', '122, 89, 0, 77'),
        ('URBAN', '212, 39, 30, 77'),
        ('SAVANNA', '125, 201, 117, 77'),
        ('WATER', '0, 0, 255, 77'),
        ('FLOODED AREA', '2, 105, 117, 77'),
        ('WOODED SANDBANK', '2, 214, 89, 77'),
        ('GRASSLAND', '214, 188, 116, 77'),
        ('COFF', '214, 143, 226, 77'),
        ('MANGROVE', '4, 56, 29, 77'),
        ('MINING', '156, 0, 39, 77'),
        ('ROCKY', '255, 170, 95, 77'),
        ('NON FOREST', '173, 151, 90, 77'),
        ('APICUM', '252, 129, 20, 77'),
        ('HERBACEOUS SANDBANK', '173, 81, 0, 77'),
        ('ASPHALT', '206, 206, 206, 77'),
    )
]

#for i, c in enumerate(_class,1):
#    print(f"'{rgb2hex(*[int(n) for n in c['rgba'].split(', ')[0:3]])}', // {i} {c['class']}")

In [None]:
# Lookup from class label to class id (as a string); it is filled from `_class`
# by the loop later in this cell.
replace = {}

# Reference layer read from disk; later cells compare each input file's geohash
# against its 'hash' column.
old = gpd.read_file('data/amostra_rois_v5_3857.gpkg')

def getbox(row):
    """Return the centre point of the rectangle described by a row's extent.

    ``row`` must support item lookup for 'left', 'top', 'right' and 'bottom'.
    The four values are handed to shapely's ``box`` in exactly that order and
    the centroid of the resulting polygon is returned.
    """
    extent = (row[side] for side in ('left', 'top', 'right', 'bottom'))
    return box(*extent).centroid

# Map every class label to its 1-based position in `_class`, stored as a string.
replace.update(
    (entry['class'], str(position))
    for position, entry in enumerate(_class, start=1)
)
     

def asint(x):
    """Convert a scalar to ``int``, mapping missing values to 0.

    Parameters
    ----------
    x : scalar
        Value to convert, e.g. a digit string such as ``'3'`` or a number.

    Returns
    -------
    int
        ``0`` when ``x`` is missing (``None``, ``NaN``, ``pd.NA``, ``NaT``),
        otherwise ``int(x)``.

    Raises
    ------
    ValueError, TypeError
        Propagated from ``int()`` when ``x`` is present but not convertible.
    """
    # pandas frequently represents a missing cell as NaN rather than None, and
    # int(float('nan')) raises ValueError, so test for every missing marker.
    if pd.isna(x):
        return 0
    return int(x)



In [None]:
def get_geohash(c):
    """Encode a point as a geohash string with precision 7.

    ``c`` must expose ``x`` and ``y`` attributes. ``y`` is passed to
    ``geohash.encode`` first and ``x`` second; at the call sites in this file
    the point is in EPSG:4326, so that is latitude followed by longitude.
    """
    first, second = c.y, c.x
    return geohash.encode(first, second, precision=7)

In [None]:
# Sanity check on a single input: take the first path returned by glob, compute
# the geohash of the centroid of all its geometries (after converting to
# EPSG:4326) and display whether that hash is absent from `old`.
sample_path = glob('../*/*.gpkg')[0]
gdf = gpd.read_file(sample_path)
sample_centroid = gdf.to_crs(4326).geometry.unary_union.centroid
_hash = get_geohash(sample_centroid)
old.loc[old['hash'] == _hash].empty


In [None]:
# Start from an empty list on every run: `data` is also created in the first
# cell, so without this reset re-executing the cell would append every file a
# second time and duplicate rows in `gdf`.
data = []

# Hashes present in the reference layer, collected once instead of filtering
# `old` again for every file.
known_hashes = set(old['hash'])

for file in tqdm(glob('../*/*.gpkg')):
    tmp = gpd.read_file(file)

    # One geohash per file: centroid of the union of all geometries in EPSG:4326.
    _hash = get_geohash(tmp.to_crs(4326).geometry.unary_union.centroid)
    if _hash not in known_hashes:
        continue  # file has no counterpart in the reference layer

    # Labels found in `replace` become their id string and then an int;
    # missing labels become 0 (see `asint`) and are filtered out below.
    tmp['class_id'] = tmp['bing_class'].replace(replace).apply(asint)
    tmp['hash_macro'] = _hash[:6]
    tmp['hash_micro'] = _hash
    tmp['class_name'] = tmp['bing_class']
    # Replace every geometry by the centre point of its left/top/right/bottom extent.
    tmp['geometry'] = tmp.apply(getbox, axis=1)
    tmp = tmp[['hash_macro', 'hash_micro', 'class_id', 'class_name', 'geometry']]
    # set_crs only declares the CRS, it does not reproject.
    # NOTE(review): assumes the extent columns are already in EPSG:3857 — confirm.
    data.append(tmp.set_crs(3857))

if not data:
    # pd.concat([]) would raise a ValueError with a much less helpful message.
    raise ValueError("no .gpkg file under '../*/' matched a hash in `old`; nothing to concatenate")

# ignore_index avoids carrying each file's own 0..n-1 labels into the result,
# which would leave duplicated index labels.
gdf = pd.concat(data, ignore_index=True)

# Keep labelled rows only. reset_index returns a new frame, so adding a column
# afterwards does not write into a filtered view.
gdf = gdf[gdf['class_id'] > 0].reset_index(drop=True)

# Sequential row identifier (0..n-1), used later to tell sampled rows from the rest.
gdf['id'] = gdf.index

In [None]:
# First character of each row's 'hash_macro'; the sampling cell groups rows on this column.
gdf['hash1'] = [code[0] for code in gdf['hash_macro']]

In [None]:
# Row budget per 'hash1' group: groups with fewer rows than this are kept whole,
# larger ones are sampled down per micro hash and per class.
total_amostra = 35_000

dataf = []
for region_key in gdf['hash1'].unique():
    region = gdf[gdf['hash1'] == region_key].copy()
    cell_keys = region['hash_micro'].unique()
    n_cells = len(cell_keys)
    # Budget per micro hash: the region budget split evenly (integer division).
    per_cell_budget = total_amostra // n_cells
    print(total_amostra, per_cell_budget, n_cells)
    print(len(region))

    # Regions under the budget are kept in full.
    if len(region) < total_amostra:
        dataf.append(region)
        continue

    picked = []
    for cell_key in cell_keys:
        cell = region[region['hash_micro'] == cell_key]
        class_ids = cell['class_id'].unique()
        n_classes = len(class_ids)

        for class_value in class_ids:
            # Budget per class inside this micro hash (integer division again).
            per_class_budget = per_cell_budget // n_classes
            members = cell[cell['class_id'] == class_value]
            # Classes under the per-class budget are kept whole; the others are
            # sampled without replacement using a fixed seed.
            if len(members) >= per_class_budget:
                members = members.sample(n=per_class_budget, replace=False, random_state=42)
            picked.append(members)

    sampled = pd.concat(picked)

    # Rows of this region that were not picked. They are only counted here:
    # topping the sample up from them to reach `total_amostra` is disabled.
    leftover = region[~region['id'].isin(sampled['id'])]
    print(len(sampled))
    print(total_amostra - len(sampled), len(leftover))

    dataf.append(sampled)

amostras_okey = pd.concat(dataf)
        

In [None]:
# Write the concatenated sample to disk; the path is relative to the notebook's
# working directory.
# NOTE(review): the .shp extension suggests the ESRI Shapefile format, which limits
# field names to 10 characters — confirm any column added later stays within that.
amostras_okey.to_file('data/amostras_cerrado5_clearv3.shp')