

<p align="center">
  <img src="../assets/imgs/creat_area_amostras.svg" alt="drawing" style="width:600px;"/>
</p>

In [None]:
import geopandas as gpd
from glob import glob
import pandas as pd
from shapely.geometry import box
import geohash
import numpy as np
from tqdm import tqdm

In [None]:
def area_cat(area):
    """Return the label of the size class that ``area`` falls into.

    Classes are half-open on the right: a value equal to a class's upper
    bound is placed in the next class (``0.5`` -> ``'0.5 - 1'``).  Any value
    that is not strictly below 2000 is labelled ``'> 2000'``; this includes
    NaN, because every ``<`` comparison against NaN is false.

    Args:
        area: Numeric area. The notebook applies this to the ``area_ha``
            column.

    Returns:
        str: The class label.
    """
    # (exclusive upper bound, label) pairs in ascending order of bound.
    classes = (
        (0.001, '< 0.001'),
        (0.5, '0.001 - 0.5'),
        (1, '0.5 - 1'),
        (2, '1 - 2'),
        (4, '2 - 4'),
        (8, '4 - 8'),
        (10, '8 - 10'),
        (20, '10 - 20'),
        (30, '20 - 30'),
        (40, '30 - 40'),
        (50, '40 - 50'),
        (100, '50 - 100'),
        (150, '100 - 150'),
        (300, '150 - 300'),
        (450, '300 - 450'),
        (600, '450 - 600'),
        (750, '600 - 750'),
        (900, '750 - 900'),
        (1000, '900 - 1000'),
        (2000, '1000 - 2000'),
    )
    # The first bound the value is strictly below decides the class.
    for upper, label in classes:
        if area < upper:
            return label
    return '> 2000'

def geohash_grid(geometry, precision, step=0.0005):
    """List the geohash cells hit by a regular sampling of a bounding box.

    The bounding box of ``geometry`` (not its exact outline) is sampled on a
    regular lon/lat lattice and every sample point is encoded with
    ``geohash.encode``; the distinct codes are returned.

    The maximum latitude and longitude are always sampled in addition to the
    lattice.  ``np.arange`` excludes its stop value, so without this a
    point-like or zero-width geometry would yield no cells at all, and cells
    touched only by the north/east edge of the box would be missed.

    Args:
        geometry: Object exposing ``bounds`` as
            ``(min_lon, min_lat, max_lon, max_lat)`` and ``is_empty``
            (e.g. a shapely geometry in lon/lat coordinates).
        precision: Geohash precision forwarded to ``geohash.encode``.
        step: Lattice spacing, in the same units as ``bounds``.  Cells
            narrower than ``step`` can fall between samples and be skipped.

    Returns:
        list[str]: Distinct geohash codes, in no particular order.  Empty
        when ``geometry`` is ``None`` or empty.
    """
    # A missing or empty geometry has no usable bounds to sample.
    if geometry is None or geometry.is_empty:
        return []

    min_lon, min_lat, max_lon, max_lat = geometry.bounds

    # Append the max edge explicitly because arange stops short of it.
    lats = np.append(np.arange(min_lat, max_lat, step), max_lat)
    lons = np.append(np.arange(min_lon, max_lon, step), max_lon)

    geohashes = {
        geohash.encode(lat, lon, precision=precision)
        for lat in lats
        for lon in lons
    }
    return list(geohashes)

def get_geohash(geom, precision=14):
    """Encode the centroid of ``geom`` as a geohash string.

    Args:
        geom: Object exposing a ``centroid`` with ``x`` and ``y`` attributes
            (e.g. a shapely geometry); ``y`` is passed as latitude and ``x``
            as longitude.
        precision: Geohash precision forwarded to ``geohash.encode``.

    Returns:
        The value returned by ``geohash.encode`` for the centroid.
    """
    center = geom.centroid
    lat, lon = center.y, center.x
    return geohash.encode(lat, lon, precision=precision)

def hash_box(geohash_code):
    """Return the edges of a geohash cell as a ``(w, n, e, s)`` tuple.

    The values are read from the ``'w'``, ``'n'``, ``'e'`` and ``'s'`` keys
    of the mapping returned by ``geohash.bbox`` (presumably the west, north,
    east and south edges of the cell).

    NOTE(review): this order is not shapely's ``box(minx, miny, maxx, maxy)``
    order; when the tuple is unpacked into ``box`` the ``'n'`` value lands in
    the ``miny`` slot and ``'s'`` in ``maxy``.
    """
    edges = geohash.bbox(geohash_code)
    return tuple(edges[side] for side in ('w', 'n', 'e', 's'))


## Os dados foram obtidos do Observatório da Restauração no dia 26-04-2024

In [None]:
# glob() returns matches in arbitrary, filesystem-dependent order. Sorting
# makes the run reproducible: the later de-duplication keeps the first
# occurrence, so file order decides which duplicate record survives.
files = sorted(glob('../orr26042024/output/*.gpkg'))
files

## Agregar os dados e remover duplicados

In [None]:
# Columns kept from every input file and written to the cleaned output.
columns = ['met_princ', 'met_comb', 'area_ha', 'area_cat', 'file', 'geometry']

data = []

for file in files:
    try:
        gdf = gpd.read_file(file)
        # Some files name the column 'metprinci'; normalise it.
        gdf = gdf.rename(columns={'metprinci': 'met_princ'})
        # Area is measured in EPSG:5880 and divided by 10,000 (m2 -> ha,
        # assuming the CRS unit is metres).
        gdf['area_ha'] = gdf.to_crs(5880).area / 10_000
        gdf['area_cat'] = gdf['area_ha'].apply(area_cat)
        gdf['file'] = file
        # Reproject every file to EPSG:4326 *before* concatenating: geopandas
        # cannot concatenate frames whose CRS differ.
        gdf = gdf[columns].to_crs(4326)
        data.append(gdf)
    except Exception as e:
        # Best effort: report the unreadable/incomplete file and keep going.
        print(f"Erro no arquivo {file}: {e}")

# Fail with a clear message instead of pd.concat's generic
# "No objects to concatenate" when nothing could be loaded.
if not data:
    raise ValueError('Nenhum arquivo válido foi carregado; nada para concatenar.')

# Concatenate everything into a single GeoDataFrame (already in EPSG:4326);
# ignore_index gives each row a unique label instead of repeating 0..n per file.
all_df = gpd.GeoDataFrame(pd.concat(data, ignore_index=True))
# Rows whose centroid geohash (precision 14) coincide are treated as duplicates;
# the first occurrence is kept.
all_df['hash'] = all_df['geometry'].apply(get_geohash)
clear = all_df.drop_duplicates(subset=['hash']).copy()

# Save the de-duplicated data to a file.
clear[columns].to_file('all_data.gpkg')

In [None]:
precisions = [3, 6]
for precision in precisions:
    col_hash = f'hash{precision}'
    # Each row receives the *list* of geohash cells sampled over its bounding box.
    clear[col_hash] = clear.geometry.apply(lambda geom: geohash_grid(geom, precision=precision))

    # Union of the cells of every row.
    set_hash = set()
    for hashes in clear[col_hash]:
        set_hash.update(hashes)

    # Iterate the cells in sorted order: set iteration order of strings can
    # change between interpreter runs, which would shuffle the rows of the
    # output files from one execution to the next.
    tmp = gpd.GeoDataFrame(
        [{'hash': h, 'geometry': box(*hash_box(h))} for h in sorted(set_hash)], crs=4326).to_crs(3857)
    # One polygon per cell, written in EPSG:3857 as both GeoPackage and Shapefile.
    tmp.to_file(f'amostra_hash{precision}.gpkg')
    tmp.to_file(f'amostra_hash{precision}.shp')


In [None]:
allhash = []

# clear['hash6'] stores a *list* of codes per row, so it must be flattened
# before de-duplicating: Series.unique() on list values raises
# "TypeError: unhashable type: 'list'". Rows with an empty list explode to
# NaN and are dropped. Sorting keeps the output row order reproducible.
base_hashes = sorted(clear['hash6'].explode().dropna().unique())

for base_hash in tqdm(base_hashes):
    # Appending each character of the geohash alphabet to a precision-6 code
    # produces its precision-7 child cells.
    for suffix in '0123456789bcdefghjkmnpqrstuvwxyz':
        child_hash = f'{base_hash}{suffix}'
        allhash.append({
            'hash': child_hash,
            'geometry': box(*hash_box(child_hash))
        })

gdf_d = gpd.GeoDataFrame(allhash, geometry='geometry', crs=4326)
gdf_d.to_file('amostra7_fulldata.gpkg')
