In [1]:
import multiprocessing as mp
import os
import random
import shutil
import subprocess
import time
from pathlib import Path
from time import sleep

import ee
import eeconvert as eeconvert
import geemap
import geopandas as gpd
import numpy as np
import rasterio as rio
import rasterio.features as features
from shapely.geometry import Point

# Initialise the Earth Engine client once, before any ee.* object is built below.
ee.Initialize()


class DatesHelper:
    """Attach the date of peak MODIS NDVI to every cell of the child grid.

    The pipeline asks Earth Engine which MOD13Q1 composite inside
    ``DATE_RANGE`` holds the highest NDVI for each pixel, downloads that
    index raster one parent-grid tile at a time, mosaics the tiles with GDAL,
    and spatially joins the resulting date codes onto ``interim/child.gpkg``
    (written back with a ``BSD`` column holding the date string).

    Parameters
    ----------
    DATA_DIR : str
        Project data directory. ``interim/parent.gpkg`` and
        ``interim/child.gpkg`` are read from it and ``interim/modis`` is used
        as the working directory.
    AOI : str
        Path to a vector file readable by geopandas; used to clip the image.
    DATE_RANGE : sequence of two date strings
        Start and end handed to ``ee.Filter.date``.
    n_cores : int
        Number of worker processes used to download tiles.
    bypass : bool
        When true the download step is skipped and tiles already present in
        the tile directory are reused.
    """

    # Files derived from the tiles. They are deleted before each run so GDAL
    # never appends to, or refuses to overwrite, a leftover output.
    _DERIVED_OUTPUTS = (
        "temp.vrt",
        "merged.tif",
        "merged.gpkg",
        "shell.tif",
        "shell.gpkg",
        "centroids.gpkg",
    )

    def __init__(self, DATA_DIR, AOI, DATE_RANGE, n_cores = 10,  bypass=False):
        self.data_dir = DATA_DIR
        self.aoi = AOI
        self.date_range = DATE_RANGE
        self.n_cores = n_cores
        self.bypass=bypass
        # Defined up front so download_modis() also works when it is called
        # without going through extract_best_dates() first.
        self.modis_dir = f"{self.data_dir}/interim/modis"
        self.tile_dir = f"{self.modis_dir}/tiles"

    def download_modis(self, max_ndvi_image):
        """Download ``max_ndvi_image`` for every parent tile using a process pool.

        The parent grid is split into ``n_cores`` row groups and each group is
        handled by ``_download_modis_chunk`` in its own worker. Returns ``None``;
        the tiles are written into ``self.tile_dir``.
        """
        parent = gpd.read_file(f"{self.data_dir}/interim/parent.gpkg")
        Path(self.tile_dir).mkdir(parents=True, exist_ok=True)

        cpus = self.n_cores
        # Split row positions (same partition np.array_split would give for
        # the frame itself) and drop empty groups so no idle job is queued.
        row_groups = np.array_split(np.arange(len(parent)), cpus)
        parent_chunks = [parent.iloc[rows] for rows in row_groups if len(rows)]

        # The context manager tears the pool down even if a worker raises;
        # every result is collected before the block exits.
        with mp.Pool(processes=cpus) as pool:
            pending = [
                pool.apply_async(self._download_modis_chunk, args=(max_ndvi_image, chunk, parent))
                for chunk in parent_chunks
            ]
            for job in pending:
                job.get()

    def _download_modis_chunk(self, max_ndvi_image, gdf_chunk, gdf_complete):
        """Export one GeoTIFF per row of ``gdf_chunk`` into ``self.tile_dir``.

        ``gdf_complete`` is accepted for interface compatibility and is not used.
        """
        # Random start-up delay of up to two seconds so workers do not all hit
        # the service at the same instant.
        sleep(random.random()*2.0)

        for index,row in gdf_chunk.iterrows():
            aoi = ee.Geometry.Rectangle(row.geometry.bounds)
            geemap.ee_export_image(
                max_ndvi_image,
                filename=f"{self.tile_dir}/{row.pgrid_id}.tif",
                scale=250,
                region=aoi,
                file_per_band=False
            )

    @staticmethod
    def _build_date_lookup(dates):
        """Return the raster-code -> date mapping used to decode ``DateCode``.

        Codes are strings starting at "1" (code ``i`` is ``dates[i - 1]``);
        code "0" is reserved for nodata and maps to ``-99``.
        """
        lookup = {str(code): date for code, date in enumerate(dates, start=1)}
        lookup["0"] = -99
        return lookup

    @staticmethod
    def _run_tool(command):
        """Run an external command given as an argument list; raise if it fails."""
        subprocess.run(command, check=True)

    def _build_max_ndvi_image(self):
        """Build the Earth Engine image of remapped max-NDVI indices.

        Returns ``(image, dates)`` where ``dates`` is the list of distinct
        composite dates (``YYYY-MM-dd``) fetched from the server.
        """
        aoi = eeconvert.gdfToFc(gpd.read_file(self.aoi))
        modis = ee.ImageCollection('MODIS/061/MOD13Q1').filter(
            ee.Filter.date(self.date_range[0], self.date_range[1])
        )
        dates = (
            modis.map(lambda image: ee.Feature(None, {'date': image.date().format('YYYY-MM-dd')}))
            .distinct('date')
            .aggregate_array('date')
            .getInfo()
        )

        # Position of the maximum NDVI within the stacked collection; the
        # first flattened band ('maxDate_start') is the one kept.
        max_ndvi_date = modis.select('NDVI').toArray().arrayArgmax()
        max_ndvi_image = (
            ee.Image(max_ndvi_date)
            .arrayProject([0])
            .arrayFlatten([['maxDate_start', 'band2']])
            .clip(aoi)
            .select("maxDate_start")
        )

        # Shift every index up by one so a real '0' doesn't overlap with nodata.
        from_values = list(range(len(dates)))
        to_values = [value + 1 for value in from_values]
        max_ndvi_image = max_ndvi_image.remap(**{
            "from": from_values,
            "to": to_values,
            "defaultValue": 0,
            "bandName": 'maxDate_start'
        })
        return max_ndvi_image, dates

    def _prepare_workspace(self):
        """Create the working directories and delete stale derived outputs.

        Previously downloaded tiles are wiped only when a fresh download is
        going to happen; with ``bypass`` set they are kept so they can be reused.
        """
        modis_dir = Path(self.modis_dir)
        if not self.bypass and modis_dir.exists():
            shutil.rmtree(modis_dir)
        Path(self.tile_dir).mkdir(parents=True, exist_ok=True)

        for name in self._DERIVED_OUTPUTS:
            stale = modis_dir / name
            if stale.exists():
                stale.unlink()

    def _merge_tiles(self):
        """Mosaic every ``*.tif`` in the tile directory into ``merged.tif``."""
        tiles = sorted(str(path) for path in Path(self.tile_dir).glob("*.tif"))
        if not tiles:
            raise FileNotFoundError(f"No MODIS tiles (*.tif) found in {self.tile_dir}")

        vrt_path = f"{self.modis_dir}/temp.vrt"
        # All tiles go to ONE gdalbuildvrt call; invoking it once per tile
        # would leave a VRT that references a single tile only.
        self._run_tool(["gdalbuildvrt", "-srcnodata", "0", vrt_path, *tiles])
        self._run_tool(["gdal_merge.py", "-o", f"{self.modis_dir}/merged.tif", vrt_path])

    def _write_shell_raster(self):
        """Write ``shell.tif``: same grid as ``merged.tif``, one unique id per pixel.

        Workaround because polygonize doesn't uncombine tiles with same value.
        """
        with rio.open(f"{self.modis_dir}/merged.tif") as src:
            height, width = src.height, src.width
            profile = src.profile  # captured while the dataset is still open

        cell_ids = np.arange(height * width).reshape(height, width)

        # Single band, uint32 (ids exceed 8/16-bit ranges), LZW compressed.
        profile.update(
            dtype=rio.uint32,
            count=1,
            compress='lzw')

        with rio.open(f'{self.modis_dir}/shell.tif', 'w', **profile) as dst:
            dst.write(cell_ids.astype(rio.uint32), 1)

    def _polygonize(self, raster_name, vector_name):
        """Polygonize band 1 of a raster in the MODIS directory into a GeoPackage."""
        self._run_tool([
            "gdal_polygonize.py",
            f"{self.modis_dir}/{raster_name}",
            "-b", "1",
            "-f", "GPKG",
            f"{self.modis_dir}/{vector_name}",
            "OUTPUT",
            "DateCode",
        ])

    def _build_centroids(self):
        """Write pixel-centre points of ``merged.tif`` and tag them with ``DateCode``.

        Returns a GeoDataFrame with columns ``feature``, ``geometry``, ``DateCode``.
        """
        with rio.open(f"{self.modis_dir}/merged.tif") as src:
            cols, rows = np.meshgrid(np.arange(src.width), np.arange(src.height))
            xs, ys = rio.transform.xy(src.transform, rows.ravel(), cols.ravel())
            lons = np.asarray(xs).ravel()
            lats = np.asarray(ys).ravel()

            # points_from_xy builds real geometries directly; a GeoSeries of
            # coordinate tuples is deprecated and slated to raise TypeError.
            out = gpd.GeoDataFrame(
                {'feature': list(range(len(lons))), 'geometry': gpd.points_from_xy(lons, lats)},
                crs=src.crs)
            out.to_file(f"{self.modis_dir}/centroids.gpkg", driver="GPKG")

        merged = gpd.read_file(f"{self.modis_dir}/merged.gpkg")
        centroids = gpd.read_file(f"{self.modis_dir}/centroids.gpkg")
        centroids = centroids.sjoin(merged, how="inner", predicate='intersects')
        return centroids[['feature', 'geometry', 'DateCode']]

    def _join_dates_to_child(self, centroids):
        """Join date codes onto the child grid and overwrite ``interim/child.gpkg``."""
        child = gpd.read_file(f"{self.data_dir}/interim/child.gpkg")
        child = child.sjoin(centroids,  how="inner", predicate='intersects')
        child = child[['DateCode', 'geometry']].copy()
        child['DateCode'] = child['DateCode'].astype(str)
        child = child.replace({"DateCode": self.date_dict})
        child = child.reset_index()
        child.columns = ["grid_id", "BSD", "geometry"]
        # Round-trip through disk so BSD comes back as text and the nodata
        # marker can be compared against the string "-99".
        child.to_file(f"{self.modis_dir}/child.gpkg", driver="GPKG")
        child = gpd.read_file(f"{self.modis_dir}/child.gpkg")
        child = child[child['BSD'] != "-99"]
        child = child.reset_index()
        child = child[['index', 'BSD', 'geometry']]
        child.columns = ['grid_id', 'BSD', 'geometry']
        child.to_file(f"{self.data_dir}/interim/child.gpkg", driver="GPKG")

    def extract_best_dates(self):
        """Run the whole pipeline and rewrite ``interim/child.gpkg`` with a ``BSD`` column.

        Side effects: sets ``self.date_dict``; (re)creates the MODIS working
        directory and the rasters/GeoPackages inside it; prints elapsed time
        after each stage.

        Raises
        ------
        FileNotFoundError
            If no tiles are available to merge.
        subprocess.CalledProcessError
            If one of the GDAL command-line tools exits with an error.
        """
        start = time.time()

        # Get best date for each tile
        max_ndvi_image, dates = self._build_max_ndvi_image()

        # Create a value - date hashmap for future remapping
        self.date_dict = self._build_date_lookup(dates)
        print(f"Done with MODIS best date calculation.. - {time.time()-start} sec")

        # Download modis tiles at parent resolution
        self._prepare_workspace()
        if not self.bypass:
            self.download_modis(max_ndvi_image)

        # Merge downloaded modis tiles into one
        self._merge_tiles()
        print(f"Merged tiles.. - {time.time()-start} sec")

        self._write_shell_raster()
        print(f"Shell GDF created.. - {time.time()-start} sec")

        self._polygonize("shell.tif", "shell.gpkg")
        print(f"Polygonized shell.. - {time.time()-start} sec")

        self._polygonize("merged.tif", "merged.gpkg")
        print(f"Polygonized merged.. - {time.time()-start} sec")

        centroids = self._build_centroids()
        print(f"Created Centroids.. - {time.time()-start} sec")

        self._join_dates_to_child(centroids)
        print(f"Saved child GDF. Completed! - {time.time()-start} sec")
    
#         self.alt_joined = child
        
#         shell = gpd.read_file(f"{DATA_DIR}/interim/shell.gpkg")
#         joined = shell.sjoin(centroids,  how="inner", predicate='intersects')
#         child = joined # dev: comment and rename 'joined' to 'child' in line above last
#         child = child[["DateCode_right", "geometry"]]
#         child['DateCode_right'] = child['DateCode_right'].astype(str)
#         child = child.replace({"DateCode_right": self.date_dict})
#         child = child[child['DateCode_right'] != "-99"] # why is this -99?
#         child = child.reset_index()
#         child.columns = ["GRID_ID", "BSD", "geometry"]
#         child.to_file(f"{DATA_DIR}/interim/modis.gpkg", driver="GPKG")
#         child = gpd.read_file(f"{DATA_DIR}/interim/modis.gpkg")
# #         print(child.columns)
#         child = child[child['BSD'] != "-99"]
#         child = child.reset_index()
#         child = child[['index', 'BSD', 'geometry']]
#         child.columns = ['grid_id', 'BSD', 'geometry']
#         child.to_file(f"{DATA_DIR}/interim/modis.gpkg", driver="GPKG")
#         print(f"Saved child GDF. Completed! - {time.time()-start} sec")
        
        
#         self.merged = merged
#         self.centroids = centroids
# #         self.joined = joined
#         self.child = child
        

*** Earth Engine *** FINAL DEADLINE: ee.Authenticate will fail after 2022-06-06. Please upgrade. https://developers.google.com/earth-engine/guides/python_install


In [2]:
# Helper for the Jan-Jun 2019 window, with default worker count and downloads enabled.
dh = DatesHelper(
    DATA_DIR="../../data",
    AOI="../../data/inputs/aoi.gpkg",
    DATE_RANGE=['2019-01-01', '2019-06-15'],
)

In [3]:
# Run the full pipeline; progress and timings are printed as each stage finishes.
dh.extract_best_dates()

Done with MODIS best date calculation.. - 0.18895864486694336 sec
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/3eb391f6e9ff82967ab2215798e1d3eb-f1b50caf69b970230cdbf5f84f109198:getPixels
Please wait ...
Generating URL ...
Data downloaded to /home/arogya/projects/afg-clustering/data/interim/modis_tiles/15.tif
Generating URL ...
Generating URL ...
Generating URL ...
Generating URL ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/ff0654b0652404abe55ada32b8a63a87-4450ed0d284c93d99346988373d4eb55:getPixels
Please wait ...
Generating URL ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/dc058db8a94e20114e9a528f679a5aee-b5bf11c5913fd678a2ca17b199cd2496:getPixels
Please wait ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/pro

    it falls back to returning a pandas Series. But in the future, we will start
    to raise a TypeError instead.
  points = gpd.GeoSeries(
  pd.Int64Index,


Created Centroids.. - 12.764832735061646 sec


  pd.Int64Index,
  pd.Int64Index,


Saved child GDF. Completed! - 13.10758352279663 sec


In [115]:
# import matplotlib.pyplot as plt

# modis = gpd.read_file("../../data/interim/modis.gpkg")
# child = gpd.read_file("../../data/interim/child.gpkg")
# merged = gpd.read_file("../../data/interim/merged.gpkg")
# fig,ax = plt.subplots(1,1,figsize=(8,10), dpi=150)
# # dh.joined.boundary.plot(color="red", linewidth=0.5, ax=ax)
# # dh.centroids.plot(color="green", linewidth=0.5, ax=ax, markersize=0.2 )
# # child.boundary.plot(color="black", linewidth=0.3, ax=ax)
# dh.alt_joined.plot(column="DateCode", linewidth=0.3, ax=ax)
# # dh.alt_joined.boundary.plot(color="red", linewidth=0.3, ax=ax)
# # dh.centroids.plot(color="black", markersize=0.2, ax=ax)

# # child.sjoin(dh.centroids)['DateCode'].unique()
# # child.sjoin(dh.centroids).plot(ax=ax)
# # ax.set_xlim(xmin=64.07, xmax=64.1)
# # ax.set_ylim(ymin=31.46, ymax=31.5 )

# len(child), len(dh.alt_joined), len(dh.joined)
# dh.alt_joined

In [119]:
# NOTE(review): `alt_joined`, `joined` and `child` are only assigned in the
# commented-out tail of DatesHelper, so this cell fails on a fresh kernel — confirm
# whether it should be removed.
dh.alt_joined.columns, dh.joined.columns, dh.child.columns

(Index(['grid_id', 'geometry', 'index_right', 'feature', 'DateCode'], dtype='object'),
 Index(['DateCode_left', 'geometry', 'index_right', 'feature',
        'DateCode_right'],
       dtype='object'),
 Index(['grid_id', 'BSD', 'geometry'], dtype='object'))

In [92]:
# Only the last expression is rendered, so the groupby count on the first line
# is computed and discarded.
# NOTE(review): `dh.alt_joined` is not set by the current class definition.
dh.alt_joined.groupby('DateCode').count()['feature']
dh.alt_joined.geometry.type.unique()

array(['Polygon'], dtype=object)

In [86]:
# Number of joined rows per date code.
# NOTE(review): `dh.joined` is not set by the current class definition.
dh.joined.groupby('DateCode_right').count()['feature']

DateCode_right
0     10962
2         6
3       318
4       150
5       294
6      7950
7     16152
8      1242
9       348
10      156
11      222
Name: feature, dtype: int64

In [87]:
# Display the `child` frame.
# NOTE(review): execution counts are out of order, so `child` comes from an earlier kernel state.
child

Unnamed: 0,grid_id,geometry
0,0,"POLYGON ((64.03213 31.43173, 64.03475 31.43183..."
1,1,"POLYGON ((64.03225 31.42948, 64.03487 31.42958..."
2,2,"POLYGON ((64.03237 31.42723, 64.03499 31.42734..."
3,3,"POLYGON ((64.03249 31.42499, 64.03511 31.42509..."
4,4,"POLYGON ((64.03260 31.42274, 64.03522 31.42284..."
...,...,...
3919,3919,"POLYGON ((64.17475 31.51826, 64.17737 31.51836..."
3920,3920,"POLYGON ((64.17487 31.51602, 64.17749 31.51612..."
3921,3921,"POLYGON ((64.17498 31.51377, 64.17761 31.51387..."
3922,3922,"POLYGON ((64.17510 31.51152, 64.17772 31.51162..."


In [20]:
import geopandas as gpd

In [21]:
# Load the child grid GeoPackage from the interim data directory.
child = gpd.read_file("../../data/interim/child.gpkg")

In [22]:
# Display the loaded frame (the recorded output has `index`, `BSD` and `geometry` columns).
child

Unnamed: 0,index,BSD,geometry
0,4122,2019-03-06,"POLYGON ((64.03416 31.53985, 64.03416 31.53760..."
1,4123,2019-03-06,"POLYGON ((64.03416 31.53985, 64.03416 31.53760..."
2,4124,2019-03-06,"POLYGON ((64.03416 31.53985, 64.03416 31.53760..."
3,4125,2019-03-06,"POLYGON ((64.03416 31.53985, 64.03416 31.53760..."
4,4126,2019-03-06,"POLYGON ((64.03416 31.53985, 64.03416 31.53760..."
...,...,...,...
80509,99751,2019-04-23,"POLYGON ((64.17340 31.38264, 64.17340 31.38040..."
80510,99752,2019-04-23,"POLYGON ((64.17340 31.38264, 64.17340 31.38040..."
80511,99753,2019-04-23,"POLYGON ((64.17340 31.38264, 64.17340 31.38040..."
80512,99754,2019-04-23,"POLYGON ((64.17340 31.38264, 64.17340 31.38040..."


In [34]:
# Scratch check: build a server-side list from two literal strings.
a=ee.List(['fafa', 'fofo'])

In [35]:
# Fetch the list back to Python; the recorded output is the same two strings.
a.getInfo()

['fafa', 'fofo']

In [22]:
import eeconvert as eeconvert
import ee
import geemap
import geopandas as gpd
import rasterio as rio
import numpy as np
import rasterio.features as features

In [23]:
# Initialise the Earth Engine client before the exploratory cells below use ee.*.
ee.Initialize()

In [24]:
# Afghanistan's boundary from the FAO GAUL level-0 collection.
afghanistan = (
    ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level0")
    .filter("ADM0_NAME == 'Afghanistan'")
)

# MOD13Q1 images for the first half of 2019.
modis = ee.ImageCollection('MODIS/061/MOD13Q1').filter(
    ee.Filter.date('2019-01-01', '2019-06-15')
)

# Server-side list of the distinct image dates, formatted as YYYY-MM-dd.
dates = (
    modis
    .map(lambda image: ee.Feature(None, {'date': image.date().format('YYYY-MM-dd')}))
    .distinct('date')
    .aggregate_array('date')
)

In [25]:
# Stack the NDVI band of every image and locate the maximum per pixel.
ndvi = modis.select('NDVI')
# ndvi1 = ndvi.max();
ndvi_Array = ndvi.toArray()
test = ndvi_Array.arrayArgmax()
test1 = (
    ee.Image(test)
    .arrayProject([0])
    .arrayFlatten([['maxDate_start', 'band2']])
    .clip(afghanistan)
    .select("maxDate_start")
)
projection = test1.projection()
reduction = test1.reduceRegion(ee.Reducer.frequencyHistogram(), afghanistan, 250, projection.crs)
# values = ee.Dictionary(reduction.get(test1.bandNames)).keys().map(ee.Number.parse);

# One source index per date, each shifted up by one so 0 stays free for the default value.
fromValues = list(range(len(dates.getInfo())))
toValues = [val + 1 for val in fromValues]

remap_args = {
  "from": fromValues,
  "to": toValues,
  "defaultValue": 0,
  "bandName": 'maxDate_start'
}
test1 = test1.remap(**remap_args)

In [26]:
# parent = gpd.read_file("/data/tmp/arogya/data/interim/parent.gpkg")
parent = gpd.read_file("../../data/interim/parent.gpkg")

# Export one GeoTIFF per parent cell, clipped to that cell's bounding box.
for row in parent.itertuples():
    print(row.pgrid_id)
    tile_bounds = ee.Geometry.Rectangle(row.geometry.bounds)
    geemap.ee_export_image(
        test1,
        filename=f"../../data/interim/tiles/{row.pgrid_id}.tif",
        scale=250,
        region=tile_bounds,
        file_per_band=False,
    )

# ee.Geometry.Rectangle([64.0321287595154, 31.3779740304129, 64.17798786794364, 31.542232512963988])

0
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/thumbnails/bc7db7fddb540ec0cb0a81993eea416d-c356be000987f1db070152502446b7b6:getPixels
Please wait ...
Data downloaded to /home/arogya/projects/afg-clustering/data/interim/tiles/0.tif


In [27]:
import os

# Build a VRT over every downloaded tile (the shell expands *.tif), then flatten it to one GeoTIFF.
build_vrt_cmd = 'gdalbuildvrt ../../data/interim/temp.vrt ../../data/interim/tiles/*.tif -srcnodata "0"'
merge_cmd = 'gdal_merge.py -o ../../data/interim/merged.tif ../../data/interim/temp.vrt'

os.system(build_vrt_cmd)
# print(f"--- Merging for District {dist_id}: {time.time() - self.start_time} seconds ---")
os.system(merge_cmd)



0

In [28]:
# Read the merged mosaic; the profile is captured while the dataset is open
# so nothing is read from the handle after it has been closed.
with rio.open("../../data/interim/merged.tif") as src:
    array = src.read(1)
    transform = src.transform
    crs = src.crs
    profile = src.profile

h, w = array.shape

# One unique id per pixel, so polygonizing yields one polygon per cell.
new_array = np.arange(h*w).reshape(h,w)

# Start from the source profile, then make it single-band uint32
# (ids do not fit in 8 or 16 bits) with LZW compression.
profile.update(
    dtype=rio.uint32,
    count=1,
    compress='lzw')

with rio.open('../../data/interim/shell.tif', 'w', **profile) as dst:
    dst.write(new_array.astype(rio.uint32), 1)

In [29]:
# Polygonize band 1 of the shell raster into a GeoPackage; the pixel value goes into field "DateCode".
os.system('gdal_polygonize.py ../../data/interim/shell.tif -b 1 -f "GPKG" ../../data/interim/shell.gpkg OUTPUT DateCode')

0

In [30]:
# Polygonize band 1 of the merged mosaic; the pixel value goes into field "DateCode".
os.system('gdal_polygonize.py ../../data/interim/merged.tif -b 1 -f "GPKG" ../../data/interim/merged.gpkg OUTPUT DateCode')

0

In [31]:
# Load both polygonized layers: per-pixel shell cells and the merged date-code polygons.
shell = gpd.read_file("../../data/interim/shell.gpkg")
merged = gpd.read_file("../../data/interim/merged.gpkg")

In [32]:
# Join each shell cell with every merged polygon it intersects and save the result.
with_dates = shell.sjoin(merged,  how="inner", predicate='intersects')
with_dates.to_file("../../data/interim/with_dates.gpkg")

  pd.Int64Index,


In [33]:
import rasterio
import numpy as np
import geopandas as gpd
from shapely.geometry import Point

path = '../../data/interim/merged.tif'


# Write one point per pixel centre of the merged mosaic.
with rasterio.open(path) as src:
    band1 = src.read(1)
    height = band1.shape[0]
    width = band1.shape[1]
    cols, rows = np.meshgrid(np.arange(width), np.arange(height))
    xs, ys = rasterio.transform.xy(src.transform, rows, cols)
    lons = np.array(xs)
    lats = np.array(ys)

    # Build the geometries directly: a GeoSeries of coordinate tuples falls
    # back to a plain Series today and is announced to raise TypeError later.
    points = gpd.GeoSeries(gpd.points_from_xy(lons.flatten(), lats.flatten()))

    geoms = points.values
    # Named feature_ids (not `features`) so the `rasterio.features` alias
    # imported elsewhere in the notebook is not overwritten.
    feature_ids = list(range(len(geoms)))

    out = gpd.GeoDataFrame(
        {'feature': feature_ids, 'geometry': geoms}, crs=src.crs)
    out.to_file("../../data/interim/centroids.gpkg", driver="GPKG")

    it falls back to returning a pandas Series. But in the future, we will start
    to raise a TypeError instead.
  points = gpd.GeoSeries(
  pd.Int64Index,


In [41]:
# Tag every pixel centroid with the DateCode of the merged polygon it falls in.
centroids_with_codes = gpd.read_file("../../data/interim/centroids.gpkg").sjoin(
    merged, how="inner", predicate='intersects'
)
centroids = centroids_with_codes[['feature', 'geometry', 'DateCode']]
centroids

Unnamed: 0,feature,geometry,DateCode
0,0,POINT (64.02630 31.54996),6
3,3,POINT (64.03304 31.54996),6
4,4,POINT (64.03528 31.54996),6
5,5,POINT (64.03753 31.54996),6
123,123,POINT (64.02630 31.54771),6
...,...,...,...
13030,13030,POINT (64.28456 31.31415),6
13034,13034,POINT (64.29355 31.31415),6
13035,13035,POINT (64.29579 31.31415),4
13036,13036,POINT (64.29804 31.31415),4


In [43]:
# Join shell cells with the centroids they intersect and save the result as a GeoPackage.
shell.sjoin(centroids,  how="inner", predicate='intersects').to_file("../../data/interim/centroid_dates.gpkg", driver="GPKG")

  pd.Int64Index,


In [35]:
import rasterio.features

# NOTE(review): `mask` is not defined in any visible cell; the recorded output
# of this cell is a NameError.
maskShape = rasterio.features.shapes(mask.astype('uint8'))
# Keep only the geometry part of each (geometry, value) pair.
mypoly = [vec[0] for vec in maskShape]
print(mypoly)


NameError: name 'mask' is not defined

In [None]:
from shapely.geometry import shape  # needed below; was never imported

# Context manager so the dataset is closed once the metadata has been read.
with rio.open("../../data/interim/tiles/merged.tif") as src:
    input_array = src.read(1)
    input_transform = src.transform
    input_crs = src.crs

# Create array with a unique value per cell.
# int32 rather than int16: ids above 32767 would wrap and stop being unique.
unique_pixels = np.arange(input_array.size, dtype=np.int32).reshape(input_array.shape)

# Vectorise each unique feature in array.
# Reached via `rio.features` because another cell rebinds the bare name `features` to a list.
vectors = rio.features.shapes(
    source=unique_pixels, transform=input_transform
)

# Extract polygons and values from generator
vectors = list(vectors)
values = [value for polygon, value in vectors]
polygons = [shape(polygon) for polygon, value in vectors]

# Create a geopandas dataframe populated with the polygon shapes
poly_gdf = gpd.GeoDataFrame(data={"id": values}, geometry=polygons, crs=input_crs)


In [None]:
from osgeo import gdal, ogr, osr

# Polygonize band 1 of the merged raster into a GeoPackage through the GDAL/OGR Python API.
in_path = '../../data/interim/tiles/merged.tif'

out_path = '../../data/interim/tiles/merged.gpkg'

#  get raster datasource
src_ds = gdal.Open( in_path )
# Band to polygonize
srcband = src_ds.GetRasterBand(1)
dst_layername = 'MAXNDVI'
drv = ogr.GetDriverByName("GPKG")
dst_ds = drv.CreateDataSource( out_path )

# Output layer is declared as EPSG:4326.
sp_ref = osr.SpatialReference()
sp_ref.SetFromUserInput('EPSG:4326')

dst_layer = dst_ds.CreateLayer(dst_layername, srs = sp_ref )

# Integer field "HA" receives the pixel value of each polygon.
fld = ogr.FieldDefn("HA", ogr.OFTInteger)
dst_layer.CreateField(fld)
dst_field = dst_layer.GetLayerDefn().GetFieldIndex("HA")

# No mask band (None) and no extra options.
gdal.Polygonize(srcband, None, dst_layer, dst_field, [], callback=None)

# Drop the references so GDAL can close both datasets
# (presumably this is what flushes the GeoPackage to disk — TODO confirm).
del src_ds
del dst_ds

In [None]:
# Export the remapped image for one fixed bounding box as a single GeoTIFF.
export_region = ee.Geometry.Rectangle(
    [64.0321287595154, 31.3779740304129, 64.17798786794364, 31.542232512963988]
)
geemap.ee_export_image(
    test1,
    filename="../../data/interim/modis.tif",
    scale=250,
    region=export_region,
    file_per_band=False,
)

In [None]:
import rasterio as rio 
import geopandas as gpd
import pandas as pd
import numpy as np
import rioxarray
import matplotlib.pyplot as plt
import earthpy as et
import earthpy.spatial as es
import earthpy.plot as ep

In [None]:
# Load the child grid and show the bounds of the union of all its geometries.
gdf0 = gpd.read_file("../../data/interim/child.gpkg")
gdf0.unary_union.bounds

In [None]:
# from rasterstats import zonal_stats

# Read band 1 and the affine transform of the country-wide raster.
# NOTE(review): absolute, machine-specific path; also confirm `nodata=0` has any
# effect when the file is opened for reading.
with rio.open("/data/tmp/arogya/data/inputs/all_afg_raster.tiff", nodata=0) as src:
    affine = src.transform
    array = src.read(1)
    
#     df_zonal_stats = pd.DataFrame(zonal_stats(gdf, array, affine=affine))

# # # adding statistics back to original GeoDataFrame
# gdf2 = pd.concat([gdf, df_zonal_stats], axis=1) 

In [None]:
# Turn the raster into a point GeoDataFrame: one row per (x, y) coordinate of
# the squeezed array, with the values in a column named "data".
# NOTE(review): absolute, machine-specific path.
rds = rioxarray.open_rasterio("/data/tmp/arogya/data/inputs/all_afg_raster.tiff")
rds.name = "data"
df = rds.squeeze().to_dataframe().reset_index()
geometry = gpd.points_from_xy(df.x, df.y)
gdf = gpd.GeoDataFrame(df, crs=rds.rio.crs, geometry=geometry)

In [None]:
# Keep only the raster points that intersect a child-grid cell, then display them.
a = gdf.sjoin(gdf0, how="inner", predicate='intersects')
a

In [None]:
# Overlay the joined points and the child-grid cell outlines on one large axis.
fig, ax = plt.subplots(1,1, dpi=200, figsize=(30, 60))
a.plot(ax=ax)
gdf0.boundary.plot(ax=ax)
# ax.imshow(array, cmap='jet')

In [None]:
# `import rasterio` alone does not load the plot submodule, so import the
# function explicitly rather than relying on another package pulling it in.
from rasterio.plot import show

fig, ax = plt.subplots(figsize=(10, 10))

# Read band 1 inside a context manager so the file handle is released.
# NOTE(review): absolute, machine-specific path.
with rio.open("/data/tmp/arogya/data/inputs/all_afg_raster.tiff") as rst:
    red = rst.read(1)

show(red, ax=ax)

In [None]:
import itertools
import rasterio
from shapely.geometry import box
import geopandas as gpd

# Build one rectangular polygon per pixel of the merged raster and save them
# together with the pixel values.
with rasterio.open("../../data/interim/tiles/merged.tif") as dataset:
    data = dataset.read(1)

    t = dataset.transform
    # Take the CRS from the raster itself; fall back to EPSG:4326 if the file
    # carries none. (The previous hard-coded 'epsg:4236' was a transposed code.)
    raster_crs = dataset.crs if dataset.crs else "EPSG:4326"

    move_x = t[0]
    # t[4] is negative, as raster start upper left 0,0 and goes down
    # later for steps calculation (ymin=...) we use plus instead of minus
    move_y = t[4]

    height = dataset.height
    width = dataset.width

    polygons = []
    indices = list(itertools.product(range(width), range(height)))
    for x,y in indices:
        # Upper-left corner of pixel (x, y) in map coordinates.
        x_min, y_max = t * (x,y)
        x_max = x_min + move_x
        y_min = y_max + move_y
        polygons.append(box(x_min, y_min, x_max, y_max))

# Pixel values in the same (column, row) order as the polygons.
data_list = [data[y,x] for x,y in indices]

gdf = gpd.GeoDataFrame(data=data_list, crs=raster_crs, geometry=polygons, columns=['value'])
gdf.to_file("../../data/interim/merged2.gpkg", driver="GPKG")