In [2]:
import geopandas as gpd
from osgeo import ogr
from math import ceil
import shutil
import os
from shapely.geometry import box
from pathlib import Path

import warnings
warnings.filterwarnings("ignore")


# Shape file helper class
class ShapefileHelper():
    """Build a square vector grid over an area-of-interest (AOI) layer.

    On construction the output directory is created and the AOI file is read
    and reprojected to ``working_crs``. ``make_grid`` then tiles the AOI's
    bounding box with square cells, keeps the cells that spatially join to the
    AOI, and writes them to a GeoPackage.

    Parameters
    ----------
    dissolved_aoi_shapefile : str
        Path of the AOI vector file (anything ``geopandas.read_file`` accepts).
    vector_output_dir : str
        Directory that receives the generated grid. A trailing ``/`` is
        appended and stored in ``self.vector_output_dir``.
    working_crs : str, optional
        CRS in which the grid is laid out; ``resolution`` in ``make_grid`` is
        expressed in this CRS's units. Defaults to ``"epsg:32642"``, the value
        that was previously hard-coded.
    """

    # File suffixes removed from the output directory by ``_clean_dir``.
    _SHAPEFILE_SUFFIXES = (".shp", ".shx", ".dbf")

    def __init__(self, dissolved_aoi_shapefile, vector_output_dir, working_crs="epsg:32642"):
        self.raw = dissolved_aoi_shapefile
        self.vector_output_dir = vector_output_dir + "/"
        self.working_crs = working_crs
        self._make_dir()
        self._read_file()

    def make_grid(self, resolution, name, out_crs="epsg:4326", id_col="grid_id"):
        """Generate a square grid over the AOI and save it as ``<name>.gpkg``.

        Parameters
        ----------
        resolution : number
            Cell edge length, in units of ``self.working_crs``.
        name : str
            Base file name (no extension) of the intermediate shapefile and of
            the final GeoPackage inside ``self.vector_output_dir``.
        out_crs : str, optional
            CRS of the returned / saved grid.
        id_col : str, optional
            Name of the integer id column of the grid.

        Returns
        -------
        geopandas.GeoDataFrame
            The cells that join to the AOI, with ``id_col`` renumbered from 0.
            ``self.output`` keeps the same rows with the pre-join cell ids.

        Raises
        ------
        RuntimeError
            If the intermediate shapefile cannot be created.
        """
        cell_width = cell_height = resolution

        # total_bounds equals the first row's bounds for a single dissolved
        # feature, and still covers the whole AOI when there are several.
        xmin, ymin, xmax, ymax = (float(v) for v in self.gdf.total_bounds)

        n_rows = ceil((ymax - ymin) / cell_height)
        n_cols = ceil((xmax - xmin) / cell_width)

        shp_path = self.vector_output_dir + name + ".shp"
        gpkg_path = self.vector_output_dir + name + ".gpkg"

        # Create the intermediate shapefile that receives the raw cells.
        driver = ogr.GetDriverByName('ESRI Shapefile')
        if os.path.exists(shp_path):
            # A leftover file from an interrupted run would block creation.
            driver.DeleteDataSource(shp_path)
        data_source = driver.CreateDataSource(shp_path)
        if data_source is None:
            raise RuntimeError(f"Could not create intermediate shapefile: {shp_path}")
        layer = data_source.CreateLayer(name, geom_type=ogr.wkbPolygon)
        feature_defn = layer.GetLayerDefn()

        print(f'--------- Generating grid at {resolution}mx{resolution}m ...')

        # Walk column by column (west -> east); within a column go from the
        # top edge downwards. Edges are advanced by repeated addition.
        left, right = xmin, xmin + cell_width
        for _ in range(n_cols):
            top, bottom = ymax, ymax - cell_height
            for _ in range(n_rows):
                ring = ogr.Geometry(ogr.wkbLinearRing)
                corners = (
                    (left, top),
                    (right, top),
                    (right, bottom),
                    (left, bottom),
                    (left, top),  # close the ring
                )
                for x, y in corners:
                    ring.AddPoint(x, y)
                poly = ogr.Geometry(ogr.wkbPolygon)
                poly.AddGeometry(ring)

                feature = ogr.Feature(feature_defn)
                feature.SetGeometry(poly)
                layer.CreateFeature(feature)
                # The original referenced `Destroy` without calling it.
                feature.Destroy()

                top -= cell_height
                bottom -= cell_height

            left += cell_width
            right += cell_width

        # Close the data source so the shapefile is flushed to disk.
        data_source.Destroy()
        print('--------- Grid generation complete. Saving to disk...')

        # Re-read the cells, tag them with the CRS they were built in (the
        # shapefile was written without one), reproject, and number them.
        output = gpd.read_file(shp_path)
        output = (
            output
            .set_crs(self.working_crs)
            .to_crs(out_crs)
            .reset_index()
            .rename(columns={"index": id_col})
            .drop('FID', axis=1)
        )
        # Join against the AOI in the SAME CRS as the grid; this used to be a
        # hard-coded "epsg:4326", which only matched the default out_crs.
        output = gpd.sjoin(output, self.gdf.to_crs(out_crs)).drop('index_right', axis=1)

        self.output = output

        # Renumber id_col 0..n-1 over the surviving cells (id_col stays first).
        output = (
            output
            .drop(id_col, axis=1)
            .reset_index(drop=True)
            .reset_index()
            .rename(columns={'index': id_col})
        )
        output.to_file(gpkg_path, driver='GPKG')
        self._clean_dir()
        print("--------- Successfully saved to disk: {}".format(gpkg_path))
        return output

    def subset_grid(self, grid_path, aoi_path):
        """Overwrite the grid at ``grid_path`` with the cells that join to the AOI.

        Both files are read with geopandas. When both carry a CRS and they
        differ, the AOI is reprojected to the grid's CRS before the join.
        The result is written back to ``grid_path`` as a GeoPackage.
        """
        aoi = gpd.read_file(aoi_path)
        grid = gpd.read_file(grid_path)

        if grid.crs is not None and aoi.crs is not None and aoi.crs != grid.crs:
            aoi = aoi.to_crs(grid.crs)

        subset = gpd.sjoin(grid, aoi).drop('index_right', axis=1)
        subset.to_file(grid_path, driver='GPKG')

    def _read_file(self):
        """Read the AOI file and store it, reprojected to the working CRS, in ``self.gdf``."""
        raw = gpd.read_file(self.raw)
        self.gdf = raw.to_crs(self.working_crs)

    def _make_dir(self):
        """Create the output directory (and parents) if it does not exist."""
        Path(self.vector_output_dir).mkdir(parents=True, exist_ok=True)

    def _clean_dir(self):
        """Delete shapefile parts (.shp/.shx/.dbf) from the output directory.

        NOTE: every file in the directory with one of these suffixes is
        removed, not only the intermediate grid written by ``make_grid``.
        """
        for item in os.listdir(self.vector_output_dir):
            if item.endswith(self._SHAPEFILE_SUFFIXES):
                os.remove(os.path.join(self.vector_output_dir, item))
        


In [3]:
import geopandas as gpd
import numpy as np
import os
import pandas as pd


#%% Define Parameters
DATA_DIR = '/data/tmp/arogya/data'
TILE_DIR = f'{DATA_DIR}/interim/tiles'
PARENT_GDF = f'{DATA_DIR}/interim/parent.gpkg'
DISTRICT_DIR = f"{DATA_DIR}/inputs/districts/districts"
TILE_SUFFIX = ".tif"

parent = gpd.read_file(PARENT_GDF)

# Expected tiles are identified by the parent grid's own ids; this matches the
# `pgrid_id` lookup below instead of assuming the ids are exactly 0..n-1.
all_ids = set(parent['pgrid_id'])

# Tiles on disk are named "<pgrid_id>.tif". Anything else in the folder
# (sidecar files, other layers) is ignored instead of crashing int().
tiles = {
    int(fname[:-len(TILE_SUFFIX)])
    for fname in os.listdir(TILE_DIR)
    if fname.endswith(TILE_SUFFIX) and fname[:-len(TILE_SUFFIX)].isdigit()
}
missing = all_ids - tiles
missing_gdf = parent[parent['pgrid_id'].isin(list(missing))]
districts = os.listdir(f"{DISTRICT_DIR}/")

# Count, per district file, how many (district feature, missing tile) pairs
# intersect, and remember the districts that have at least one.
tiles_n = 0
missed_districts = []
for dist in districts:
    dist_path = f"{DISTRICT_DIR}/{dist}"
    dist_shp = gpd.read_file(dist_path)
    no_of_missing = len(dist_shp.sjoin(missing_gdf, how="inner", predicate="intersects"))
    if no_of_missing > 0:
        print(f"{no_of_missing} tiles missing in {dist}")
        tiles_n += no_of_missing
        missed_districts.append(dist_path)

36 tiles missing in 3103.gpkg
6 tiles missing in 2006.gpkg
14 tiles missing in 813.gpkg
3 tiles missing in 2103.gpkg
2 tiles missing in 3102.gpkg
9 tiles missing in 2302.gpkg
3 tiles missing in 814.gpkg
45 tiles missing in 2313.gpkg
55 tiles missing in 2105.gpkg
6 tiles missing in 2014.gpkg
33 tiles missing in 2311.gpkg
64 tiles missing in 2306.gpkg
14 tiles missing in 2408.gpkg
1 tiles missing in 1609.gpkg
64 tiles missing in 2101.gpkg


In [None]:
import argparse
import os
import time
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd

from utils.rasters import RasterGenerationHelper
from utils.shapefile import ShapefileHelper

# --- Command-line interface -------------------------------------------------
# The description text must be passed as `help=`; as a second positional
# argument argparse treats it as another option name and raises ValueError.
parser = argparse.ArgumentParser()
parser.add_argument("data_dir", help="Where is the data stored")
parser.add_argument("shp", help="Shapefile for the AOI")
parser.add_argument("--n_cores", type=int, default=1, help="Number of cores")

args = parser.parse_args()

DATA_DIR = args.data_dir
SHP_FILE = args.shp
N_CORES = args.n_cores

# --- Paths ------------------------------------------------------------------
TILE_DIR = f'{DATA_DIR}/interim/tiles'
PARENT_GDF = f'{TILE_DIR}/parent.gpkg'
REDO_GDF = f'{TILE_DIR}/redo.gpkg'
DISTRICT_DIR = f"{DATA_DIR}/inputs/districts/districts"
TILE_SUFFIX = ".tif"

parent = gpd.read_file(PARENT_GDF)

# --- Find tiles that were never downloaded ----------------------------------
# Expected tiles are identified by the parent grid's own ids, consistent with
# the `pgrid_id` lookup below.
all_ids = set(parent['pgrid_id'])

# TILE_DIR also holds parent.gpkg / redo.gpkg, so only "<pgrid_id>.tif" files
# may be parsed as tile ids; int() on any other name would raise ValueError.
tiles = {
    int(fname[:-len(TILE_SUFFIX)])
    for fname in os.listdir(TILE_DIR)
    if fname.endswith(TILE_SUFFIX) and fname[:-len(TILE_SUFFIX)].isdigit()
}
missing = all_ids - tiles
missing_gdf = parent[parent['pgrid_id'].isin(list(missing))]
districts = os.listdir(f"{DISTRICT_DIR}/")

tiles_n = 0
missed_districts = []
for dist in districts:
    dist_path = f"{DISTRICT_DIR}/{dist}"
    dist_shp = gpd.read_file(dist_path)
    no_of_missing = len(dist_shp.sjoin(missing_gdf, how="inner", predicate="intersects"))
    if no_of_missing > 0:
        print(f"#### {no_of_missing} tiles missing in {dist}")
        tiles_n += no_of_missing
        missed_districts.append(dist_path)

# --- Re-run the download for every affected district ------------------------
_VECTOR_OUTPUT_DIR = f"{DATA_DIR}/interim"
_TILE_OUTPUT_DIR = f'{DATA_DIR}/interim/tiles'
_PATH_TO_PARENT_GRID = f"{_VECTOR_OUTPUT_DIR}/redo.gpkg"
_PATH_TO_CHILD_GRID = f"{_VECTOR_OUTPUT_DIR}/child.gpkg"
_N_CORES = N_CORES

# Only the id and geometry are needed from here on (loop-invariant).
parent = parent[['pgrid_id', 'geometry']]

for dist in missed_districts:
    print(f"#### Restarting download for file: {dist} ####")
    start = time.time()
    SHP_PATH = dist
    sh = ShapefileHelper(SHP_PATH, _VECTOR_OUTPUT_DIR)
    sh.make_grid(resolution=2500, name="missing")

    # Parent tiles touching the district, then every parent tile touching
    # one of those tiles.
    dist_tiles = parent.sjoin(gpd.read_file(SHP_PATH))[['pgrid_id', 'geometry']]
    redo = parent.sjoin(dist_tiles)[['pgrid_id_left', 'geometry']]
    redo.columns = ['pgrid_id', 'geometry']
    print(f"#### Number of tiles: {len(redo)} ####")

    # NOTE(review): `redo` is never written to disk in this cell, yet
    # _PATH_TO_PARENT_GRID points at ".../interim/redo.gpkg" (and REDO_GDF at
    # ".../interim/tiles/redo.gpkg" is unused). Confirm where redo.gpkg is
    # expected to come from before relying on this loop.

    Path(_TILE_OUTPUT_DIR).mkdir(parents=True, exist_ok=True)
    # NOTE(review): POST_PERIOD_DAYS is not defined anywhere in this cell; it
    # must be provided elsewhere or this line raises NameError.
    rgh = RasterGenerationHelper(
        _PATH_TO_PARENT_GRID,
        _PATH_TO_CHILD_GRID,
        _TILE_OUTPUT_DIR,
        _N_CORES,
        clean=False,
        post_period_days=POST_PERIOD_DAYS,
    )
    rgh.get_rasters()
    print(f"#### Download Complete in {(time.time() - start)}  ####")



In [4]:


# Re-grid a single district by hand.
# NOTE(review): relies on kernel state from other cells: `missed_districts`,
# `parent`, and `_VECTOR_OUTPUT_DIR` (in the visible cells the latter is only
# assigned in the argparse cell) must already exist.
REDO_DISTRICT_INDEX = 11  # position in `missed_districts` of the district to redo

SHP_PATH = missed_districts[REDO_DISTRICT_INDEX]
sh = ShapefileHelper(SHP_PATH, _VECTOR_OUTPUT_DIR)
sh.make_grid(resolution=2500, name="missing")
parent = parent[['pgrid_id', 'geometry']]

# Parent tiles touching the district, then every parent tile touching one of
# those tiles.
dist_tiles = parent.sjoin(gpd.read_file(SHP_PATH))[['pgrid_id', 'geometry']]
redo = parent.sjoin(dist_tiles)[['pgrid_id_left', 'geometry']]
redo.columns = ['pgrid_id', 'geometry']

# Jupyter renders only a cell's final expression, so the preview goes last.
dist_tiles.head()

--------- Generating grid at 2500mx2500m ...
--------- Grid generation complete. Saving to disk...
--------- Successfully saved to disk: ../data/interim/missing/missing.gpkg


Unnamed: 0,pgrid_id,geometry
30542,30542,"POLYGON ((63.97416 32.56892, 64.00069 32.56998..."
30543,30543,"POLYGON ((63.97542 32.54646, 64.00194 32.54752..."
30837,30837,"POLYGON ((63.99944 32.59245, 64.02598 32.59351..."
30838,30838,"POLYGON ((64.00069 32.56998, 64.02723 32.57104..."
30839,30839,"POLYGON ((64.00194 32.54752, 64.02847 32.54857..."


In [11]:
# Display the tiles selected for re-download (`redo` is built in an earlier cell).
redo

Unnamed: 0,pgrid_id,geometry
30245,30245,"POLYGON ((63.94637 32.59032, 63.97291 32.59139..."
30246,30246,"POLYGON ((63.94763 32.56786, 63.97416 32.56892..."
30247,30247,"POLYGON ((63.94889 32.54539, 63.97542 32.54646..."
30541,30541,"POLYGON ((63.97291 32.59139, 63.99944 32.59245..."
30542,30542,"POLYGON ((63.97416 32.56892, 64.00069 32.56998..."
...,...,...
39699,39699,"POLYGON ((64.78454 32.86910, 64.81119 32.87000..."
39700,39700,"POLYGON ((64.78560 32.84661, 64.81225 32.84751..."
40015,40015,"POLYGON ((64.81013 32.89249, 64.83679 32.89338..."
40016,40016,"POLYGON ((64.81119 32.87000, 64.83784 32.87089..."


In [None]:
# NOTE(review): scratch arithmetic; what the two operands count is not stated
# anywhere in the notebook — label them or remove this cell.
104034 +355