In [None]:
import geopandas as gpd
from shapely.geometry import Polygon,shape
import numpy as np
import os
import fiona
import rasterio
from rasterio.mask import mask
from pathlib import Path

# Seed NumPy's global RNG so the np.random.choice draw in the sampling cell is
# reproducible when the notebook is run top to bottom on a fresh kernel.
np.random.seed(42)

# GeoJSON with the cutlines; the sampling cell reads its "id" and "year" columns.
cutline_path = "/home/jovyan/work/notebooks/satellite_data/utils/SA.geojson"
# Directory holding the rasters, looked up as <id>_<year> followed by
# ".tif", ".jp2.tif" or ".jp2".
data_path = "/home/jovyan/work/satellite_data/ku_sync/South_Africa/all/"
# Years to process: 2009 through 2020 inclusive.
years = list(range(2009,2021))


# Output directory; one <year>.geojson is written here for every year that
# yields at least one sampled patch.
out_dir = "shapes/V7_covermap/"

# Grid dimensions are derived per raster from patch_size
# (disabled fixed values: ncols = 100, nrows = 100).
# patch_size is multiplied by the raster's transform[0] to get the grid step
# (presumably an edge length in pixels -- TODO confirm).
patch_size = 256

# Fraction of each year's grid patches to sample; the resulting count is
# rounded up with np.ceil.
test_ratio = 0.0000025

In [None]:
# Load the cutline GeoJSON into a GeoDataFrame for inspection.
# NOTE: the sampling cell that follows reads the file itself and rebinds
# cut_df to a single year's rows, so this value does not survive that cell.
cut_df = gpd.read_file(cutline_path)

In [None]:
# For every year: lay a grid of square patch footprints over each raster named
# in the cutline file, draw a random sample of those patches, and write the
# sample to <out_dir>/<year>.geojson. Results are also kept in `shapes`.
shapes = {}  # year -> GeoDataFrame holding that year's sampled patches
Path(out_dir).mkdir(parents=True, exist_ok=True)

# Extensions tried, in this order, for each "<id>_<year>" file stem.
raster_suffixes = (".tif", ".jp2.tif", ".jp2")

# The cutline file is the same for every year, so read it once up front.
all_cutlines = gpd.read_file(cutline_path)

for year in years:
    polygons = []
    grid_crs = None  # CRS of the last raster opened for this year

    # cut_df is rebound to this year's rows; the "year" column holds strings.
    cut_df = all_cutlines[all_cutlines["year"] == str(year)]

    for _, row in cut_df.iterrows():
        tile_id = row["id"]
        stem = f"{tile_id}_{year}"

        # Pick the first existing file among the known extensions.
        for suffix in raster_suffixes:
            if os.path.isfile(os.path.join(data_path, stem + suffix)):
                filename = stem + suffix
                break
        else:
            print(f"{tile_id} is missing!")
            continue

        # Only metadata is needed; read it while the dataset is open so nothing
        # below depends on a closed file handle.
        with rasterio.open(os.path.join(data_path, filename)) as img:
            if img.crs != 4326:
                print(f"{filename} not in CRS EPSG:4326!")
            grid_crs = img.crs
            xmin, ymin, xmax, ymax = img.bounds
            # transform[0] is the pixel width; the same step is used vertically,
            # which assumes square pixels -- TODO confirm for all inputs.
            stepsize = patch_size * img.meta["transform"][0]

        ncols = int(np.ceil((xmax - xmin) / stepsize))
        nrows = int(np.ceil((ymax - ymin) / stepsize))

        # Walk the grid from the lower-left corner, row by row. Because of the
        # ceil above, the last row/column may extend past the raster bounds.
        y = ymin
        for _r in range(nrows):
            x = xmin
            for _c in range(ncols):
                polygons.append(
                    Polygon([
                        (x, y),
                        (x + stepsize, y),
                        (x + stepsize, y + stepsize),
                        (x, y + stepsize),
                    ])
                )
                x += stepsize
            y += stepsize

    # Zero when there are no patches (or test_ratio is 0); skip the year
    # without drawing from an empty population.
    sample_size = int(np.ceil(test_ratio * len(polygons)))
    if sample_size == 0:
        print(f"No additional patches for year: {year}")
        continue

    sample_idxs = np.random.choice(len(polygons), sample_size, replace=False)

    grid = gpd.GeoDataFrame({"geometry": polygons}, crs=grid_crs)
    # Explicit copy: adding a column to a bare .iloc slice triggers pandas'
    # SettingWithCopyWarning.
    sampled_grid = grid.iloc[sample_idxs].copy()
    sampled_grid["idx"] = sampled_grid.index.values  # position in the full grid

    sampled_grid.to_file(os.path.join(out_dir, f"{year}.geojson"))
    shapes[year] = sampled_grid

    print("Year: ",year)
    # Counts cutline rows for the year, including rows whose raster was missing.
    print("Number of files: ",len(cut_df))
    print("Number patches: ",len(grid))
    print("Number of samples: ",len(sampled_grid))

Year:  2020
Number of files:  17
Number patches:  2613859
Number of samples:  7


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
