In [1]:
import geopandas as gpd
import rasterio
import os
import numpy as np
import pandas as pd
from shapely.ops import unary_union

In [2]:
# Directory that holds the per-year cutline files ("<year>.geojson").
cutlines_path = "/home/jovyan/work/satellite_data/ku_sync/South_Africa/cutlines/"

# Years to process in descending order: 2020 down to 2008 (inclusive).
years = sorted(range(2008, 2021), reverse=True)

In [3]:
# Read one cutline layer per year into a {year: GeoDataFrame} mapping.
# Rows sharing an "id" come from the first and second transfer; only the
# last occurrence of each id is kept.
gdfs = {
    year: gpd.read_file(
        os.path.join(cutlines_path, f"{year}.geojson")
    ).drop_duplicates("id", keep="last")
    for year in years
}

In [4]:
# Merge the per-year cutlines into a single GeoDataFrame (`shape_df`).
#
# `years` is walked in its given order. The first year is taken over in full;
# for every following year a cutline is only added when the cutlines already
# collected from later years do not cover (almost) all of its area.
# Resulting columns of interest: "id", "year", "y_idx" (1-based running
# number of the kept cutlines within a year) and "geometry".

# A cutline is dropped when at least this fraction of its area is already
# covered by the cutlines collected so far.
OVERLAP_THRESHOLD = 0.95


def _covered_by_newer(geom, newer_geoms, threshold=OVERLAP_THRESHOLD):
    """Tell whether `geom` is (almost) fully covered by `newer_geoms`.

    Parameters
    ----------
    geom : shapely geometry
        Candidate cutline.
    newer_geoms : geopandas.GeoSeries
        Geometries that were already accepted.
    threshold : float
        Minimum covered fraction of `geom`'s area for it to count as covered.

    Returns
    -------
    bool
        True when the union of the intersecting geometries contains `geom`
        or covers at least `threshold` of its area; False otherwise
        (including when nothing intersects at all).
    """
    # Positional indices, so they must be used with .iloc below.
    hit_positions = np.where(newer_geoms.intersects(geom))[0]
    if len(hit_positions) == 0:
        return False

    # Merge all intersecting polygons into one geometry.
    merged = unary_union(newer_geoms.iloc[hit_positions])
    if merged.contains(geom):
        return True

    # A zero-area geometry has no meaningful coverage ratio; keep it rather
    # than failing the whole merge with a ZeroDivisionError.
    if geom.area == 0:
        return False

    # Only the ratio of the two areas matters, so the coordinate units of
    # the layer cancel out.
    covered_fraction = geom.intersection(merged).area / geom.area
    return covered_fraction >= threshold


shape_df = None
for y in years:
    if shape_df is None:
        # First year: keep everything. Copy so that the cached frame in
        # `gdfs` is not modified by the two column assignments below.
        shape_df = gdfs[y].copy()
        shape_df["year"] = y
        shape_df["y_idx"] = list(range(1, len(shape_df) + 1))
        continue

    # Cutlines accepted so far that belong to a later year than `y`.
    new_df = shape_df[shape_df["year"] > y]
    newer_geoms = new_df["geometry"]
    old_df = gdfs[y]

    kept_geoms = []
    kept_rows = []
    for _, row in old_df.iterrows():
        geom = row["geometry"]
        if _covered_by_newer(geom, newer_geoms):
            continue
        kept_geoms.append(geom)
        # y_idx is the 1-based position among the kept cutlines of this year.
        kept_rows.append([row["id"], y, len(kept_rows) + 1])

    # Nothing new for this year: skip the concat so an empty frame cannot
    # influence the column dtypes of `shape_df`.
    if not kept_rows:
        continue

    # NOTE(review): the new rows are tagged EPSG:4326; this assumes the
    # source layers use that CRS -- confirm.
    year_df = gpd.GeoDataFrame(
        kept_rows,
        crs="EPSG:4326",
        columns=["id", "year", "y_idx"],
        geometry=kept_geoms,
    )
    shape_df = gpd.GeoDataFrame(pd.concat([shape_df, year_df], ignore_index=True))


In [5]:
# Write the merged cutlines of all years to disk. The driver is passed
# explicitly so the file is GeoJSON regardless of whether the installed
# geopandas infers the format from the ".geojson" extension.
shape_df.to_file("SA.geojson", driver="GeoJSON")

In [6]:
# Write only the cutlines kept for 2009 to their own file. The driver is
# passed explicitly so the file is GeoJSON regardless of whether the
# installed geopandas infers the format from the ".geojson" extension.
shape_df[shape_df["year"] == 2009].to_file("2009.geojson", driver="GeoJSON")