In [14]:
import numpy as np
import pandas as pd
import random
import csv
import os
import matplotlib.pyplot as plt
from collections import defaultdict
from tqdm import tqdm

import shapefile
import json
from json import dumps
import fiona
from pyproj import Proj#, transform
import pyproj
import geopandas as gpd

import shapely
from shapely.geometry import Polygon
from shapely.geometry import shape
from functools import partial
from shapely.ops import transform
from shapely.strtree import STRtree

Originally shp2geo.py

# Helper functions

In [6]:
def read_csv(shape_file, readCSV):
    """Read the coordinate of the bounding boxes and constructs and R-Tree data structure

    Args:
      shape_file : polygons
      readCSV: pandas dataframe containing bounding boxes

    Returns:
    dict, r-tree: dict of bounding boxes for each image id and r-tree
    """
    shapes = fiona.open(shape_file)
    if len(shapes.crs) != 0:
        destination = Proj(shapes.crs)
    else:
        destination = Proj('+init=EPSG:4326')
    # original = Proj('+init=EPSG:4326')
    original = Proj('+init=EPSG:3857')

    grid = dict()
    keys = ['max_lat', 'max_lon', 'min_lat', 'min_lon']
    poly_list = []
    
    for index, row in readCSV.iterrows():
        if index not in grid:
            grid[index] = dict()
        grid[index]['image_id'] = row['image_id']
        grid[index]['max_lat'] = float(row['max_lat'])
        grid[index]['max_lon'] = float(row['max_lon'])
        grid[index]['min_lat'] = float(row['min_lat'])
        grid[index]['min_lon'] = float(row['min_lon'])

        grid[index]['poly'] = shapely.geometry.box(
            grid[index]['min_lon'], grid[index]['min_lat'], grid[index]['max_lon'], grid[index]['max_lat'])
        
        # project boxes from WSG 84 to parcel projection
        project = partial(pyproj.transform, original, destination)
        grid[index]['poly'] = transform(project, grid[index]['poly'])

        # populating r-tree
        poly_obj = grid[index]['poly']
        poly_obj.name = grid[index]['image_id'] # useful for retrival in search phase
        poly_list.append(poly_obj)
        
    tree = STRtree(poly_list) # constructing R-Tree
    return grid, tree

def listit(t):
    # convert to appropriate list type 
    return list(map(listit, t)) if isinstance(t, (list, tuple)) else t


def check_polygon_in_bounds(poly, tree):
    """
    find image corrspinding to the existance of a field in the list of 
    image bounding boxes

    Args:
      poly (polygon): field
      tree (r-tree): r-tree of images

    Returns:
      List: List of intersecting images with a field
    """
    results = tree.query(poly)
    return results


def field_imageId_list(polys, count_parcels):
    """
    extract name of the intersecting polygons

    Args:
      polys (polygons): intersecting fields
      count_parcels (dict): the sanity check summary of # of fields in image ids  
    Returns:
      list: list of the image ids
    """
    list_image_ids = []
    for element in polys:
        list_image_ids.append(element.name)
        count_parcels[element.name] += 1
    return list_image_ids

# Find intersecting polygons

### France

In [3]:
def dump_shp_to_json(shape_file, grid, tree, output_json='../data/planet/france/sherrie10k/test_json'):
    """
    find intersecting polygons in the list of available images and save the GeoJSON

    Args:
      shape_file (polygons): fields
      grid (dict): image bounding boxes 
      tree (r-tre): r-tree of images
      output_json (str): output path of json file
    """
    # coordinate transformation
    reader = shapefile.Reader(shape_file)
    shapes = fiona.open(shape_file)
    if len(shapes.crs) != 0:
        original = Proj(shapes.crs)
    else:
        original = Proj('+init=EPSG:4326')
#     print(fiona.open(shape_file).crs)

    # list of properties of features
    fields = reader.fields[1:]
    field_names = [field[0] for field in fields]
    field_names.append('image_id')

    buffer = []
    # sanity check counters
    count_parcels = defaultdict(int)
#     index = 0
    counter_method1 = 0
    counter_method2 = 0
    num_matched = 0
    failed_projection = 0
  
    # loop through the polygon fields
    for sr in tqdm(reader.iterShapeRecords(), total=9517878):
#         if index % 100000 == 0:
#             print('Parsed ', index)
#         index += 1
        geom = sr.shape.__geo_interface__
        shp_geom = shape(geom)
        intersect = check_polygon_in_bounds(shp_geom, tree)
#         print(intersect)
        if len(intersect) != 0:
            num_matched += len(intersect)
#             print("Matched:", str(index))
#             print("Number matched:", num_matched)
      
            id_list = field_imageId_list(intersect, count_parcels)
            sr.record.append(id_list)
            atr = dict(zip(field_names, sr.record))
            
            geom['coordinates'] = listit(geom['coordinates'])
            try: # protection at polygons that fail at projection
                if len(geom['coordinates']) == 1: # for single polygon
                    counter_method1 += 1
                    x, y = zip(*geom['coordinates'][0])
                    lat, long = original(x, y, inverse=True) # coordinate transformation
                    geom['coordinates'] = [listit(list(zip(lat, long)))]
                else: # for multipolygons
                    counter_method2 += 1
                    for index_coord in range(0, len(geom['coordinates'])):
                        for counter in range(0,len(geom['coordinates'][index_coord])):
                            x, y = geom['coordinates'][index_coord][counter]
                            lat, long = original(x, y, inverse=True) # coordinate transformation
                            geom['coordinates'][index_coord][counter] = [lat, long] #(long, lat)
            except:
                failed_projection =+ 1
#                 print(geom['coordinates'])
            buffer.append(dict(type="Feature", geometry=geom, properties=atr))
            
#             if num_matched > 10:
#                 break
      
      
    # write the GeoJSON file
    output_json_interval = output_json + str(num_matched) + '.json'
    print("saving json")
    with open(output_json_interval, 'w') as geojson:
        geojson.write(dumps({"type": "FeatureCollection", "features": buffer}, indent=2) + "\n")
        geojson.close()
        print('saved', output_json_interval)
    
    # print summary
    print('method one count:', counter_method1)
    print('method two count:', counter_method2)
    print("Number matched:", num_matched)
    print('failed count', failed_projection)

In [7]:
test = fiona.open(shape_file)

In [18]:
list(test.schema['properties'].keys())

['ID_PARCEL',
 'SURF_PARC',
 'CODE_CULTU',
 'CODE_GROUP',
 'CULTURE_D1',
 'CULTURE_D2']

In [12]:
for t in test:
    print(t['geometry'])
    break

{'type': 'MultiPolygon', 'coordinates': [[[(701200.320100002, 6883238.830900002), (700676.8660000041, 6882785.608600002), (700657.6099999994, 6882831.337000001), (700652.1803000048, 6882844.3105), (700646.9530000016, 6882853.198800001), (700627.3748000041, 6882878.875500001), (700617.5866999999, 6882893.216300003), (700613.4576999992, 6882901.178100001), (700585.1190000027, 6882964.341000002), (700568.450000003, 6883002.176000003), (700553.0282000005, 6883039.002), (700539.0053000003, 6883078.2927), (700540.7251000032, 6883084.113500003), (700639.8117000014, 6883181.4804), (700742.3322999999, 6883282.026700001), (701020.0553000048, 6883553.359200001), (701148.2096000016, 6883518.824800003), (701147.2836000025, 6883498.9810000025), (701143.9200000018, 6883446.650000002), (701143.1825000048, 6883398.5715), (701146.2990000024, 6883375.501900002), (701154.2087000012, 6883351.177100003), (701166.5234000012, 6883320.895800002), (701164.6138000041, 6883310.729600001), (701174.5357000008, 6883

In [None]:
base_dir = '../data/planet/france/sherrie10k/'
# csv_file = os.path.join(base_dir, 'bbox10k.csv')
# csv_file = os.path.join(base_dir, 'bbox10k_1250px.csv')
csv_file = os.path.join(base_dir, 'bbox10k_2500px.csv')

shape_file = '../data/parcels/france/RPG_2-0__SHP_LAMB93_FR-2018_2018-01-15/RPG/1_DONNEES_LIVRAISON_2018/RPG_2-0_SHP_LAMB93_FR-2018/PARCELLES_GRAPHIQUES.shp'
# TODO: update shape file to 2019

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

for start in np.arange(0, 1500, 250): # np.arange(1500, 10000, 250):
    end = start + 250
    images_df = pd.read_csv(csv_file).iloc[start:end]
    images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
    grid, tree = read_csv(shape_file, images_df)
    
    dump_shp_to_json(shape_file, grid, tree, 
                     '../data/planet/france/sherrie10k/json_polys/bbox10k_2500px_{}_'.format(int(start/250)))

### 2019 Geopackage

In [7]:
def dump_shp_to_json(shape_file, grid, tree, output_json='../data/planet/france/sherrie10k/test_json'):
    """
    find intersecting polygons in the list of available images and save the GeoJSON

    Args:
      shape_file (polygons): fields
      grid (dict): image bounding boxes 
      tree (r-tre): r-tree of images
      output_json (str): output path of json file
    """
    # coordinate transformation
    shapes = fiona.open(shape_file)
    if len(shapes.crs) != 0:
        original = Proj(shapes.crs)
    else:
        original = Proj('+init=EPSG:4326')

    # list of properties of features
#     field_names = shapes.schema['properties'].keys()
#     field_names.append('image_id')
    
    # sanity check counters
    buffer = []
    count_parcels = defaultdict(int)
    index = 0
    counter_method1 = 0
    counter_method2 = 0
    num_matched = 0
    failed_projection = 0
  
    # loop through the polygon fields
    for sr in tqdm(shapes, total=9517878):
#         if index % 100000 == 0:
#             print('Parsed ', index)
#         index += 1
        geom = sr['geometry']
        shp_geom = shape(geom)
        intersect = check_polygon_in_bounds(shp_geom, tree)
#         print(intersect)
        if len(intersect) != 0:
            num_matched += len(intersect)
#             print("Matched:", str(index))
#             print("Number matched:", num_matched)
      
            id_list = field_imageId_list(intersect, count_parcels)
            atr = dict(sr['properties'])
            atr['image_id'] = id_list
#             sr.record.append(id_list)
#             atr = dict(zip(field_names, sr.record))
            
            geom['coordinates'] = listit(geom['coordinates'])
            try: # protection at polygons that fail at projection
                if len(geom['coordinates']) == 1: # for single polygon
                    counter_method1 += 1
                    x, y = zip(*geom['coordinates'][0][0])
                    lat, long = original(x, y, inverse=True) # coordinate transformation
                    geom['coordinates'] = [listit(list(zip(lat, long)))]
                else: # for multipolygons
                    counter_method2 += 1
                    for index_coord in range(0, len(geom['coordinates'])):
                        for counter in range(0,len(geom['coordinates'][index_coord][0])):
                            x, y = geom['coordinates'][index_coord][0][counter]
                            lat, long = original(x, y, inverse=True) # coordinate transformation
                            geom['coordinates'][index_coord][counter] = [lat, long] #(long, lat)
            except:
                failed_projection += 1
#                 print(geom['coordinates'])
            buffer.append(dict(type="Feature", geometry=geom, properties=atr))
            
            # for debugging
#             if num_matched > 10:
#                 break
      
      
    # write the GeoJSON file
    output_json_interval = output_json + str(num_matched) + '.json'
    print("saving json")
    with open(output_json_interval, 'w') as geojson:
        geojson.write(dumps({"type": "FeatureCollection", "features": buffer}, indent=2) + "\n")
        geojson.close()
        print('saved', output_json_interval)
    
    # print summary
    print('method one count:', counter_method1)
    print('method two count:', counter_method2)
    print("Number matched:", num_matched)
    print('failed count', failed_projection)

In [16]:
parcels = gpd.read_file('../data/parcels/france/RPG_2-0_GPKG_LAMB93_FR-2019/RPG/1_DONNEES_LIVRAISON_2019/RPG_2-0_GPKG_LAMB93_FR-2019/PARCELLES_GRAPHIQUES.gpkg')

In [17]:
parcels.crs

{'init': 'epsg:2154'}

In [20]:
parcels = parcels.set_crs("EPSG:4326")

AttributeError: 'GeoDataFrame' object has no attribute 'set_crs'

In [18]:
parcels3857 = parcels.to_crs("EPSG:3857")

RuntimeError: b'no arguments in initialization list'

In [None]:
parcels3857.to_file('../data/parcels/france/RPG_2-0_GPKG_LAMB93_FR-2019/PARCELLES_GRAPHIQUES_3857.gpkg')

In [None]:
# base_dir = '../data/planet/france/sherrie10k/'
base_dir = '../data/planet/france/'
# csv_file = os.path.join(base_dir, 'bbox10k.csv')
# csv_file = os.path.join(base_dir, 'bbox10k_1250px.csv')
# csv_file = os.path.join(base_dir, 'bbox10k_2500px.csv')
csv_file = os.path.join(base_dir, 'bbox_1250px_epsg3857.csv')

# shape_file = '../data/parcels/france/RPG_2-0__SHP_LAMB93_FR-2018_2018-01-15/RPG/1_DONNEES_LIVRAISON_2018/RPG_2-0_SHP_LAMB93_FR-2018/PARCELLES_GRAPHIQUES.shp'
# TODO: update shape file to 2019
shape_file = '../data/parcels/france/RPG_2-0_GPKG_LAMB93_FR-2019/RPG/1_DONNEES_LIVRAISON_2019/RPG_2-0_GPKG_LAMB93_FR-2019/PARCELLES_GRAPHIQUES.gpkg'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

# 300px and 1250px images
images_per_file = 1000
for start in np.arange(0, 10000, images_per_file):
    end = start + images_per_file
    images_df = pd.read_csv(csv_file).iloc[start:end]
    images_df = images_df.rename({'min_x': 'min_lon', 'max_x': 'max_lon', 'min_y': 'min_lat', 'max_y': 'max_lat'}, axis=1)
    images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
    grid, tree = read_csv(shape_file, images_df)

    dump_shp_to_json(shape_file, grid, tree, 
                     '../data/planet/france/json_polys/bbox10k_1250px_{}_'.format(int(start/images_per_file)))

# 2500px images
# images_per_file = 250 
# for start in np.arange(250, 10000, images_per_file):
#     end = start + images_per_file
#     images_df = pd.read_csv(csv_file).iloc[start:end]
#     images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
#     grid, tree = read_csv(shape_file, images_df)
    
#     dump_shp_to_json(shape_file, grid, tree, 
#                      '../data/planet/france/sherrie10k/json_polys_2019/bbox10k_2500px_{}_'.format(int(start/images_per_file)))

9604463it [11:09, 14341.47it/s]                             


saving json
saved ../data/planet/france/json_polys/bbox10k_1250px_0_285237.json
method one count: 281794
method two count: 0
Number matched: 285237
failed count 0


9604463it [10:54, 14676.94it/s]                             


saving json
saved ../data/planet/france/json_polys/bbox10k_1250px_1_304170.json
method one count: 299950
method two count: 0
Number matched: 304170
failed count 0


9604463it [10:48, 14804.16it/s]                             


saving json
saved ../data/planet/france/json_polys/bbox10k_1250px_2_292643.json
method one count: 286693
method two count: 0
Number matched: 292643
failed count 0


9604463it [10:51, 14747.04it/s]                             


saving json
saved ../data/planet/france/json_polys/bbox10k_1250px_3_287396.json
method one count: 283358
method two count: 2
Number matched: 287396
failed count 2


 94%|█████████▍| 8979870/9517878 [10:04<00:36, 14667.36it/s]

### India

In [19]:
# defined a new dump_shp_to_json function for india because the projection wasn't working...
# the parcels should already be in LAT, LON and don't need to be reprojected
# but somehow the inverse projection was messing things up
# TODO: fix this in a general way

In [19]:
def dump_shp_to_json(shape_file, grid, tree, output_json='../data/planet/france/sherrie10k/test_json'):
    """
    find intersecting polygons in the list of available images and save the GeoJSON

    Args:
      shape_file (polygons): fields
      grid (dict): image bounding boxes 
      tree (r-tre): r-tree of images
      output_json (str): output path of json file
    """
    # coordinate transformation
    reader = shapefile.Reader(shape_file)
    shapes = fiona.open(shape_file)
    if len(shapes.crs) != 0:
        original = Proj(shapes.crs)
    else:
        original = Proj('+init=EPSG:4326')
    print(fiona.open(shape_file).crs)
    print(original)

    # list of properties of features
    fields = reader.fields[1:]
    field_names = [field[0] for field in fields]
    field_names.append('image_id')

    buffer = []
    # sanity check counters
    count_parcels = defaultdict(int)
#     index = 0
    counter_method1 = 0
    counter_method2 = 0
    num_matched = 0
    failed_projection = 0
  
    # loop through the polygon fields
    for sr in tqdm(reader.iterShapeRecords(), total=10000):
#         if index % 100000 == 0:
#             print('Parsed ', index)
#         index += 1
        geom = sr.shape.__geo_interface__
        shp_geom = shape(geom)
        intersect = check_polygon_in_bounds(shp_geom, tree)
#         print(intersect)
        if len(intersect) != 0:
            num_matched += len(intersect)
#             print("Matched:", str(index))
#             print("Number matched:", num_matched)
      
            id_list = field_imageId_list(intersect, count_parcels)
            sr.record.append(id_list)
            atr = dict(zip(field_names, sr.record))
            
            geom['coordinates'] = listit(geom['coordinates'])
#             print(geom)
            try: # protection at polygons that fail at projection
                if len(geom['coordinates']) == 1: # for single polygon
                    counter_method1 += 1
                    x, y = zip(*geom['coordinates'][0])
#                     lat,long = x, y
                    lat, long = original(x, y, inverse=True) # coordinate transformation
                    geom['coordinates'] = [listit(list(zip(lat, long)))]
                else: # for multipolygons
                    counter_method2 += 1
                    for index_coord in range(0, len(geom['coordinates'])):
                        for counter in range(0,len(geom['coordinates'][index_coord])):
                            x, y = geom['coordinates'][index_coord][counter]
                            lat, long = original(x, y, inverse=True) # coordinate transformation
                            geom['coordinates'][index_coord][counter] = [lat, long] #(long, lat)
            except:
                failed_projection =+ 1
#                 print(geom['coordinates'])
            buffer.append(dict(type="Feature", geometry=geom, properties=atr))
            
#             if num_matched > 10:
#                 break
      
      
    # write the GeoJSON file
    output_json_interval = output_json + str(num_matched) + '.json'
    print("saving json")
    with open(output_json_interval, 'w') as geojson:
        geojson.write(dumps({"type": "FeatureCollection", "features": buffer}, indent=2) + "\n")
        geojson.close()
        print('saved', output_json_interval)
    
    # print summary
    print('method one count:', counter_method1)
    print('method two count:', counter_method2)
    print("Number matched:", num_matched)
    print('failed count', failed_projection)

In [None]:
base_dir = '../data/planet/india/'
csv_file = os.path.join(base_dir, 'bbox1000.csv')

shape_file = '../mount/data/india_parcels/india_parcels_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

# for start in np.arange(0, 1500, 250): # np.arange(1500, 10000, 250):
#     end = start + 250
#     images_df = pd.read_csv(csv_file).iloc[start:end]

images_df = pd.read_csv(csv_file)
images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
images_df = images_df[images_df['image_id'].isin(['00064', '00126'])]

grid, tree = read_csv(shape_file, images_df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/planet/india/json_polys/bbox1000_labeled')

### GeoWiki

In [24]:
base_dir = '../data/planet/india/geowiki/'
csv_file = os.path.join(base_dir, 'geowiki_maharashtra.csv')

shape_file = '../mount/data/india_parcels/india_geowiki_parcels_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

images_df = pd.read_csv(csv_file)
images_df = images_df[images_df['image_id'].isin([960228])]

grid, tree = read_csv(shape_file, images_df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/planet/india/geowiki/json_polys/geowiki_labeled')

  0%|          | 266/9517878 [00:00<19:01, 8339.57it/s]

saving json
saved ../data/planet/india/geowiki/json_polys/geowiki_labeled259.json
method one count: 259
method two count: 0
Number matched: 259
failed count 0





### General Blockchain

In [11]:
base_dir = '../data/planet/india/GeneralBlockchain/'
csv_file = os.path.join(base_dir, 'bbox_india_GB_v1.csv')

shape_file = '../mount/data/GeneralBlockchain/campaign_results/india_fields_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
grid, tree = read_csv(shape_file, df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/planet/india/GeneralBlockchain/json_polys/bbox_images')

  0%|          | 10013/9517878 [00:09<2:22:33, 1111.64it/s]


saving json
saved ../data/planet/india/GeneralBlockchain/json_polys/bbox_images26405.json
method one count: 8788
method two count: 15
Number matched: 26405
failed count 0


### Large Planet

In [None]:
base_dir = '../data/planet/india/GeneralBlockchain/'
csv_file = os.path.join(base_dir, 'bbox_india_GB_download_v2.csv')

shape_file = '../mount/data/GeneralBlockchain/campaign_results/india_fields_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
grid, tree = read_csv(shape_file, df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/planet/india/GeneralBlockchain/json_polys/bbox_images')

##### Debug

In [22]:
base_dir = '../data/planet/india/GeneralBlockchain/'
csv_file = os.path.join(base_dir, 'bbox_india_GB_download_actual.csv')

shape_file = '../mount/data/GeneralBlockchain/campaign_results/india_fields_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
grid, tree = read_csv(shape_file, df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/planet/india/GeneralBlockchain/json_polys/bbox_images')

  0%|          | 10013/9517878 [00:04<1:03:30, 2494.89it/s]


saving json
saved ../data/planet/india/GeneralBlockchain/json_polys/bbox_images10027.json
method one count: 9997
method two count: 16
Number matched: 10027
failed count 10013


In [13]:
shapes = fiona.open(shape_file)

In [14]:
shapes.crs

{'init': 'epsg:4326'}

### Large Airbus images

In [8]:
base_dir = '../data/general_blockchain/'
csv_file = os.path.join(base_dir, 'bbox_india_GB_large_Airbus.csv')

shape_file = '../mount/data/GeneralBlockchain/campaign_results/india_fields_with_area.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
grid, tree = read_csv(shape_file, df)

dump_shp_to_json(shape_file, grid, tree, 
                 '../data/general_blockchain/json_polys/bbox_Airbus_large')

  0%|          | 9966/9517878 [00:04<1:09:28, 2280.77it/s]


saving json
saved ../data/general_blockchain/json_polys/bbox_Airbus_large9975.json
method one count: 9898
method two count: 68
Number matched: 9975
failed count 0


## Africa

### Senegal

In [5]:
base_dir = '../data/planet/senegal/'
csv_file = os.path.join(base_dir, 'bbox_tiles_all.csv')

shape_file = '../mount/data/senegal_parcels/SenegalFields_03_26.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
df.shape

(14543, 5)

In [8]:
increment = 20000
    
for start in np.arange(0, df.shape[0], increment):
    end = start + increment
    images_df = pd.read_csv(csv_file).iloc[start:end]

    images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
    grid, tree = read_csv(shape_file, images_df)
    
    dump_shp_to_json(shape_file, grid, tree, 
                     '../data/planet/senegal/json_polys/bbox_tiles_{}_'.format(int(start/increment)))

  0%|          | 2590/9517878 [00:01<1:17:12, 2053.89it/s]


saving json
saved ../data/planet/senegal/json_polys/bbox_tiles_0_3293.json
method one count: 2579
method two count: 10
Number matched: 3293
failed count 1


### Ghana

In [5]:
base_dir = '../data/planet/ghana/udry/'
csv_file = os.path.join(base_dir, 'bbox_tiles_all.csv')

shape_file = '../mount/data/udry_parcels/udry_fields_2017.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
df.shape

(18023, 5)

In [10]:
increment = 20000
    
for start in np.arange(0, df.shape[0], increment):
    end = start + increment
    images_df = pd.read_csv(csv_file).iloc[start:end]

    images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
    grid, tree = read_csv(shape_file, images_df)
    
    dump_shp_to_json(shape_file, grid, tree, 
                     '../data/planet/ghana/udry/json_polys/bbox_tiles_{}_1'.format(
                         int(start/increment)))

  0%|          | 8938/9517878 [00:04<1:23:23, 1900.41it/s]


saving json
saved ../data/planet/ghana/udry/json_polys/bbox_tiles_0_111334.json
method one count: 8938
method two count: 0
Number matched: 11334
failed count 0


### Malawi

In [12]:
base_dir = '../data/planet/malawi/'
csv_file = os.path.join(base_dir, 'bbox_tiles_all.csv')

shape_file = '../mount/data/malawi_parcels/malawi_WFP_fields_2018.shp'

if os.path.exists(os.path.join(base_dir, 'json_polys')) == False:
    os.makedirs(os.path.join(base_dir, 'json_polys'))

df = pd.read_csv(csv_file)
df.shape

(2372, 5)

In [13]:
increment = 20000
    
for start in np.arange(0, df.shape[0], increment):
    end = start + increment
    images_df = pd.read_csv(csv_file).iloc[start:end]

    images_df['image_id'] = images_df['image_id'].astype(str).str.zfill(5)
    grid, tree = read_csv(shape_file, images_df)
    
    dump_shp_to_json(shape_file, grid, tree, 
                     '../data/planet/malawi/json_polys/bbox_tiles_{}_'.format(int(start/increment)))

  0%|          | 423/9517878 [00:00<1:16:26, 2074.92it/s]

saving json
saved ../data/planet/malawi/json_polys/bbox_tiles_0_499.json
method one count: 423
method two count: 0
Number matched: 499
failed count 0



