# Processing Hazelnut Yield

The raw data provided contains data points with timestamps, GPS coordinates, and collection cart weight. This information was collected as a cart moves along rows of trees and harvests nuts.

The goal is to create a heatmap of productivity. We measure change in weight with respect to distance traveled, and pool data from multiple harvests to display the total productivity spatially.

In [1]:
import sys
import re
import os
import json
import pandas as pd
import geopandas
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import webcolors

Our method relies on configuring the boundaries of the fields. In the [configuration file](files/config.json), the coordinates of the corners of multiple fields are specified.

In [2]:
def create_field_polygons(cfg):
    """Build the boundary polygon of every configured field.

    Each entry of cfg['field_boundaries'] contributes one shapely Polygon
    whose vertices are its NW, NE, SE and SW corners, in that order, each
    given as (latitude, longitude).

    Returns:
        A GeoSeries of the polygons indexed by field name.
    """
    # Vertex order matters: walking NW -> NE -> SE -> SW traces the outline
    corner_order = ('NW_corner', 'NE_corner', 'SE_corner', 'SW_corner')

    polygons_by_name = {
        field['name']: Polygon(
            [(field[corner]['latitude'], field[corner]['longitude'])
             for corner in corner_order]
        )
        for field in cfg['field_boundaries']
    }
    return geopandas.GeoSeries(polygons_by_name)

When we load the raw CSV lists, we ignore null points:

In [3]:
def load_file(cfg, filename):
    """Load one harvest CSV and keep the rows that have usable coordinates.

    Args:
        cfg: configuration dict; cfg['csv_directory'] is the folder holding
            the CSV files.
        filename: name of the CSV file inside that folder. The file is read
            as ISO-8859-1 and must provide 'lat', 'long' and 'weight' columns.

    Returns:
        A list of {'latitude', 'longitude', 'weight'} dicts, in file order,
        holding the raw cell values of every row whose 'lat' and 'long' both
        convert to a non-NaN float. Rows that fail that check are dropped.
    """
    csv_directory = cfg['csv_directory']
    df = pd.read_csv(f'{csv_directory}/{filename}', encoding="ISO-8859-1")

    data = []
    for _, row in df.iterrows():
        try:
            latitude = float(row['lat'])
            longitude = float(row['long'])
            weight = row['weight']
        except (KeyError, TypeError, ValueError):
            # Missing column or non-numeric coordinate: skip the row, but let
            # anything unexpected (including KeyboardInterrupt) propagate.
            continue

        # Blank cells are read by pandas as NaN; those are the "null points"
        if np.isnan(latitude) or np.isnan(longitude):
            continue

        data.append({'latitude': row['lat'], 'longitude': row['long'], 'weight': weight})
    return data

The `quantize_yield_intensity` function does the heavy lifting. It iterates over points until the configured distance interval is exceeded. It then calculates the change in weight over distance (yield intensity), and applies that property to a rectangle surrounding the path traversed. This removes the need for a fixed grid overlaid on the field, allowing for flexibility in resolution. The precision is limited by either the configured distance interval or the frequency of data collection.

The rectangles are trimmed by the field boundary, so the configured corners should be strict so travel between rows is removed.

Helper functions exist to create these rectangles and to remove noisy data. Weight shifting around the cart is identified by negative weight changes. We also remove the data points immediately preceding and following each negative weight change, as these points have artificially high weight changes due to the nuts shifting back.

In [4]:
def quantize_yield_intensity(cfg, data, field_polygons):
    """Turn one harvest's point list into per-field yield-intensity regions.

    For every configured field the points are walked in order from an anchor
    point. When the straight-line distance from the anchor to a later point
    lies strictly between cfg['min_distance'] and cfg['max_distance'], the
    weight gained over that segment divided by the distance is stored as the
    yield intensity of a rectangle of width cfg['pool_height'] centred on the
    segment and clipped to the field polygon; the anchor then moves to that
    point. A point at or beyond cfg['max_distance'] only moves the anchor.

    Coordinates are treated as planar with x = latitude and y = longitude, so
    distances and pool_height are in the units of the raw coordinates.

    Args:
        cfg: configuration dict; reads 'field_boundaries' (each with 'name'),
            'pool_height', 'min_distance', 'max_distance' and
            'max_yield_intensity'.
        data: list of {'latitude', 'longitude', 'weight'} dicts in travel
            order, as produced by load_file.
        field_polygons: field name -> boundary polygon lookup.

    Returns:
        dict mapping each field name to a list of
        {'region': GeoDataFrame, 'yield_intensity': float} entries, with
        noisy entries and their direct neighbours removed.
    """
    weight_junk = re.compile('[^A-Za-z0-9.]+')

    def parse_weight(raw_weight):
        # Drops every run of characters other than ASCII letters, digits and '.'.
        # NOTE(review): letters are kept, so a value carrying a unit suffix
        # fails float() and its segment is skipped - confirm this is intended.
        return float(weight_junk.sub('', raw_weight))

    def to_point(record):
        return Point(float(record['latitude']), float(record['longitude']))

    def create_associated_region(initial_location, final_location, field_name):
        # Vector of the line traversed
        lx = final_location.x - initial_location.x
        ly = final_location.y - initial_location.y

        # Unit normal = travel direction rotated by 90 degrees. Built from the
        # segment length rather than the slope lx/ly so it is truly
        # perpendicular and still defined when ly == 0. A zero-length segment
        # raises ZeroDivisionError here, which the caller treats as a skip.
        length = float(np.hypot(lx, ly))
        nx = -ly / length
        ny = lx / length

        # Rectangle extending half the pool height to either side of the line
        half_height = cfg['pool_height'] / 2.0
        dx, dy = nx * half_height, ny * half_height
        region = Polygon([(initial_location.x + dx, initial_location.y + dy),
                          (final_location.x + dx, final_location.y + dy),
                          (final_location.x - dx, final_location.y - dy),
                          (initial_location.x - dx, initial_location.y - dy)])

        # Trim region to the configured field
        field_df = geopandas.GeoDataFrame(geometry=geopandas.GeoSeries(field_polygons[field_name]))
        region_df = geopandas.GeoDataFrame(geometry=geopandas.GeoSeries(region))
        return geopandas.overlay(field_df, region_df, how='intersection')

    def remove_noise(yield_intensities):
        for field_name in yield_intensities:
            field = yield_intensities[field_name]

            # A non-positive or implausibly large intensity marks weight
            # shifting in the cart; its neighbours are distorted by the same
            # shift, so they are dropped too. Out-of-range indices are harmless.
            noise = set()
            for i, entry in enumerate(field):
                intensity = entry['yield_intensity']
                if intensity <= 0 or intensity > cfg['max_yield_intensity']:
                    noise.update((i - 1, i, i + 1))

            yield_intensities[field_name] = [
                entry for i, entry in enumerate(field) if i not in noise
            ]

        return yield_intensities

    # Every configured field gets a list, even when no data falls inside it
    yield_intensities = {}
    for field in cfg['field_boundaries']:
        field_name = field['name']
        yield_intensities[field_name] = []

        if not data:
            continue

        initial_point = data[0]
        initial_location = to_point(initial_point)
        for point in data:
            i_location = to_point(point)
            distance = initial_location.distance(i_location)

            if cfg['min_distance'] < distance < cfg['max_distance']:
                try:
                    i_weight = parse_weight(point['weight'])
                    initial_weight = parse_weight(initial_point['weight'])
                except (TypeError, ValueError):
                    # Unparseable weight: skip this point and keep the anchor
                    continue

                yield_intensity = (i_weight - initial_weight) / distance

                try:
                    region = create_associated_region(initial_location, i_location, field_name)
                    inside_field = any(not empty for empty in region['geometry'].is_empty.tolist())
                except Exception:
                    # Best effort: a segment whose geometry cannot be built or
                    # clipped is skipped and the anchor kept. Catching
                    # Exception (not a bare except) keeps the loop interruptible.
                    continue

                if inside_field:
                    yield_intensities[field_name].append({'region': region, 'yield_intensity': yield_intensity})

                # Reset the anchor for the next segment
                initial_point, initial_location = point, i_location

            elif distance >= cfg['max_distance']:
                # Gap too long to attribute a yield to: restart from here
                initial_point, initial_location = point, i_location

    return remove_noise(yield_intensities)

This function pools the data from each harvest day. Intensities of intersecting areas are summed. Some smoothing needs to be implemented to avoid small polygons with outlier intensities.

In [8]:
def pool_data(processed_files):
    """Merge the yield regions of several harvests into one pool per field.

    The field names come from the module-level ``cfg`` ('field_boundaries'),
    not from an argument. Regions are added to a field's pool one at a time.
    Where a new region overlaps a region already in the pool, the overlap
    becomes a separate entry whose intensity is the sum of the two, and the
    overlap is cut out of both the new region and the pooled one.

    Args:
        processed_files: list of dicts, one per harvest, mapping field name to
            a list of {'region': GeoDataFrame, 'yield_intensity': number}
            entries (the output of quantize_yield_intensity). The entry dicts
            themselves are not modified.

    Returns:
        dict mapping every configured field name to its pooled entry list.
    """

    def is_blank(region):
        # No rows at all, or nothing but empty geometries
        return region.empty or bool(region.geometry.is_empty.all())

    pooled_yield_intensities = {field['name']: [] for field in cfg['field_boundaries']}

    for processed_file in processed_files:
        for field in cfg['field_boundaries']:
            field_name = field['name']
            pool = pooled_yield_intensities[field_name]

            for new_yield in processed_file[field_name]:
                # Track the uncovered part of the new region in a local so the
                # caller's entry keeps its original region
                remaining_region = new_yield['region']
                new_intensity = new_yield['yield_intensity']
                overlaps = []

                for pool_yield in pool:
                    if is_blank(remaining_region):
                        break
                    if is_blank(pool_yield['region']):
                        continue

                    overlap_region = geopandas.overlay(remaining_region, pool_yield['region'], how='intersection')
                    if is_blank(overlap_region):
                        continue

                    # The overlap must be a NEW entry: reusing the pooled dict
                    # would make the subtraction below erase the overlap itself
                    overlaps.append({
                        'region': overlap_region,
                        'yield_intensity': pool_yield['yield_intensity'] + new_intensity,
                    })

                    # Remove the overlap from both contributors
                    remaining_region = geopandas.overlay(remaining_region, overlap_region, how='difference')
                    pool_yield['region'] = geopandas.overlay(pool_yield['region'], overlap_region, how='difference')

                # Add what is left of the new region, then the summed overlaps
                if not is_blank(remaining_region):
                    pool.append({'region': remaining_region, 'yield_intensity': new_intensity})
                pool.extend(overlaps)

    return pooled_yield_intensities

The following block uses the defined functions sequentially. It will take roughly 10 minutes.

In [None]:
# Load the configuration. `cfg` is kept at module level because pool_data and
# the export cell read it as a global.
with open("config.json", 'r') as json_data_file:
    cfg = json.load(json_data_file)

field_polygons = create_field_polygons(cfg)

# Process every harvest CSV on its own; sorted() makes the pooling order
# reproducible, since os.listdir returns names in arbitrary order.
processed_files = []
for filename in sorted(os.listdir(cfg['csv_directory'])):
    if filename.lower().endswith(".csv"):
        data = load_file(cfg, filename)
        processed_files.append(quantize_yield_intensity(cfg, data, field_polygons))

total_yield_intensities = pool_data(processed_files)

False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
False
False
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
T

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
False
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Tru

The following block exports the data for visualization on geojson.io

In [None]:
def rgb(minimum, maximum, value):
    """Map a value onto a blue -> green -> red colour ramp.

    Args:
        minimum: value shown as pure blue.
        maximum: value shown as pure red.
        value: value to colour; anything outside [minimum, maximum] is
            clamped to the nearest end of the ramp.

    Returns:
        An (r, g, b) tuple of ints, each within 0..255. When minimum equals
        maximum the ramp has no extent and the midpoint colour (green) is
        returned.
    """
    minimum, maximum = float(minimum), float(maximum)
    span = maximum - minimum

    if span == 0:
        # Degenerate range: avoid dividing by zero, use the middle of the ramp
        ratio = 1.0
    else:
        # 0 at minimum, 1 at the midpoint, 2 at maximum
        ratio = 2 * (value - minimum) / span
        # Clamp so every channel stays a valid 0..255 component
        ratio = min(2.0, max(0.0, ratio))

    b = int(max(0, 255*(1 - ratio)))
    r = int(max(0, 255*(ratio - 1)))
    g = 255 - b - r
    return r, g, b

def _swap_axis_order(coordinates):
    """Swap the first two ordinates of every position in a GeoJSON coordinate array, in place.

    Recurses through any nesting depth, so Polygon and MultiPolygon
    coordinate arrays are both handled.
    """
    if coordinates and isinstance(coordinates[0], (int, float)):
        coordinates[0], coordinates[1] = coordinates[1], coordinates[0]
    else:
        for item in coordinates:
            _swap_axis_order(item)


for field in cfg['field_boundaries']:
    field_name = field['name']
    entries = total_yield_intensities[field_name]
    if not entries:
        # Nothing harvested in this field; pd.concat would raise on an empty list
        continue

    # Colour scale runs from 0 to the largest pooled intensity of the field
    max_intensity = max([0] + [entry['yield_intensity'] for entry in entries])

    export_list = []
    for entry in entries:
        # Attach the intensity and its fill colour as feature properties
        entry['region']['yield_intensity'] = entry['yield_intensity']
        entry['region']['fill'] = webcolors.rgb_to_hex(rgb(0, max_intensity, entry['yield_intensity']))
        export_list.append(geopandas.GeoDataFrame(entry['region']))

    export_object = geopandas.GeoDataFrame(pd.concat(export_list, ignore_index=True))
    export_object.to_file(f'{field_name}_intensity.json', driver="GeoJSON")

    # This notebook builds geometries with x = latitude and y = longitude, so
    # the file just written has positions as [latitude, longitude]. GeoJSON
    # positions are [longitude, latitude]; swap them for geojson.io.
    with open(f'{field_name}_intensity.json', 'r') as json_data_file:
        yield_intensities = json.load(json_data_file)

    for feature in yield_intensities['features']:
        geometry = feature.get('geometry')
        # Features without a geometry have nothing to swap
        if geometry and 'coordinates' in geometry:
            _swap_axis_order(geometry['coordinates'])

    with open(f'{field_name}_intensity_lat_long.json', 'w') as outfile:
        json.dump(yield_intensities, outfile)