In [1]:
from datetime import datetime

import ipywidgets as widgets
import pandas as pd
from matplotlib import rcParams

import xdcm_dash_functions as f

%load_ext autoreload
%autoreload 2
%matplotlib inline

from xdcm_dash_functions import *

"""
Colorization and visualization utilities.
"""

import numpy as np
import skimage.transform as skt

#from brtlearn.shared import config
# Integer ids for the dirt / weed / crop pixel classes (not used elsewhere in
# the visible part of this notebook).
PIX_DATA_CLASS_DIRT = 0
PIX_DATA_CLASS_WEED = 1
PIX_DATA_CLASS_CROP = 2

# Robots whose images are requested from the data store.
DCM_ROBOT_NAMES = ['DCM-MANATEE', 'DCM-WALRUS', 'DCM-SEAL', 'DCM-OTTER', 'DCM-PORPOISE', 'DCM-DOLPHIN']

# Start date handed to the query; how it is applied is up to get_shasta_data.
start = datetime(2020, 3, 7)

# Query filter: keep only NRG artifacts, only images that carry a crop label,
# and only images taken by one of the DCM robots listed above.
filters = {
    "artifacts.kind": "nrg",
    "crop_name": {"$ne": None},
    "robot_name": {"$in": list(DCM_ROBOT_NAMES)},
}
full_df, elapsed_time = f.get_shasta_data(filters=filters, start=start)

# Collection timestamp truncated to the calendar day and converted back to a
# pandas datetime column, so rows can be grouped and compared per day.
full_df['date_collected'] = pd.to_datetime(full_df['collected_on'].dt.date)
print(f"Queried {len(full_df)} images in {elapsed_time:.2f} s.")

Queried 283979 images in 103.60 s.


In [37]:
# Among the human-annotated SOYBEANS images, what share comes from machines
# and what share comes from DCMs?
#
# NOTE(review): the query at the top of the notebook restricts robot_name to
# the DCM-* robots, so `mach` is empty unless full_df was loaded with a wider
# filter. The guard below keeps the cell from dividing by zero in that case.

qdf = full_df.copy()
qdf = qdf[qdf['crop_name'] == 'SOYBEANS']
mach = qdf[qdf['robot_name'].isin(['SHASTA-FB-BRADLEY', 'SHASTA-FB-PALADIN', 'ATM-DUCKDUCK', 'ATM-GOOSE', 'BLACKBIRD'])]
dcm = qdf[qdf['robot_name'].isin(['DCM-MANATEE', 'DCM-WALRUS', 'DCM-SEAL', 'DCM-OTTER', 'DCM-PORPOISE', 'DCM-DOLPHIN'])]

# Count the annotated images once per source instead of re-filtering for
# every print statement.
mach_annotated = len(mach[mach['has_human_annotation'] == True])
dcm_annotated = len(dcm[dcm['has_human_annotation'] == True])
tot = mach_annotated + dcm_annotated

if tot == 0:
    print('No human-annotated SOYBEANS images found; shares are undefined.')
else:
    print(mach_annotated / tot)  # share of all annotated images that came from machines
    print(dcm_annotated / tot)  # share of all annotated images that came from DCMs

0.10199587542381768
0.8980041245761823


In [2]:
# Every distinct operating_field_name present in the query result.

fields = full_df['operating_field_name'].unique()


In [None]:
# Attributes that should be constant within a single operating field.
# A field name whose rows show any of the following is suspect:

# more than one crop
# more than one collection date
# more than one grower
# more than one farm
# more than one robot_name
# points spread over a wide geographic area (distance between points > threshold)
# a number of camera heights other than 3
# a name that closely matches another field name in the same geographic area
# 



In [17]:
# Identify the operating fields whose images were collected on 2 or more
# distinct dates.

qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and rebinding it would break later calls
# such as f.get_shasta_data.
multiple_dates = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'date_collected'].unique()) > 1
]

print(multiple_dates)
print(f'there are {len(multiple_dates)} fields with more than one date')

['parking lot', '9', '1', 'barn', 'bordo nuevo', 'jkhg', 'cementary', 'lost john north', 'loading docks', 'kjh', 'river field', 'JDTIC', '23', '7', 'west', 'South', '2', 'East', 'North', 'West', '10', 'Home', '60', 'leathers west', 'lkjh']
there are 25 fields with more than one date


In [18]:
# Part 1: labeling coverage for one hand-picked field.
qdf = full_df.copy()
ofn = '9'
grower = 'arva intelligence'
op = qdf[qdf['operating_field_name'] == ofn]
gr = qdf[qdf['grower'] == grower]
labeled = op[op['has_human_annotation'] == True]
print(f'{len(labeled)} images from this field have been labeled.')
print(f'There were {len(op)} images collected from the {ofn} field.')
#op.to_csv('op.csv')
#print(op.groupby('has_human_annotation').count())
#print(op.experiment_id.unique())
#print(op.date_collected.unique())
#print(op.robot_name.unique())


# Part 2: for every field seen on more than one date, print the mean GPS
# position per date so that re-used field names in different places stand out.
for field_name in multiple_dates:
    op = qdf[qdf['operating_field_name'] == field_name]
    dates = op['date_collected'].unique()
    print(field_name)
    for day in dates:
        day_rows = op[op['date_collected'] == day]
        # Each gnss entry is read with .get('coordinates'); index 1 of the
        # result is used as latitude and index 0 as longitude.
        coords = [point.get('coordinates') for point in day_rows['gnss']]
        latitudes = [pair[1] for pair in coords]
        longitudes = [pair[0] for pair in coords]
        print([day, np.mean(latitudes), np.mean(longitudes)])



446 images from this field have been labeled.
There were 1582 images collected from the 9 field.
parking lot
[numpy.datetime64('2020-03-24T00:00:00.000000000'), 30.64649651473682, -96.30231331684212]
[numpy.datetime64('2020-03-25T00:00:00.000000000'), 30.884954363829774, -96.66929634858157]
[numpy.datetime64('2020-04-08T00:00:00.000000000'), 30.633122056140337, -96.32643377017546]
9
[numpy.datetime64('2020-03-25T00:00:00.000000000'), 30.899585775343805, -96.66793961786901]
[numpy.datetime64('2020-06-20T00:00:00.000000000'), 36.67644803930822, -89.7493543109935]
1
[numpy.datetime64('2020-04-08T00:00:00.000000000'), 30.90338599219857, -96.68039328156031]
[numpy.datetime64('2020-05-31T00:00:00.000000000'), 33.61705810103674, -90.67006300297021]
[numpy.datetime64('2020-06-08T00:00:00.000000000'), 42.397035306761495, -96.60472417710184]
[numpy.datetime64('2020-06-28T00:00:00.000000000'), 35.09085196917682, -78.47751703461472]
barn
[numpy.datetime64('2020-04-01T00:00:00.000000000'), 26.44844

In [27]:
# Labeling coverage for the 'JDTIC' field, followed by a per-date look at the
# experiment ids, growers and crop names of its SOYBEANS images.
qdf = full_df.copy()
ofn = 'JDTIC'
op = qdf[qdf['operating_field_name'] == ofn]
labeled = op[op['has_human_annotation'] == True]
print(f'{len(labeled)} images from this field have been labeled.')
print(f'There were {len(op)} images collected from the {ofn} field.')
#print(op.experiment_id.unique())

is_soy = op['crop_name'] == 'SOYBEANS'
for day in op['date_collected'].unique():
    soy_on_day = op[(op['date_collected'] == day) & is_soy]
    for column in ('experiment_id', 'grower', 'crop_name'):
        print(soy_on_day[column].unique())
    print(day)

#print(op.robot_name.unique())



885 images from this field have been labeled.
There were 2042 images collected from the JDTIC field.
['Thur 5/14/2020' 'thurs 5/14/2020']
['JDTIC']
['SOYBEANS']
2020-05-14T00:00:00.000000000
['Wed 6/17 afternoon']
['Ryan Bergman']
['SOYBEANS']
2020-06-17T00:00:00.000000000


In [4]:
# Operating fields whose images span more than one collection date.
qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and must stay bound to it.
multiple_dates = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'date_collected'].unique()) > 1
]

print(f'there are {len(multiple_dates)} fields with more than one date')
print(multiple_dates)

there are 25 fields with more than one date
['parking lot', '9', '1', 'barn', 'bordo nuevo', 'jkhg', 'cementary', 'lost john north', 'loading docks', 'kjh', 'river field', 'JDTIC', '23', '7', 'west', 'South', '2', 'East', 'North', 'West', '10', 'Home', '60', 'leathers west', 'lkjh']


In [5]:
# Identify the operating fields that have 2 or more experiment_ids.
#
# NOTE(review): the result is stored as `multiple_growers` although it is
# built from the experiment_id column, and a later cell intersects it (as
# `mg`) with the multi-date fields. Confirm whether the `grower` column was
# intended here; the column is left unchanged so existing results still hold.
qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and must stay bound to it.
multiple_growers = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'experiment_id'].unique()) > 1
]

print(f'there are {len(multiple_growers)} fields with more than one experiment_id')
print(multiple_growers)

there are 65 fields with more than one experiment_id
['parking lot', '9', 'goodland 9', '1', '1761', '3200w', 'trailer house', 'lott front', 'arney', 'cr 360', 'barn', 'tree', 'middle furgeson', 'goodland 1', 'bordo nuevo', 'sandpatch', 'big field', 'kemp', 'red barn', 'oil field', 'webber', 'lottfront', '156s', 'barton west1', 'anzeda', 'jkhg', 'cementary', 'lost john north', 'loading docks', 'kjh', 'Kixmiller', 'luknow', 'river field', 'JDTIC', 'bhind pivot northend', '23', 'Bird Dogs 1', '7', '121', 'Boies Bend North', 'west', 'South', '2', 'East', 'South 200', 'North', 'West', '10', 'Dave Creek', 'C15', 'Vanderpol', 'Home', '60', 'mv1', 'kljh', 'North of House', 'snow drip', 'West clarke', 'Agronomy Strip Trials', 'leathers west', 'Federsons', 'South Inlo', 'Rock Garden', 'Kirtley east side', 'Deere demo field 6/1']


In [25]:
# Operating fields whose images are attributed to more than one farm.
qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and must stay bound to it.
multiple_farms = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'farm'].unique()) > 1
]

print(f'there are {len(multiple_farms)} fields with more than one farm')
print(multiple_farms)

there are 20 fields with more than one farm
['parking lot', '9', '1', 'trailer house', 'arney', 'cementary', 'lost john north', 'loading docks', 'river field', 'JDTIC', '7', 'west', 'South', 'East', 'North', 'West', '10', 'Home', 'snow drip', 'leathers west']


In [16]:
# Operating fields whose images were taken by more than one robot.
qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and must stay bound to it.
multiple_robots = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'robot_name'].unique()) > 1
]

print(f'there are {len(multiple_robots)} fields with more than one robot_name')
print(multiple_robots)

there are 12 fields with more than one robot_name
['9', 'goodland 9', '1', 'trailer house', 'arney', 'jkhg', 'kjh', 'JDTIC', 'East', 'North', 'West', 'Home']


In [11]:
# Operating fields whose images belong to more than one experiment_id.
qdf = full_df.copy()

# The loop variable is deliberately not called `f`: that name is the alias of
# the xdcm_dash_functions module and must stay bound to it.
multiple_ids = [
    field_name
    for field_name in fields
    if len(qdf.loc[qdf['operating_field_name'] == field_name, 'experiment_id'].unique()) > 1
]

print(f'there are {len(multiple_ids)} fields with more than one experiment_id')
print(multiple_ids)

there are 65 fields with more than one experiment_id
['parking lot', '9', 'goodland 9', '1', '1761', '3200w', 'trailer house', 'lott front', 'arney', 'cr 360', 'barn', 'tree', 'middle furgeson', 'goodland 1', 'bordo nuevo', 'sandpatch', 'big field', 'kemp', 'red barn', 'oil field', 'webber', 'lottfront', '156s', 'barton west1', 'anzeda', 'jkhg', 'cementary', 'lost john north', 'loading docks', 'kjh', 'Kixmiller', 'luknow', 'river field', 'JDTIC', 'bhind pivot northend', '23', 'Bird Dogs 1', '7', '121', 'Boies Bend North', 'west', 'South', '2', 'East', 'South 200', 'North', 'West', '10', 'Dave Creek', 'C15', 'Vanderpol', 'Home', '60', 'mv1', 'kljh', 'North of House', 'snow drip', 'West clarke', 'Agronomy Strip Trials', 'leathers west', 'Federsons', 'South Inlo', 'Rock Garden', 'Kirtley east side', 'Deere demo field 6/1']


In [23]:
# Set versions of the per-attribute field lists built in the cells above.
mr, mg, mf, md = (
    set(multiple_robots),
    set(multiple_growers),
    set(multiple_farms),
    set(multiple_dates),
)

# Fields that appear both in the multi-date list and in multiple_growers.
problem_fields = md & mg
print(len(problem_fields))

14


In [29]:
# Drill into one field name: which farms it is attributed to, on which
# calendar days it was imaged, and how many images it has in total.
qdf = full_df.copy()

farm, ofn, grower = 'Fincher Farms', 'lost john north', 'Jeff Buyck'

# Row subsets by field name, by grower and by farm; only `op` is printed below.
op = qdf[qdf['operating_field_name'] == ofn]
gr = qdf[qdf['grower'] == grower]
fr = qdf[qdf['farm'] == farm]

#print(f'{len(labeled)} images from this field have been labeled.')
#print(f'There were {len(op)} images collected from the {ofn} field.')
#print(op.experiment_id.unique())
#print(op.camera_height.unique())

print(op['farm'].unique())
print(op['collected_on'].dt.date.unique())
print(op.shape[0])

['holthouse' 'senter island']
[datetime.date(2020, 5, 6) datetime.date(2020, 5, 21)]
894
