In [None]:
from random import shuffle
import pandas as pd
import os
import shutil

import brtdevkit.util.s3 as brt_s3
s3_client = brt_s3.S3()

from bson import ObjectId                                                          
from brtdevkit.core.db import DBConnector                                          
db = DBConnector()

def get_specific_fields(df, artifact = 'nrg'):
    """
    Takes a dataframe with specific [grower, farm, operating_field_name] combinations and retreives document records from 
    Aletheia matching just those fields. Useful when you  need a collection of fields but don't want to query the entire record.
    :param: df: a pandas dataframe with [grower, farm, operating_field_name] columns.
    :param: artifact: The type of artifact you want to return. Default is 'nrg'
     """
    db = DBConnector()
    filters = {'grower': df['grower'][0], 'farm': df['farm'][0],'operating_field_name': df['operating_field_name'][0]}
    img_filters = {'project_name': 'shasta', **filters, 'artifacts.kind':artifact}
    master_df = db.get_documents_df('image', img_filters, limit=None)
    for n in range(len(df[1:])):
        filters = {'grower': str(df['grower'][n]), 'farm': str(df['farm'][n]),'operating_field_name': str(df['operating_field_name'][n])}
        img_filters = {'project_name': 'shasta', **filters, 'artifacts.kind':artifact}
        field_df = db.get_documents_df('image', img_filters, limit=None)
        if len(field_df)==0:
            print(f'Could not find field: { df.operating_field_name[n]} from grower: {df.grower[n]}')
        master_df = pd.concat([master_df, field_df], axis=0)
    print(f'Query returned {len(master_df)} {artifact} records.')
    return master_df

def find_inference_images(fields_df, analysis_name, n_inference = 50):
    """
    Given a dataframe with grower, farm and operating_field_name, find the collection_ids associated wihth those fields and sample 
    inference images. Download them to separate directories and generate a microscope csv for viewing in microdscope. 
    :param: fields: A list of grower_farm_fields to find inference images for.  
    :param: n_images: The number of images to examine per field. Default is 50
    """
    df = get_specific_fields(fields_df)
    base_dir = '/home/williamroberts/code/brtdevkit/Projects/Inference Images/'
    df['grower_farm_field'] = df['grower'] +'_' + df['farm'] + '_' + df['operating_field_name']
    non_null_df= df[df.collection_id.isnull()==False]
    
    print('The following fields do not have available predictions:')
    for g in df[~df['grower_farm_field'].isin(non_null_df.grower_farm_field.unique())].grower_farm_field.unique():
        print(g)
    for gff in non_null_df.grower_farm_field.unique():
        
        # Create directories for analysis, images
        if os.path.exists(base_dir + analysis_name)==False:
            os.mkdir(base_dir + analysis_name)
        
        analysis_dir = base_dir + analysis_name + '/' + str(gff)
        nrg_dir = analysis_dir + '/nrg_images'
        pred_dir = analysis_dir + '/predictions'
        
        if os.path.exists(analysis_dir)==False:
            os.mkdir(analysis_dir)
        if os.path.exists(nrg_dir)==False:
            os.mkdir(nrg_dir)
        else:
            shutil.rmtree(nrg_dir)
            os.mkdir(nrg_dir)
        if os.path.exists(pred_dir)==False:
            os.mkdir(pred_dir)
        else:
            shutil.rmtree(pred_dir)
            os.mkdir(pred_dir)
            
        nrg_ids = []
        nrg_s3 = []
        predictions_s3 = []
        
        for l in non_null_df[non_null_df['grower_farm_field']==gff].collection_id.unique():
            #print(l)
            try:
                image_evaluations = ImageEvaluation.list(tag=l).auto_paging_iter()
                for ie in image_evaluations:
                    #print(ie)
                    if ie.image_data.artifacts[1]['kind'] == 'nrg':
                        nrg_ids.append(ie.image_data.artifacts[1]['_id'])
                        nrg_s3.append(ie.image_data.artifacts[1]['s3_key'])
                        predictions_s3.append(ie['prediction_s3_path'])
            except:
                continue
        
        ilist =list(range(len(nrg_ids)))
        shuffle(ilist)
        ilist = ilist[0:n_inference]
        os.chdir(nrg_dir)
        for m in ilist:
            s3_client.download_file('mesa-data',nrg_s3[m], nrg_ids[m]+'.png')
        os.chdir(pred_dir)
        for m in ilist:
            #print(predictions_s3[m].split('mesa-data/')[1])
            s3_client.download_file('mesa-data',predictions_s3[m].split('mesa-data/')[1], nrg_ids[m]+'_pred.png')

    #resp = db.db.image_evaluation.find({'tag':df.collection_id.unique()[0]})
    
#list(db.db.image_evaluation.find({'image': ObjectId('60d06feb68a9e2c5baeeb11d')}))
find_inference_images(field_eval, analysis_name = 'Early_July_Corn', n_inference=50)

Query returned 25953 nrg records.
The following fields do not have available predictions:


[31;1m2021-07-08 10:37:25,947 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:27,170 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:28,399 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:29,353 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:31,778 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:32,766 - APIRequestor - ERROR - API error received | error_code : 400, error_message : ImageOld matching query does not exist.
[0m[31;1m2021-07-08 10:37:34,688 - APIRequestor - ERROR - API error rece

In [73]:
import pandas as pd

field_eval = pd.read_csv('fields_to_evaluate.csv')
test_df = field_eval[5:7].reset_index()
test_df.grower

0    Brian Brekken
1    brian brekken
Name: grower, dtype: object

In [83]:
ilist =list(range(20))
ilist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [79]:
from brtdevkit.data import ImageEvaluation
#
#90a90bd6-51db-4fb1-ba00-586596ddf393
image_evaluations = ImageEvaluation.list(tag='90a90bd6-51db-4fb1-ba00-586596ddf393').auto_paging_iter()
for ie in image_evaluations:
    print(ie)

{
  "created_at": "2021-06-22T09:40:17.719000",
  "created_by_email": "taylor.ritenour@bluerivertech.com",
  "created_by_id": "5df82b39f6a13cf3aabf1fc1",
  "id": "60d1b0011f267db8b3dd098b",
  "image_data": {
    "angle_to_row": "PARALLEL",
    "annotations": [],
    "artifacts": [
      {
        "_id": "60d0ea1dc10c92457874d058",
        "content_hash": "0eed52536b269766c1b863661643f90d",
        "height": 1080,
        "kind": "raw",
        "metadata": {},
        "processing_type": "ISP",
        "s3_bucket": "mesa-data",
        "s3_key": "prod/images/343a53ca-8716-4ec7-9aa8-e744515fff07-mnt_ebs_vol_output_273_raw.tif",
        "width": 1920
      },
      {
        "_id": "60d0ea1dc10c92457874d059",
        "content_hash": "a7607fcafb332209f1019ad832c42bf8",
        "height": 1080,
        "kind": "nrg",
        "metadata": {},
        "processed_at": "2021-06-21T19:35:56.825000",
        "processing_type": "ISP",
        "revision": "03120303",
        "s3_bucket": "mesa-data",


In [3]:
# This will download the prediction 

import brtdevkit.util.s3 as brt_s3
s3_client = brt_s3.S3()
s3_client.download_file('mesa-data','prod/image_evaluations/60d1abe9bc8d3895671d358d/s3_bucket_artifacts_60d06feb68a9e2c5baeeb11d.png', 'pred.png')

#download_from_url('s3://mesa-data/prod/image_evaluations/60d1abe9bc8d3895671d358d/s3_bucket_artifacts_60d06feb68a9e2c5baeeb11d.png', 'pred.png')



True

In [67]:
# for a given field, find the unique tags
# sample images from each tag
# create a directory to store the files
# download images and predictions into separate directories
# generate microscope csv

bucket = 'mesa_data'

df = get_specific_fields(test_df)


for gff in df.grower_farm_field.unique():
    os.mkdir()
    os.chdir()
    cids = df[df['grower_farm_field']==gff].collection_id.unique()
    keys = []
    paths = []
    for c in cids:
        resp = db.db.image_evaluation.find({'tag':c})
        for doc in resp:
            key = doc['prediction_s3_path'].split('data/')[1]
            file  = 'pred_' + doc['prediction_s3_path'].split('facts_')[1]
            #s3_client.download_file(bucket, key, file)
            keys.append(key)
            paths.append(file)
    for i in range(50):
        s3_client.download_file(bucket, keys[i], paths[i])
    os.chdir()

Query returned 2984 nrg records.


AttributeError: 'DataFrame' object has no attribute 'grower_farm_field'

In [14]:
resp = db.db.image_evaluation.find({'tag':'90a90bd6-51db-4fb1-ba00-586596ddf393'})
for doc in resp:
    bucket = 'mesa_data'
    key = doc['prediction_s3_path'].split('data/')[1]
    file  = 'pred_' + doc['prediction_s3_path'].split('facts_')[1]#.split('.png')[0]
    #s3_client.download_file(bucket, key, file)
    print(key)
    print(file)

prod/image_evaluations/60d057be1f267db8b3d73505/s3_bucket_artifacts_60cfc0e430ac80edc662d59b.png
pred_60cfc0e430ac80edc662d59b.png
prod/image_evaluations/60d057bf98704bcc08e414cb/s3_bucket_artifacts_60cfc0c4cc244568d2076791.png
pred_60cfc0c4cc244568d2076791.png
prod/image_evaluations/60d057c098704bcc08e414e0/s3_bucket_artifacts_60cfc1df69d5a4fd788ca0d6.png
pred_60cfc1df69d5a4fd788ca0d6.png
prod/image_evaluations/60d057c198704bcc08e414ee/s3_bucket_artifacts_60cfc05f1f267db8b3d4a5bf.png
pred_60cfc05f1f267db8b3d4a5bf.png
prod/image_evaluations/60d057c227d932f621fe29ff/s3_bucket_artifacts_60cfc120ccd107ff61953a82.png
pred_60cfc120ccd107ff61953a82.png
prod/image_evaluations/60d057c3c074f328599eccc4/s3_bucket_artifacts_60cfc14998704bcc08e27058.png
pred_60cfc14998704bcc08e27058.png
prod/image_evaluations/60d057c4181a634bf081fa36/s3_bucket_artifacts_60cfc08c27d932f621fba478.png
pred_60cfc08c27d932f621fba478.png
prod/image_evaluations/60d057c59e6842e7cc58d01b/s3_bucket_artifacts_60cfc0d669d5a4f

In [4]:
# This will give me every evaluation artifact in the collection_id

from brtdevkit.data import ImageEvaluation
image_evaluations = ImageEvaluation(tag='90a90bd6-51db-4fb1-ba00-586596ddf393').auto_paging_iter()
for ie in image_evaluations:
    print (ie)


TypeError: __init__() missing 1 required positional argument: 'values'