In [1]:
import boto3
import json
import os
import pandas as pd
from darwin.client import Client

pd.set_option('display.max_rows', None)

BUCKET_NAME = 'zipline-photogrammetry-service-kms'
IMAGE_PREFIX = 'photogrammetry_service/images/'
V7_DATASET_SLUG = 'tower-batch-7'
V7_API_KEY = "Dx868pT.49-DS7U_9riIuBda2QLC44dqzYopSdSL"

s3 = boto3.client('s3')
client = Client.from_api_key(V7_API_KEY)
dataset = client.get_remote_dataset(dataset_identifier=f'zipline/{V7_DATASET_SLUG}')

from darwin.importer import importer, get_importer
parser = get_importer("darwin")

In [2]:
THIS_FILE_DIR = os.path.abspath('.')
WORK_DIR = os.path.normpath(os.path.join(THIS_FILE_DIR, '../data'))
WORK_DIR

'/home/ubuntu/mine/tower_injestion/survey_upload_to_v7/data'

In [3]:
result_files = [file for file in os.listdir(os.path.join(WORK_DIR, 'output_files')) if file.endswith('_results.csv')]
result_dfs = [pd.read_csv(os.path.join(WORK_DIR, 'output_files', file), index_col=0) for file in result_files]
result_df = pd.concat(result_dfs)
tower_result_df = result_df.query('tower_detected == 1')
tower_result_df.shape

(299, 4)

In [4]:
def extract_image_keys_to_process(project):
    image_json_file_key = IMAGE_PREFIX + f"{project}/images.json"
    obj = s3.get_object(Bucket = BUCKET_NAME, Key = image_json_file_key)
    images = json.loads(obj["Body"].read())
    return images

def check_project_is_fully_processed(project):
    processed_keys = set(result_df.query('project == @project')['key'].unique())
    all_keys = set(extract_image_keys_to_process(project))
    return all_keys == processed_keys

def get_project_summary(project):
    project_df = result_df.query('project == @project')
    output_dict = {
        'project': project,
        'num_images': project_df['key'].nunique(),
        'num_images_with_towers': project_df['tower_detected'].sum(),
        'project_is_fully_processed': check_project_is_fully_processed(project)
    }
    return output_dict

def upload_project(project):
    this_project_df = pd.read_csv(os.path.join(WORK_DIR, 'output_files', f'{project}_results.csv'), index_col=0).query('tower_detected == 1')
    paths = list(this_project_df['image_path'].unique())
    print(f'Going to push {len(this_project_df)} images from {project}')
    try:
        dataset.push(files_to_upload = paths, path = f"/{project}/")
        print(f'Successfully pushed {project}')
    except Exception as e:
        print(f'pushing {project} failed. Error: {e}')

def upload_annotations(project, results_df_with_towers = tower_result_df, dataset = dataset):
    assert list(results_df_with_towers['tower_detected'].unique()) == [True]
    assert project in results_df_with_towers['project'].unique()
    this_project_df = results_df_with_towers.query('project == @project')
    annotation_paths = []
    for image_path in this_project_df['image_path'].unique():
        image_name = image_path.split('/')[-1]
        annotation_file_name = image_name.rsplit('.', 1)[0]+'.json'
        annotation_paths.append(os.path.join(WORK_DIR, 'output_files', 'annotations', annotation_file_name))
    importer.import_annotations(dataset, parser, annotation_paths, append=True)

In [5]:
projects_summary = [get_project_summary(project) for project in result_df['project'].unique()]
projects_summary_df = pd.DataFrame(projects_summary)
projects_summary_df

Unnamed: 0,project,num_images,num_images_with_towers,project_is_fully_processed
0,Project_ci1_flight_34931,358,7,True
1,Project_CI1_18229_34449,430,3,True
2,Project_gh1_flight_199221,369,0,True
3,Project_gh1_flight_199222,313,1,True
4,Project_gh1_flight_205089,391,0,True
5,Project_gh1_flight_202747,366,0,True
6,Project_gh1_flight_202832,378,0,True
7,Project_gh1_flight_202794,366,3,True
8,Project_gh1_flight_202331,364,5,True
9,Project_gh1_flight_204253,619,6,True


In [6]:
projects_summary_df.query('num_images_with_towers > 0')

Unnamed: 0,project,num_images,num_images_with_towers,project_is_fully_processed
0,Project_ci1_flight_34931,358,7,True
1,Project_CI1_18229_34449,430,3,True
3,Project_gh1_flight_199222,313,1,True
7,Project_gh1_flight_202794,366,3,True
8,Project_gh1_flight_202331,364,5,True
9,Project_gh1_flight_204253,619,6,True
10,Project_ci1_flight_40852,433,50,True
11,Project_ci1_flight_34970,440,2,True
12,Project_ci1_flight_34458,361,3,True
13,Project_gh1_flight_204499,220,1,True


In [7]:
# upload_project('Project_CI1_18229_34449')

In [8]:
# upload_annotations('Project_ci1_flight_40861')

In [9]:
# def correct_annotation(file):
#     with open(file) as f:
#         data = json.load(f)
    
#     for bbox in data['annotations']:
#         bbox['bounding_box']['x'] = bbox['bounding_box']['x'] - (bbox['bounding_box']['w']/2)
#         bbox['bounding_box']['y'] = bbox['bounding_box']['y'] - (bbox['bounding_box']['h']/2)
    
#     json_str = json.dumps(data, indent= 4)
#     with open(file, 'w') as f:
#         f.write(json_str)