

## Step 1 Streetview image GPS 

Filter and rename the Padang streetview image GPS file.


## Step 2 Create individual streetview detection JSON

Detection results for the building-properties classes are filtered by each class's optimal confidence score, while a fixed threshold of 0.5 is used for the building-parts classes.

Create one JSON file per individual streetview image, so that the detections can be ingested into a PostgreSQL database.


In [None]:
# Input files used by the cells below (paths are relative to the working directory).
b_parts = 'results-padang-building-parts.csv'  # building-parts detections
b_properties = 'results-padang-building-properties.csv'  # building-properties detections
sv_gps = 'post_processing/padang_coordinate_filtered.csv'  # streetview GPS table
b_footprint = 'padang_footprints/padang_footprints_fixed.geojson'  # building footprints
model_stats = "model_evaluation_hp_padang_raw.csv"  # per-class 'threshold score' table

In [None]:
"""
Transform detected objects from image to map coordinates

@author: developmentseed
"""
import os 
from os import makedirs, path as op
import csv
import ast
import pandas as pd

import numpy as np
from pyproj import Geod
from shapely.geometry import shape, LineString, GeometryCollection
import json


## Step 1 Streetview image GPS 

Filter and rename the Padang streetview image GPS file.

In [None]:
def read_big_csv(csv, columns=None):
    """Load a large csv file into a single dataframe by reading it in chunks.

    Args:
        csv: csv file to read; handed to ``pandas.read_csv`` unchanged
        columns: column selection. A falsy value (the default ``None``)
            keeps every column. A list, tuple or set of column names keeps
            exactly those columns. Any other truthy value (e.g. ``True``)
            keeps the GPS columns 'HEADING', 'IMAGE_ID', 'LAT' and 'LONG'.

    Returns:
        df: dataframe with all rows of the file and the selected columns
    """
    # csv is more than 500MB, so read data in chunks e.g. 5000 rows per chunk
    c_size = 5000
    read_options = dict(chunksize=c_size, skipinitialspace=True)
    if columns:
        if isinstance(columns, (list, tuple, set)):
            # honour an explicit selection instead of silently replacing it
            read_options['usecols'] = list(columns)
        else:
            # legacy flag-style call: fall back to the GPS columns
            read_options['usecols'] = ['HEADING', 'IMAGE_ID', 'LAT', 'LONG']

    df_chunks_lst = list(pd.read_csv(csv, **read_options))
    df = pd.concat(df_chunks_lst)
    return df



## Step 2 Create individual streetview detection JSON

Detection results for the building-properties classes are filtered by each class's optimal confidence score, while a fixed threshold of 0.5 is used for the building-parts classes.

Create one JSON file per individual streetview image, so that the detections can be ingested into a PostgreSQL database.



In [None]:
# Load the model evaluation table and order it from the highest to the
# lowest 'threshold score'; the last line displays it in the notebook.
df_model = pd.read_csv(model_stats).sort_values(by='threshold score', ascending=False)
df_model

In [None]:
def _filter_values(values, optimal_score):
    """Keep the detections of one image that reach their class threshold.

    Args:
        values: string holding a Python literal -- a sequence of detection
            dictionaries, each with the keys 'detection_scores',
            'detection_classes' and 'detection_boxes'
        optimal_score: dictionary keyed by integer class id, giving the
            minimum score a detection of that class needs to be kept

    Returns:
        dictionary with the parallel lists 'detection_scores',
        'detection_classes' and 'detection_boxes' of the kept detections,
        plus 'image_fname' set to None (the caller fills it in)
    """
    kept_scores = []
    kept_classes = []
    kept_boxes = []
    for item in ast.literal_eval(values):
        score = item['detection_scores']
        reaches_threshold = float(score) >= float(optimal_score[int(item['detection_classes'])])
        if not reaches_threshold:
            continue
        # every kept detection is echoed to stdout
        print(int(item['detection_classes']), score, item['detection_boxes'])
        kept_scores.append(score)
        kept_classes.append(int(item['detection_classes']))
        kept_boxes.append(item['detection_boxes'])
    return dict(
        detection_scores=kept_scores,
        detection_classes=kept_classes,
        detection_boxes=kept_boxes,
        image_fname=None,
    )

In [None]:
def filter_detection_by_optimal_score(optimal_score, df, output_dir, col = ['tile', 'output']):
    """Write one json file of threshold-filtered detections per image.

    Args:
        optimal_score: dictionary include the optimal key and value, i.e.
            class id -> minimum score a detection must reach to be kept;
        df: the target dataframe
        output_dir: directory that receives the json files; it is created
            when it does not exist yet
        col: the two column names of ``df`` to use -- first the image file
            name, then the raw detections of that image (only read, never
            modified)

    Returns:
        (None): write each row into json file named after the base name of
        the image (extension dropped). A json file that already exists is
        left untouched, so an interrupted run can be resumed.
    """
    # create the output directory once instead of re-checking it per row
    makedirs(output_dir, exist_ok=True)

    # a dict keeps only the last row for an image name that appears twice
    df2dict = dict(zip(df[col[0]], df[col[1]]))
    for key, value in df2dict.items():
        nm = op.splitext(op.basename(key))[0]
        out_file = op.join(output_dir, f"{nm}.json")
        # decide on the skip before parsing and filtering the detections,
        # so already exported images cost nothing on a re-run
        if op.isfile(out_file):
            continue
        new_dict = _filter_values(value, optimal_score)
        new_dict['image_fname'] = key
        with open(out_file, 'w') as f:
            json.dump(new_dict, f)


In [None]:
# Lookup table: 'cls_id' -> 'threshold score', taken row by row from df_model
# (for a repeated cls_id the last row wins).
filter_dict = {
    cls_id: threshold
    for cls_id, threshold in zip(df_model['cls_id'], df_model['threshold score'])
}
filter_dict

In [None]:
# Load the building-properties detection results (all columns).
df_b_properties = read_big_csv(csv=b_properties)

In [None]:
# Export one json file per image, filtered with the per-class thresholds.
filter_detection_by_optimal_score(
    optimal_score=filter_dict,
    df=df_b_properties,
    output_dir="building_properties_sv_inferences_resluts",
)

In [None]:
# Load the building-parts detection results (all columns).
df_b_parts = read_big_csv(csv=b_parts)

In [None]:
# The building-parts class ids 1-4 all share the fixed 0.5 threshold.
filter_dict_parts = dict.fromkeys((1, 2, 3, 4), 0.5)

In [None]:
# Export one json file per image, filtered with the fixed 0.5 thresholds.
filter_detection_by_optimal_score(
    optimal_score=filter_dict_parts,
    df=df_b_parts,
    output_dir="building_parts_sv_inferences_resluts",
)

## Adding `cam` info to streetview geolocation 

In [None]:
# Load the streetview GPS table. `sv_gps` starts out as the csv path and is
# rebound to the loaded dataframe here, so only read the file while it is
# still a path: re-running this cell then no longer hands a dataframe to
# pandas.read_csv.
if isinstance(sv_gps, (str, os.PathLike)):
    sv_gps = read_big_csv(sv_gps)
sv_gps.head()

In [None]:
# Keep only the streetview images taken by Cam1 and Cam3: a row stays when
# the 7th '_'-separated token of its IMAGE_ID is a key of `cams`.
# NOTE(review): `cams` is not defined in the visible cells -- presumably a
# dict from that token to a camera label; confirm it is created beforehand.
sv_gps = sv_gps.loc[
    sv_gps['IMAGE_ID'].apply(lambda image_id: image_id.split('_')[6] in cams.keys())
]
sv_gps.head()

In [None]:
## add the `cam` column: look up the 7th '_'-separated token of each
## IMAGE_ID in `cams`
sv_gps['cam'] = sv_gps['IMAGE_ID'].apply(
    lambda image_id: cams[image_id.split('_')[6]]
)
sv_gps.head()

In [None]:
# Save the filtered table (with the `cam` column) without the index column.
# NOTE(review): this writes to the working directory, while the input path
# configured for `sv_gps` points into post_processing/ -- confirm the target.
sv_gps.to_csv(path_or_buf="padang_coordinate_filtered.csv", index=False)