## Setup

In [350]:
import pandas as pd
import boto3
import os
from pathlib import Path
# Allow up to 500 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 500)

In [351]:
from site_tools import get_image_files
from site_tools.metadata_tools import extract_metadata

In [352]:
# Data root for this project, located under the current user's home directory.
path_data = Path.home().joinpath('data', 'site')
# Sub-directory of the data root that is handed to the site_tools helpers.
path_images = path_data.joinpath('images')

## Get metadata

In [353]:
# Presumably the list of image file paths found under path_images
# (get_image_files comes from site_tools) — TODO confirm return type.
fns = get_image_files(path_images)

The cell below removes unloadable images from the filesystem. To keep the files on disk and only drop them from the `fns` list, run just the last line instead of the `os.remove` loop. The cell is commented out due to filesystem changes. `verify_images` runs in parallel but is still slow, because it has to open every image.


In [296]:
# Disabled on purpose: re-enabling the loop below deletes files from disk.
# NOTE(review): verify_images is not imported in the cells shown here;
# import it before uncommenting.
# bad = verify_images(fns)
# for bad_file in bad:
#     os.remove(bad_file)
# fns = [fn for fn in fns if not fn in set(bad)]

In [354]:
# Build the metadata table for the images directory (presumably a pandas
# DataFrame, given how it is used below). The cleanup helpers further down
# assign back into `df` column by column.
df = extract_metadata(path_images)

## Cleanup metadata

Started to clean up some of the metadata, but much more needs to be done.

In [355]:
def replace_plus_in_degree(df, cols):
    """Strip every '+' character from string values in the given columns.

    The frame is modified in place; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are rewritten.
    cols : str or iterable of str
        A single column name or several column names.

    Notes
    -----
    Columns that already have a numeric dtype are skipped. In non-numeric
    columns only ``str`` entries are touched; numbers, ``None`` and NaN are
    passed through unchanged, so mixed columns do not lose data.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        # Numeric columns cannot contain a literal '+', nothing to do.
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        # Plain str.replace treats '+' literally (no regex involved) and
        # leaves non-string entries as they are.
        df[col] = df[col].map(
            lambda value: value.replace('+', '') if isinstance(value, str) else value
        )

In [356]:
def convert_col_to_float(df, cols):
    """Cast the given columns of ``df`` to float, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are rewritten.
    cols : str or iterable of str
        A single column name (any ``str`` subclass is accepted) or several
        column names.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    ValueError
        If a value in a column cannot be converted to float.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are treated as one name instead of being iterated
    # character by character.
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        df[col] = df[col].astype(float)

In [357]:
def convert_col_to_str(df, cols):
    """Cast the given columns of ``df`` to str, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are rewritten.
    cols : str or iterable of str
        A single column name (any ``str`` subclass is accepted) or several
        column names.

    Notes
    -----
    NOTE(review): depending on the pandas version, missing values may be
    turned into literal strings such as ``'nan'`` by ``astype(str)`` —
    confirm this is acceptable for the columns passed in.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    # isinstance (rather than an exact type comparison) so that str
    # subclasses are treated as one name instead of being iterated
    # character by character.
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        df[col] = df[col].astype(str)

In [358]:
# Columns that get their '+' characters stripped and are then cast to float
# by the cleanup calls further down.
# NOTE(review): 'XMP:GPSLatitude' is listed but no longitude column is —
# confirm whether that is intentional.
deg_cols = [
    'XMP:AbsoluteAltitude',
    'XMP:RelativeAltitude',
    'XMP:GimbalRollDegree',
    'XMP:GimbalYawDegree',
    'XMP:GimbalPitchDegree',
    'XMP:FlightRollDegree',
    'XMP:FlightYawDegree',
    'XMP:FlightPitchDegree',
    'XMP:GPSLatitude',
    'XMP:FlightXSpeed',
    'XMP:FlightYSpeed',
    'XMP:FlightZSpeed',
]

In [359]:
# Columns cast to float by the cleanup calls below.
float_cols = [
    'EXIF:ShutterSpeedValue',
    'Composite:CircleOfConfusion',
]
# Also cast to float below; presumably mostly-empty columns, going by the
# name — TODO confirm.
null_cols = [
    'EXIF:SubSecTimeOriginal',
    'EXIF:SubSecTimeDigitized',
]
# Columns cast to str by the cleanup calls below.
str_cols = [
    'EXIF:Software',
]

In [360]:
# Order matters: '+' characters are stripped first so that the values in
# deg_cols can be cast to float on the next line.
replace_plus_in_degree(df, deg_cols)
# Cast every numeric-looking column group to float in one pass.
convert_col_to_float(df, float_cols + deg_cols + null_cols)
# Cast the remaining listed columns to str.
convert_col_to_str(df, str_cols)

## Save metadata

### Locally

In [361]:
# Write the cleaned metadata to <path_data>/raw/metadata.json.
local_path = path_data / 'raw' / 'metadata.json'
# to_json does not create missing directories, so make sure 'raw/' exists.
local_path.parent.mkdir(parents=True, exist_ok=True)
df.to_json(local_path)

### To S3

In [362]:
# Make the "crayon-site" AWS profile the default boto3 session, so the
# resource and client created below use its credentials.
boto3.setup_default_session(profile_name="crayon-site")
# NOTE(review): `s3` is not used in the cells shown here; only `s3_client` is.
s3 = boto3.resource("s3")
s3_client = boto3.client("s3")

In [363]:
# Upload the locally saved metadata file to the bucket, keeping its file name.
bucket = 'st-crayon'
local_path = path_data / 'raw' / 'metadata.json'
# Object key inside the bucket: 'data/raw/' followed by the local file name.
s3_path = 'data/raw/' + local_path.name
s3_client.upload_file(Filename=str(local_path), Bucket=bucket, Key=s3_path)