## Setup

In [1]:
# Load IPython's autoreload extension and use mode 2, which re-imports modules
# before each cell executes so edits to imported code are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import pandas as pd
import boto3
import os
import re
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
from typing import List, Union
from pathlib import Path
# Let pandas display up to 500 columns so wide frames are not truncated.
pd.options.display.max_columns = 500

In [3]:
# Data root lives under the current user's home directory; images sit in a
# subfolder of that root.
path_data = Path.home().joinpath('data/site')
path_images = path_data.joinpath('images')

## Get metadata

In [5]:
# Read the metadata JSON stored in the "raw" folder of the data root into a DataFrame.
df = pd.read_json(path_data.joinpath('raw', 'metadata.json'))

### From S3

In [346]:
# Make the named AWS profile "crayon-site" the default boto3 session, then
# create both a resource handle and a client for S3 from it.
# NOTE(review): neither `s3` nor `s3_client` is used in the cells visible here,
# and the metadata above is read from a local file — confirm this cell is still
# needed and that it runs in order on a fresh kernel.
boto3.setup_default_session(profile_name="crayon-site")
s3 = boto3.resource("s3")
s3_client = boto3.client("s3")

## EDA

### Basic Statistics

Look at some basic statistics for the metadata of the images.

In [6]:
# Show the first record transposed so each metadata field is listed on its own row.
df.head(1).transpose()

Unnamed: 0,20_IL_1902_DBM6597911224/Mapping/DBI2168114984_DBI-136-YY9x6QBf-DJI_0918.JPG
ExifTool:ExifToolVersion,11.88
ExifTool:Warning,[minor] Possibly incorrect maker notes offsets...
File:FileName,DBI2168114984_DBI-136-YY9x6QBf-DJI_0918.JPG
File:Directory,20_IL_1902_DBM6597911224/Mapping
File:FileSize,8562206
...,...
Composite:DateTimeCreated,
Composite:DOF,
File:Directory1,20_IL_1902_DBM6597911224
File:Directory2,Mapping


Compute the mean and standard deviation of each altitude column, grouped by site (`File:Directory1`).

In [24]:
# Keep only the columns whose name matches "Altitude" or "Directory1"
# (the altitude fields plus the directory column used for grouping).
altitude_mask = df.columns.str.contains('Altitude|Directory1')
altitude_columns = df.columns[altitude_mask].tolist()
df_alt = df.loc[:, altitude_columns]

In [27]:
# Group the altitude columns by File:Directory1 and compute the mean and
# standard deviation of every remaining column within each group.
# NOTE(review): EXIF:GPSAltitudeRef looks like a 0/1 flag rather than a
# measurement (in the displayed output, groups with 1.0 show a negated
# Composite:GPSAltitude), so its mean/std are not altitude statistics — confirm
# and consider excluding it.
df_alt.groupby(['File:Directory1']).agg(['mean', 'std'])

Unnamed: 0_level_0,EXIF:GPSAltitudeRef,EXIF:GPSAltitudeRef,EXIF:GPSAltitude,EXIF:GPSAltitude,XMP:AbsoluteAltitude,XMP:AbsoluteAltitude,XMP:RelativeAltitude,XMP:RelativeAltitude,Composite:GPSAltitude,Composite:GPSAltitude,Photoshop:GlobalAltitude,Photoshop:GlobalAltitude
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
File:Directory1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
,0.0,0.0,154.273562,2.284722,,,,,154.273562,2.284722,,
1086,1.0,0.0,26.772112,1.465633,,,35.010748,0.057543,-26.772112,1.465633,,
20_AZ_1066_DBM3174640504,0.0,0.0,375.138075,0.693437,375.626402,0.608034,30.435146,0.162759,375.138075,0.693437,,
20_AZ_1069_DBM3916895777,0.0,0.0,351.044797,0.064208,351.047797,0.064208,35.007797,0.064208,351.044797,0.064208,,
20_CA_1327_DBM2236713281,1.0,0.0,18.612926,0.433817,,,35.01464,0.103819,-18.612926,0.433817,,
20_IL_1875_DBM4642991424,0.0,0.0,198.787899,0.720024,,,,,198.787899,0.720024,,
20_IL_1902_DBM6597911224,0.0,0.0,142.563228,0.074684,142.561228,0.074684,35.011228,0.074684,142.563228,0.074684,,
20_IN_1037_DBM5218738239,0.0,0.0,260.940252,0.682972,261.433239,0.62889,34.964465,0.205204,260.940252,0.682972,,
20_IN_1488_DBM0343529591,0.0,0.0,87.016144,17.104732,87.016736,17.104425,35.005952,0.05819,87.016144,17.104732,,
20_KY_1317_DBM8839393143,0.0,0.0,347.817611,2.051157,347.816808,2.051446,30.432838,0.12962,347.817611,2.051157,,
