# Read metadata of Sentinel-2
## Crawl through the metadata XML files of a list of Sentinel-2 SAFE data folders and output their center geolocations

In [35]:
import xml.etree.ElementTree as ET
from io import StringIO

import numpy as np

In [2]:
# Sentinel-2 product metadata XML files to process, one per SAFE folder.
# (No line-continuation backslashes are needed inside the brackets.)
inxmlfiles = [
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101204_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101204_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101216_R065_V20150806T102902_20150806T102902.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101216_R065_V20150806T102902_20150806T102902.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101237_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101237_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101308_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101308_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101319_R065_V20150806T102902_20150806T102902.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101319_R065_V20150806T102902_20150806T102902.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101440_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101440_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101451_R080_V20150817T114755_20150817T114755.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101451_R080_V20150817T114755_20150817T114755.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101452_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101452_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101504_R080_V20150817T114433_20150817T114433.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101504_R080_V20150817T114433_20150817T114433.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150818T101516_R022_V20150813T102406_20150813T102406.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150818T101516_R022_V20150813T102406_20150813T102406.xml',
    '/neponset/nbdata07/aerb/Sentinel/OriginalDownloads/S2A_OPER_PRD_MSIL1C_PDMC_20150820T085706_R051_V20150815T110427_20150815T110427.SAFE/S2A_OPER_MTD_SAFL1C_PDMC_20150820T085706_R051_V20150815T110427_20150815T110427.xml',
]

In [137]:
def arg_dup(arr1d, tol=1e-5):
    """Return the indexes of every element of a 1-D array that has a near-duplicate.

    Two values count as duplicates when, after sorting, the gap between
    neighbours is not greater than ``tol``. Every member of such a run is
    reported (not just the repeats), so ``[5., 1., 5.]`` yields the indexes
    of both 5s.

    Parameters
    ----------
    arr1d : array_like
        One-dimensional sequence of numbers. It is not modified.
    tol : float, optional
        Largest gap between sorted neighbours that is still treated as
        "the same value". Defaults to 1e-5.

    Returns
    -------
    numpy.ndarray
        Integer indexes into ``arr1d``, listed in ascending order of the
        values they point at. Empty when nothing is duplicated or the input
        is empty.
    """
    values = np.asarray(arr1d)
    if values.size == 0:
        # np.diff of an empty array is empty, so there is nothing to compare.
        return np.empty(0, dtype=np.intp)

    order = np.argsort(values)
    # close_to_next[k] is True when sorted element k and k+1 are within tol.
    # Written as not(gap > tol) so that NaN gaps are treated as "close",
    # matching how the comparison behaved before.
    close_to_next = np.logical_not(np.diff(values[order]) > tol)

    # An element is a duplicate if it is close to its successor or to its
    # predecessor in sorted order.
    is_dup = np.zeros(values.size, dtype=bool)
    is_dup[:-1] |= close_to_next
    is_dup[1:] |= close_to_next
    return order[is_dup]

In [147]:
def img_center(inxml, tol=1e-5):
    """Return the mean of the distinct positions listed in a metadata XML file.

    The file is parsed with ElementTree and must contain exactly one
    ``EXT_POS_LIST`` element anywhere in the tree. Its text is read as
    whitespace-separated numbers that are grouped pairwise into rows;
    repeated rows are dropped and the column-wise mean of the remaining
    rows is returned.

    Parameters
    ----------
    inxml : str or file object
        Path of the XML file, or an open file object, as accepted by
        ``xml.etree.ElementTree.parse``.
    tol : float, optional
        Two rows are considered the same position when both of their
        components differ by no more than ``tol``. Defaults to 1e-5.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2,)`` holding the mean of each column, in the same
        order as the pairs appear in the file (presumably latitude then
        longitude -- confirm against the product specification).

    Raises
    ------
    ValueError
        If there is not exactly one ``EXT_POS_LIST`` element, or if it does
        not hold a non-empty, even count of numbers.
    """
    root = ET.parse(inxml).getroot()
    ext_pos = root.findall('.//EXT_POS_LIST')
    # Fail loudly instead of printing and then tripping over an unbound local.
    if len(ext_pos) > 1:
        raise ValueError("More than one EXT_POS_LIST found. Ambiguous information. Stop and Check!")
    if len(ext_pos) < 1:
        raise ValueError("No EXT_POS_LIST found. No information. Stop and check!")

    # split() copes with any mix of spaces/newlines; .text is None for an
    # empty element.
    values = np.array((ext_pos[0].text or '').split(), dtype=float)
    if values.size == 0 or values.size % 2 != 0:
        raise ValueError("EXT_POS_LIST must hold a non-empty, even number of coordinates")
    points = values.reshape(-1, 2)

    # Drop repeated positions, keeping the first occurrence of each.
    # Whole rows are compared: testing each column on its own would also
    # discard distinct corners that merely share one coordinate with other
    # corners (e.g. an axis-aligned footprint).
    keep = np.ones(len(points), dtype=bool)
    for i in range(1, len(points)):
        earlier = points[:i][keep[:i]]
        if np.any(np.all(np.abs(earlier - points[i]) <= tol, axis=1)):
            keep[i] = False
    return points[keep].mean(axis=0)

In [148]:
# Compute one center position per metadata file, in input order.
centers = []
for f in inxmlfiles:
    centers.append(img_center(f))

In [150]:
# Stack the per-file centers into a single 2-D array for display.
np.asarray(centers)

array([[ 45.54057171,  10.98341491],
       [ 55.42783468,  12.70589136],
       [ 46.89428776,  10.74465658],
       [ 47.30777276,  12.37200586],
       [ 45.7542157 ,   9.06261623],
       [ 45.51117222,  12.26293361],
       [ 20.55020712, -16.50684425],
       [ 45.3184355 ,  11.28062347],
       [ 39.11659376,  -8.94365982],
       [ 48.11253227,  13.72141217],
       [ 35.51143263,  -2.94628949]])

In [114]:
# Scratch cell: take a private copy of the first column of the coordinate
# table as sample input for the duplicate-detection steps below.
# NOTE(review): `ext_pos_data` is not assigned at top level in any visible
# cell (it only appears as a local inside img_center), so this cell
# presumably relied on leftover kernel state and will not run on a fresh
# kernel -- confirm, or define the sample data explicitly.
tmp = ext_pos_data[:, 0].copy()
# tmp.sort()
# np.diff(tmp)>1e-5

In [115]:
# Indexes that would put the sample values in ascending order.
ind = tmp.argsort()

In [116]:
# Show the raw values, the same values in ascending order, and a mask that is
# True where a sorted value differs from its successor by more than 1e-5.
# The final slot is forced to True because the largest value has no successor.
# Single-argument print(...) emits the same text on Python 2 and Python 3.
print(tmp)
print(tmp[ind])
mask = np.append(np.diff(tmp[ind]) > 1e-5, True)
print(mask)

[ 46.0468141   45.05874577  45.03474742  46.02197953  46.0468141
  46.0468141 ]
[ 45.03474742  45.05874577  46.02197953  46.0468141   46.0468141
  46.0468141 ]
[ True  True  True False False  True]


In [117]:
# Walk the mask from its last slot towards the first. Whenever the slot just
# before the current one is False (that value equals its successor), clear the
# current slot too, so every member of a run of equal values ends up False.
# The slot read at each step has not been modified yet, so the printed `m`
# values are those of the mask as it was before the loop.
for i, m in enumerate(mask[-1:0:-1]):
    # Formatted call prints "<i> <m>" identically on Python 2 and Python 3.
    print("%d %s" % (i, m))
    if not mask[-2-i]:
        mask[-1-i] = mask[-2-i]

0 True
1 False
2 False
3 True
4 True
