In [167]:
import rasterio as rio
from rasterio.mask import mask
import pandas as pd
import geopandas as gpd
import cartopy.crs as ccrs
from os import path, remove
import pyproj
import numpy as np
from shapely.geometry import Point, mapping

from glob import glob
import re
from subprocess import call 

# Image Data Extraction Pipeline
The purpose here is to get all of the images in a directory, extract an area of interest from them, get pixel values, and add a row to a database for each pixel in the area of interest within each image. 

In [61]:
IMAGEDIR = "../images/"
locfile = "locations_map.csv"
file_search_format = "*_*.zip"
filename_regex = "(\d*)_(.*).zip"

In [67]:
files = list(glob(path.join(IMAGEDIR, file_search_format)))
locations = pd.read_csv(path.join(IMAGEDIR, locfile)).set_index("id")

In [253]:
def process_file(fullpath):
    filename = path.basename(fullpath)
    match = re.search(filename_regex, filename)
    loc_id = match[1]
    img_id = match[2]
    print("Unzipping..."+fullpath)
    call(['unzip', fullpath])

    loc = locations.loc[int(loc_id)]
    print(glob("*AnalyticMS_clip.tif")[0])
    raster = rio.open(glob("*AnalyticMS_clip.tif")[0])
    rast_proj = pyproj.Proj(init=raster.crs['init'])
    buf_proj = pyproj.Proj(init='epsg:4326')
    
    lon, lat = pyproj.transform(buf_proj, rast_proj, loc.longitude, loc.latitude)
    buf = mapping(Point(lon, lat).buffer(5))
    
    print(lon, lat)

    out_image, out_transform = mask(raster, [buf], crop=True)

    no_data = raster.nodata
    data = out_image.data[0]
    
    reflectance = np.extract(data != no_data, data)
    row, col = np.where(data != no_data) 
    T1 = out_transform * rio.Affine.translation(0.5, 0.5) # reference the pixel centre
    rc2xy = lambda r, c: (c, r) * T1 
    
    d = gpd.GeoDataFrame({'col':col,'row':row, 'loc':loc_id, 'img': img_id})
    # coordinate transformation
    try:
        d['x_geotiff'] = d.apply(lambda row: rc2xy(row.row,row.col)[0], axis=1)
        d['y_geotiff'] = d.apply(lambda row: rc2xy(row.row,row.col)[1], axis=1)
        d['geometry'] =d.apply(lambda row: Point(row['x_geotiff'], row['y_geotiff']), axis=1)
        for band in range(4):
            data = out_image.data[band]    
            reflectance = np.extract(data != no_data, data)
            d['band'+str(band+1)] = pd.Series(reflectance, index=d.index)
    except Exception as e:
        for f in glob(img_id+"*"):
            print("Deleting " + f)
            remove(f)
        print("Error reading, aborted.")
        raise Exception("Error reading")

        
    d.crs = raster.crs
    d = d.to_crs({'init':'epsg:4326'})
    
        
    
    for f in glob(img_id+"*"):
        print("Deleting " + f)
        remove(f)
    
    print('done')
    return(d.drop(['row', 'col'], axis=1))
    


In [261]:
dfs = []
for file in files:
    print(file)
    try:
        ans = process_file(file)
    except Exception:
        print("Error, skipping")
        continue
    dfs.append(ans)
    
dfs = pd.concat(dfs)

../images/1402_20170521_181232_101b.zip
Unzipping...../images/1402_20170521_181232_101b.zip
20170521_181232_101b_3B_AnalyticMS_clip.tif
610738.3910092548 5186947.353997492
Deleting 20170521_181232_101b_3B_AnalyticMS_clip.tif
Deleting 20170521_181232_101b_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170521_181232_101b_3B_AnalyticMS_metadata_clip.xml
Error reading, aborted.
Error, skipping
../images/1473_20170510_181145_1007.zip
Unzipping...../images/1473_20170510_181145_1007.zip
20170510_181145_1007_3B_AnalyticMS_clip.tif
610556.9200630395 5186610.526534616
Deleting 20170510_181145_1007_3B_AnalyticMS_metadata_clip.xml
Deleting 20170510_181145_1007_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170510_181145_1007_3B_AnalyticMS_clip.tif
done
../images/1270_20170522_181357_0f35.zip
Unzipping...../images/1270_20170522_181357_0f35.zip
20170522_181357_0f35_3B_AnalyticMS_clip.tif
595970.8738076831 5180171.462718747
Deleting 20170522_181357_0f35_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170522_181357_0f35

20170610_181351_1002_3B_AnalyticMS_clip.tif
595970.8738076831 5180171.462718747
Deleting 20170610_181351_1002_3B_AnalyticMS_clip.tif
Deleting 20170610_181351_1002_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170610_181351_1002_3B_AnalyticMS_metadata_clip.xml
done
../images/1224_20170409_181121_1009.zip
Unzipping...../images/1224_20170409_181121_1009.zip
20170409_181121_1009_3B_AnalyticMS_clip.tif
611370.7368328264 5197312.529572357
Deleting 20170409_181121_1009_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170409_181121_1009_3B_AnalyticMS_metadata_clip.xml
Deleting 20170409_181121_1009_3B_AnalyticMS_clip.tif
done
../images/1398_20170502_181207_0f52.zip
Unzipping...../images/1398_20170502_181207_0f52.zip
20170502_181207_0f52_3B_AnalyticMS_clip.tif
610736.1440104076 5186945.089287995
Deleting 20170502_181207_0f52_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170502_181207_0f52_3B_AnalyticMS_metadata_clip.xml
Deleting 20170502_181207_0f52_3B_AnalyticMS_clip.tif
Error reading, aborted.
Error, skipping
.

20170403_181109_1012_3B_AnalyticMS_clip.tif
610738.3910092548 5186947.353997492
Deleting 20170403_181109_1012_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170403_181109_1012_3B_AnalyticMS_metadata_clip.xml
Deleting 20170403_181109_1012_3B_AnalyticMS_clip.tif
done
../images/1228_20170403_181108_1012.zip
Unzipping...../images/1228_20170403_181108_1012.zip
20170403_181108_1012_3B_AnalyticMS_clip.tif
611371.6432848042 5197304.765202335
Deleting 20170403_181108_1012_3B_AnalyticMS_clip.tif
Deleting 20170403_181108_1012_3B_AnalyticMS_metadata_clip.xml
Deleting 20170403_181108_1012_3B_AnalyticMS_DN_udm_clip.tif
done
../images/1228_20161230_193630_0d06.zip
Unzipping...../images/1228_20161230_193630_0d06.zip
20161230_193630_0d06_3B_AnalyticMS_clip.tif
611371.6432848042 5197304.765202335
Deleting 20161230_193630_0d06_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20161230_193630_0d06_3B_AnalyticMS_clip.tif
Deleting 20161230_193630_0d06_3B_AnalyticMS_metadata_clip.xml
done
../images/1310_20170507_181332_0f43.

Deleting 20170512_181153_0f51_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170512_181153_0f51_3B_AnalyticMS_clip.tif
Deleting 20170512_181153_0f51_3B_AnalyticMS_metadata_clip.xml
done
../images/1266_20170524_181406_1039.zip
Unzipping...../images/1266_20170524_181406_1039.zip
20170524_181406_1039_3B_AnalyticMS_clip.tif
595966.2748854075 5180172.500688318
Deleting 20170524_181406_1039_3B_AnalyticMS_clip.tif
Deleting 20170524_181406_1039_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170524_181406_1039_3B_AnalyticMS_metadata_clip.xml
done
../images/1477_20170415_011314_1_0c82.zip
Unzipping...../images/1477_20170415_011314_1_0c82.zip
20170415_011314_1_0c82_3B_AnalyticMS_clip.tif
610556.8175402258 5186616.0825707605
Deleting 20170415_011314_1_0c82_3B_AnalyticMS_clip.tif
Deleting 20170415_011314_1_0c82_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170415_011314_1_0c82_3B_AnalyticMS_metadata_clip.xml
Error reading, aborted.
Error, skipping
../images/1590_20170507_181332_0f43.zip
Unzipping...../images/1590_2

20170305_181650_0e19_3B_AnalyticMS_clip.tif
611371.6432848042 5197304.765202335
Deleting 20170305_181650_0e19_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170305_181650_0e19_3B_AnalyticMS_metadata_clip.xml
Deleting 20170305_181650_0e19_3B_AnalyticMS_clip.tif
done
../images/1590_20170331_181122_1024.zip
Unzipping...../images/1590_20170331_181122_1024.zip
20170331_181122_1024_3B_AnalyticMS_clip.tif
589778.5542664389 5178090.615017051
Deleting 20170331_181122_1024_3B_AnalyticMS_metadata_clip.xml
Deleting 20170331_181122_1024_3B_AnalyticMS_clip.tif
Deleting 20170331_181122_1024_3B_AnalyticMS_DN_udm_clip.tif
done
../images/1356_20161207_181509_0e0e.zip
Unzipping...../images/1356_20161207_181509_0e0e.zip
20161207_181509_0e0e_3B_AnalyticMS_clip.tif
594883.4537370933 5180164.181023777
Deleting 20161207_181509_0e0e_3B_AnalyticMS_metadata_clip.xml
Deleting 20161207_181509_0e0e_3B_AnalyticMS_clip.tif
Deleting 20161207_181509_0e0e_3B_AnalyticMS_DN_udm_clip.tif
done
../images/1473_20170413_215945_0c45.

20170525_181354_1017_3B_AnalyticMS_clip.tif
594883.4537370933 5180164.181023777
Deleting 20170525_181354_1017_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170525_181354_1017_3B_AnalyticMS_metadata_clip.xml
Deleting 20170525_181354_1017_3B_AnalyticMS_clip.tif
done
../images/1307_20170507_181332_0f43.zip
Unzipping...../images/1307_20170507_181332_0f43.zip
20170507_181332_0f43_3B_AnalyticMS_clip.tif
591330.4121310068 5177771.679141472
Deleting 20170507_181332_0f43_3B_AnalyticMS_metadata_clip.xml
Deleting 20170507_181332_0f43_3B_AnalyticMS_DN_udm_clip.tif
Deleting 20170507_181332_0f43_3B_AnalyticMS_clip.tif
done
../images/1469_20170409_181817_0e0d.zip
Unzipping...../images/1469_20170409_181817_0e0d.zip
20170409_181817_0e0d_3B_AnalyticMS_clip.tif
610557.8058265822 5186603.873365733
Deleting 20170409_181817_0e0d_3B_AnalyticMS_metadata_clip.xml
Deleting 20170409_181817_0e0d_3B_AnalyticMS_clip.tif
Deleting 20170409_181817_0e0d_3B_AnalyticMS_DN_udm_clip.tif
done
../images/1224_20170115_181453_0e19.

In [266]:
dfs.to_csv("PROVISIONAL_datatable.csv")