In [1]:
import ee
# Initialize the Earth Engine client library for this kernel session.
# NOTE(review): presumably requires that Earth Engine credentials were set up
# beforehand (e.g. via ee.Authenticate()) - confirm for your environment.
ee.Initialize()

In [2]:
import ee
import geemap
import os
import functools
from zipfile import ZipFile
from dbfread import DBF
from dbfread import FieldParser
import shutil
import pandas as pd
from csv import DictReader
#from zdrive import Downloader
import time

# TODOs:
# - Add more comments explaining what each function or group of functions does.
# - Generalize this into a script with the configuration variables at the top.
# - Ensure a whole year of data is used for 2015: use 2015-06-01 until 2016-07-01.

In [3]:
# Function to get a square around point of interest
# The buffer radius (in m) is supplied by the caller:
# urban_radius for urban clusters, rural_radius for all other clusters
def bounding_box(loc, urban_rural, urban_radius, rural_radius):
    """Return the bounding box of a circular buffer drawn around ``loc``.

    Parameters
    ----------
    loc : object exposing ``buffer(distance)`` whose result exposes ``bounds()``
        (an ``ee.Geometry.Point`` when called from ``get_image``).
    urban_rural : str
        Cluster type flag; ``'U'`` or ``'u'`` selects the urban radius, any
        other value selects the rural radius.
    urban_radius, rural_radius : number
        Buffer radius (half of the box width) for urban / rural clusters.

    Returns
    -------
    The result of ``loc.buffer(size).bounds()``.
    """
    # Compare by value. ``is`` tests object identity, which only happens to
    # work for some strings and raises a SyntaxWarning on current Python.
    if urban_rural in ('U', 'u'):
        size = urban_radius
    else:
        size = rural_radius

    intermediate_buffer = loc.buffer(size)  # buffer radius, half your box width in m
    intermediate_box = intermediate_buffer.bounds()  # bounding box around the circle
    return intermediate_box

In [4]:
#Masking of clouds
def maskClouds(img, MAX_CLOUD_PROBABILITY):
    """Mask out the pixels of ``img`` that are likely to be cloud.

    ``img`` must carry a ``'cloud_mask'`` property holding an image with a
    ``'probability'`` band (attached by the join in ``get_image``). Only
    pixels whose probability is strictly below ``MAX_CLOUD_PROBABILITY``
    stay unmasked.
    """
    cloud_probability = ee.Image(img.get('cloud_mask')).select('probability')
    return img.updateMask(cloud_probability.lt(MAX_CLOUD_PROBABILITY))

In [5]:
#Masking of edges
def maskEdges(s2_img):
    """Restrict ``s2_img`` to the pixels that are unmasked in both B8A and B9.

    The mask of band B8A is itself updated with the mask of band B9, and the
    combined mask is then applied to every band of the image.
    """
    b8a_mask = s2_img.select('B8A').mask()
    b9_mask = s2_img.select('B9').mask()
    combined_mask = b8a_mask.updateMask(b9_mask)
    return s2_img.updateMask(combined_mask)

In [6]:
def get_image(cluster, survey_dir, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY):
    """Start a Drive export of a cloud-masked Sentinel-2 median composite for one cluster.

    Parameters
    ----------
    cluster : mapping
        One row of a survey CSV. The keys read here are ``"year"``,
        ``"latidude"`` (sic, the spelling used when the CSV is written),
        ``"longitude"``, ``"urban_rural"`` and ``"ID-cluster"``.
    survey_dir : str
        Currently unused; only referenced by the commented-out local export.
    urban_radius, rural_radius : number
        Buffer radius in m around the cluster location, chosen via ``bounding_box``.
    MAX_CLOUD_PROBABILITY : number
        Pixels with a cloud probability at or above this value are masked.

    Returns
    -------
    The ``ee.Geometry.Point`` built from the cluster coordinates.

    Raises
    ------
    ValueError
        If the year or the coordinates cannot be parsed as numbers.
    """
    #Get images collections
    s2Sr = ee.ImageCollection('COPERNICUS/S2')
    s2Clouds = ee.ImageCollection('COPERNICUS/S2_CLOUD_PROBABILITY')

    #Get time span
    # The year arrives as text such as "2015.0" or "2015". Parsing through
    # float() handles both; slicing at rfind('.') silently dropped the last
    # digit whenever there was no decimal point ("2015" -> "201").
    year = int(float(cluster["year"]))
    if year < 2016:
        START_DATE = ee.Date('2015-06-01')
        END_DATE = ee.Date('2016-07-01')
    else:
        START_DATE = ee.Date(str(year) + '-01-01')
        # filterDate treats the end date as exclusive, so the first day of the
        # following year is needed to keep December 31st in the composite.
        END_DATE = ee.Date(str(year + 1) + '-01-01')

    #Point of interest (longitude, latitude)
    lat_float = float(cluster["latidude"])
    lon_float = float(cluster["longitude"])
    loc = ee.Geometry.Point([lon_float, lat_float])
    #Region of interest
    region = bounding_box(loc, cluster['urban_rural'], urban_radius, rural_radius)

    # Filter input collections by desired date range and region.
    s2Sr = s2Sr.filterBounds(region).filterDate(START_DATE, END_DATE).map(maskEdges)
    s2Clouds = s2Clouds.filterBounds(region).filterDate(START_DATE, END_DATE)

    # Join S2 with cloud probability dataset to add cloud mask.
    s2SrWithCloudMask = ee.Join.saveFirst('cloud_mask').apply(
        primary=s2Sr,
        secondary=s2Clouds,
        condition=ee.Filter.equals(
            leftField='system:index', rightField='system:index')
    )

    # Bind the threshold so the function can be mapped over the collection.
    maskCloudsWithProb = functools.partial(maskClouds, MAX_CLOUD_PROBABILITY=MAX_CLOUD_PROBABILITY)
    s2CloudMasked = ee.ImageCollection(s2SrWithCloudMask).map(maskCloudsWithProb).median()
    s2CloudMasked = s2CloudMasked.select(
        ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12']
    ).clip(region)

    #Saving location/directory
    #out_dir = os.path.join(survey_dir, cluster["ID-cluster"]+'.tif')
    #geemap.ee_export_image(s2CloudMasked, filename=out_dir, scale=10)
    filename = cluster["ID-cluster"]
    task = ee.batch.Export.image.toDrive(s2CloudMasked, description=filename, folder='sentinel', scale=10)
    task.start()
    print('Created', filename)
    return loc

In [7]:
# Copy only the dbf members of an already opened zip archive into a separate folder
def get_dbf(listOfFileNames, newpath, zipObject):
    """Extract every member ending in ``.dbf`` or ``.DBF`` into ``newpath``.

    ``listOfFileNames`` is the member list of ``zipObject``; all other members
    are ignored.
    """
    for member in listOfFileNames:
        if not member.endswith(('.dbf', '.DBF')):
            continue
        # Extract a single file from the zip
        zipObject.extract(member, newpath)
            
#If a zip archive contains further zip archives, continue with the largest nested archive
def extract_bigger_zip(zip_dir,filenames,newpath):
    """Unpack ``zip_dir`` next to itself and process its largest nested zip.

    Parameters
    ----------
    zip_dir : str
        Path of the outer zip file.
    filenames : iterable of str
        Member names of the outer zip (as returned by ``ZipFile.namelist``).
    newpath : str
        Folder that finally receives the dbf files (passed on to ``check_dbf``).

    The archive is extracted into a folder named like the zip file without its
    extension. Among the members ending in ``.zip``/``.ZIP`` the one with the
    largest size on disk is handed to ``check_dbf``; on a tie the later member
    wins. If there is no nested zip, nothing further happens.
    """
    # splitext removes only the final extension. Slicing at the first '.' of
    # the whole path cut the path short whenever a directory name had a dot.
    zips_dir = os.path.splitext(zip_dir)[0]
    if zips_dir == zip_dir:
        # No extension to strip: avoid extracting onto the archive itself.
        zips_dir = zip_dir + '_extracted'
    if not os.path.exists(zips_dir):
        os.makedirs(zips_dir)
    with ZipFile(zip_dir, 'r') as zipObj:
        #Extract all the contents of zip file in different directory
        zipObj.extractall(zips_dir)

    big_size = 0
    big_zip = None
    for item in filenames:
        if item.endswith('.zip') or item.endswith('.ZIP'):
            file_dir = os.path.join(zips_dir, item)
            curr_size = os.stat(file_dir).st_size
            if curr_size >= big_size:
                big_size = curr_size
                big_zip = file_dir

    # Without a nested archive there is nothing to recurse into.
    if big_zip is None:
        return
    check_dbf(big_zip, newpath)
    
#Second main part: handles a single zip file
def check_dbf(zip_dir, newpath):
    """Pull the dbf files out of one zip archive, descending into nested zips if needed.

    If the archive lists at least one ``.dbf``/``.DBF`` member, those members
    are extracted to ``newpath`` via ``get_dbf``. Otherwise, if it lists a
    ``.zip``/``.ZIP`` member, ``extract_bigger_zip`` takes over. An archive
    with neither is left alone.
    """
    dbf_suffixes = ('.dbf', '.DBF')
    zip_suffixes = ('.zip', '.ZIP')

    with ZipFile(zip_dir, 'r') as archive:
        members = archive.namelist()

        if any(name.endswith(dbf_suffixes) for name in members):
            get_dbf(members, newpath, archive)
            return

        if any(name.endswith(zip_suffixes) for name in members):
            extract_bigger_zip(zip_dir, members, newpath)

#Remove the folders created while unpacking nested zip archives
def delete_zip_folders(dir_name,newpath):
    """Delete every direct sub-folder of ``dir_name`` except ``newpath``.

    Note that this removes ALL sub-folders other than ``newpath``, not only
    the ones created during extraction. ``newpath`` is compared as a plain
    string against ``os.scandir`` entry paths.
    """
    subfolders = []
    for entry in os.scandir(dir_name):
        if entry.is_dir():
            subfolders.append(entry.path)

    for path in subfolders:
        if path != newpath:
            shutil.rmtree(path)
            
#Main part -> runs through all zip files in the directory
def extract_dbf(dir_name, newpath):
    """Collect the dbf files of every zip archive in ``dir_name`` into ``newpath``.

    ``newpath`` is created if missing. Each entry of ``dir_name`` ending in
    ``.zip``/``.ZIP`` is processed by ``check_dbf``; afterwards every
    sub-folder of ``dir_name`` other than ``newpath`` is deleted again.
    """
    #Create folder for DBF files
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    #Extract dbf file (if existing) from each zip file
    for entry in os.listdir(dir_name):
        if entry.endswith(('.zip', '.ZIP')):
            check_dbf(os.path.join(dir_name, entry), newpath)

    delete_zip_folders(dir_name, newpath)
    

In [8]:
# Some dbf files contain \x00 bytes which the standard dbfread parser cannot process.
# This subclass strips them first; it is based on https://github.com/olemb/dbfread/issues/20

class MyFieldParser(FieldParser):
    """Field parser that removes padding bytes before delegating to ``FieldParser``."""

    def parseN(self, field, data):
        """Parse a numeric field after stripping whitespace, then ``*`` and NUL bytes."""
        # Whitespace has to go first so that the '*' / \x00 characters end up at the edges.
        cleaned = data.strip()
        cleaned = cleaned.strip(b'*\x00')
        return super().parseN(field, cleaned)

    def parseD(self, field, data):
        """Parse a date field after stripping NUL bytes."""
        cleaned = data.strip(b'\x00')
        return super().parseD(field, cleaned)
    
    
#Create single CSV file
def get_csv(dbf_path, dir_csv):
    """Write the cluster records of one dbf file to ``<dir_csv>/<survey>.csv``.

    Parameters
    ----------
    dbf_path : str
        Path of the dbf file. The survey name is the file name without its
        extension.
    dir_csv : str
        Existing folder that receives the CSV file.

    Files whose survey name ends in ``'SR'`` are not converted; their name is
    printed instead. For all other files one row per record is written with
    the columns ``ID-survey``, ``ID-cluster``, ``cluster``, ``year``,
    ``urban_rural``, ``latidude`` (sic) and ``longitude``.
    """
    # Take the basename first and strip only its extension. Slicing the full
    # path at the last '.' picked up dots in directory names and chopped the
    # final character off extension-less file names.
    filename = os.path.splitext(os.path.basename(dbf_path))[0]

    if filename.endswith('SR'):
        print(filename)
        return

    table = DBF(dbf_path, parserclass=MyFieldParser)
    clusters = [
        {
            "ID-survey": filename,
            "ID-cluster": record['DHSID'],
            "cluster": record['DHSCLUST'],
            "year": record["DHSYEAR"],
            "urban_rural": record['URBAN_RURA'],
            "latidude": record["LATNUM"],
            "longitude": record['LONGNUM'],
        }
        for record in table
    ]

    clust_df = pd.DataFrame(clusters)

    export_name = filename + '.csv'
    export_dir = os.path.join(dir_csv, export_name)

    clust_df.to_csv(export_dir)

In [9]:
#Create CSV files for each survey (with dbf file); Extract information from dbf file
def create_csv(dir_dbf, dir_csv):
    """Convert every dbf file in ``dir_dbf`` to a CSV file in ``dir_csv``.

    ``dir_csv`` is created if it does not exist. Only regular files whose name
    ends in ``.dbf`` (any letter case) are passed to ``get_csv``. Sub-folders
    and other files are skipped instead of being handed to the dbf reader.
    """
    if not os.path.exists(dir_csv):
        os.makedirs(dir_csv)

    for file in os.listdir(dir_dbf):
        dbf_path = os.path.join(dir_dbf, file)
        # Zip members extracted with their internal folder structure leave
        # directories here; those (and stray non-dbf files) cannot be parsed.
        if not (os.path.isfile(dbf_path) and file.lower().endswith('.dbf')):
            continue
        get_csv(dbf_path, dir_csv)
    return

In [10]:
#Move all csv files of surveys which took place before 2013 to a separate folder
def before_2013 (export_path, before_2013):
    """Move survey CSV files whose latest ``year`` value is below 2013.

    ``export_path`` is the folder holding the survey CSV files and
    ``before_2013`` the destination folder (created if missing). Only entries
    ending in ``.csv`` are inspected; a file is moved with ``os.rename`` when
    the maximum of its ``year`` column is smaller than 2013.
    """
    if not os.path.exists(before_2013):
        os.makedirs(before_2013)

    for name in os.listdir(export_path):
        if not name.endswith('.csv'):
            continue

        source = os.path.join(export_path, name)
        years = pd.read_csv(source, usecols = ['year'])['year']

        if years.max() < 2013:
            os.rename(source, os.path.join(before_2013, name))

In [11]:

def get_survey_images(file_dir, survey_dir, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY, delay_seconds=45):
    """Start one image export per cluster listed in a survey CSV file.

    Parameters
    ----------
    file_dir : str
        Path of the survey CSV file; every row is passed to ``get_image``.
    survey_dir, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY
        Forwarded unchanged to ``get_image``.
    delay_seconds : number, optional
        Pause after each started export. Defaults to the previously
        hard-coded 45 seconds (presumably to throttle task submission).
    """
    # newline='' is what the csv module expects for files it parses itself.
    with open(file_dir, 'r', newline='') as read_obj:
        # DictReader yields one dictionary per row; materialize them so the
        # file can be closed before the long-running export loop starts.
        clusters = list(DictReader(read_obj))

    for cluster in clusters:
        get_image(cluster, survey_dir, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY)
        time.sleep(delay_seconds)
# Disabled helper: the definition below sits inside a bare string literal and is
# therefore never executed. It relies on `Downloader`, whose import
# (`from zdrive import Downloader`) is commented out at the top of the notebook.
'''   
def download_local(survey_dir):
    output_directory = survey_dir
    d = Downloader()

    # folder which want to download from Drive
    folder_id = '1ST67vgoNlfuClI-zPlEp4F38JsnaM441'
    d.downloadFolder(folder_id, destinationFolder=output_directory)
'''    
#Main function for getting the sentinel images: creates one folder per survey and starts its exports
def sentinel_img_survey(img_dir, csv_dir, sentinel_done, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY):
    """Start the image exports for every survey CSV that is not marked as done.

    Parameters
    ----------
    img_dir : str
        Root folder for the per-survey image folders (created if missing).
    csv_dir : str
        Folder containing one CSV file per survey.
    sentinel_done : str
        Path of a text file listing finished surveys. A survey is skipped when
        its name (CSV file name without extension) occurs anywhere in the
        text. The file has to be edited manually.
    urban_radius, rural_radius, MAX_CLOUD_PROBABILITY
        Forwarded to ``get_survey_images``.

    Raises
    ------
    FileNotFoundError
        If ``sentinel_done`` does not exist.
    """
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    for file in os.listdir(csv_dir):
        if not file.endswith('.csv'):
            continue
        filename = file[:file.rfind('.')]

        # The done-file is re-read for each survey, as before, but the handle
        # is closed right away instead of staying open during the whole
        # (long-running) export of the survey.
        with open(sentinel_done) as f:
            done_surveys = f.read()
        if filename in done_surveys:
            continue

        survey_dir = os.path.join(img_dir, filename)
        if not os.path.exists(survey_dir):
            os.makedirs(survey_dir)
        file_dir = os.path.join(csv_dir, file)
        get_survey_images(file_dir, survey_dir, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY)
        #download_local(survey_dir)
        print(file, 'finished')

In [None]:
#Main Part

#Parameter
# Buffer radii around each cluster location (passed to bounding_box, in m).
urban_radius = 1000
rural_radius = 5000
# Pixels with a cloud probability at or above this value are masked (see maskClouds).
MAX_CLOUD_PROBABILITY = 20


#Paths
# The machine-specific locations can be overridden with the environment
# variables GPS_DATA_DIR and SENTINEL_IMG_DIR; the defaults are the original paths.
zip_dir = os.environ.get(
    "GPS_DATA_DIR",
    "/home/shannon/Dokumente/Dokumente/studium/ASA/Projekt/SatelliteImage__GEE/correlation/GPS_Data",
)
dbf_dir = os.path.join(zip_dir, "dbf_files")
csv_dir = os.path.join(zip_dir, "gps_csv")
before_2013_dir = os.path.join(csv_dir, "before_2013")
#if directly to local computer
#img_dir = os.path.join(csv_dir, "tif_data")
img_dir = os.environ.get("SENTINEL_IMG_DIR", '/run/media/shannon/TOSHIBA/Sentinel')
# Manually maintained list of surveys whose exports are already finished.
sentinel_done = os.path.join(zip_dir, "sentinel_done.txt")


#Functions
# 1) pull the dbf files out of the zip archives
extract_dbf(zip_dir, dbf_dir)
print('Extracted dbf files')
# 2) convert each dbf file into a survey CSV
create_csv(dbf_dir, csv_dir)
print('created csv data')
# 3) set aside surveys from before 2013
before_2013(csv_dir, before_2013_dir)
print('Moved all surveys from before 2013 into seperate folder')
# 4) start the Sentinel-2 exports for the remaining surveys
sentinel_img_survey(img_dir, csv_dir, sentinel_done, urban_radius, rural_radius, MAX_CLOUD_PROBABILITY)


Extracted dbf files
CDGE71FLSR
KEGE6AFLSR
MWGE6IFLSR
NMGE6AFLSR
SNGE6IFLSR
SNGE71FLSR
SNGE7AFLSR
SNGE7IFLSR
SNGE7RFLSR
TZGE71FLSR
created csv data
Moved all surveys from before 2013 into seperate folder
Created ZW201500000001
Created ZW201500000002
Created ZW201500000003
Created ZW201500000004
Created ZW201500000005
Created ZW201500000006
Created ZW201500000007
Created ZW201500000008
Created ZW201500000009
Created ZW201500000010
Created ZW201500000011
Created ZW201500000012
Created ZW201500000013
Created ZW201500000014
Created ZW201500000015
Created ZW201500000016
Created ZW201500000017
Created ZW201500000018
Created ZW201500000019
Created ZW201500000020
Created ZW201500000021
Created ZW201500000022
Created ZW201500000023
Created ZW201500000024
Created ZW201500000025
Created ZW201500000026
Created ZW201500000027
Created ZW201500000028
Created ZW201500000029
Created ZW201500000030
Created ZW201500000031
Created ZW201500000032
Created ZW201500000033
Created ZW201500000034
Created ZW20150

Created ZW201500000349
Created ZW201500000350
Created ZW201500000351
Created ZW201500000352
Created ZW201500000353
Created ZW201500000354
Created ZW201500000355
Created ZW201500000356
Created ZW201500000357
Created ZW201500000358
Created ZW201500000359
Created ZW201500000360
Created ZW201500000361
Created ZW201500000362
Created ZW201500000363
Created ZW201500000364
Created ZW201500000365
Created ZW201500000366
Created ZW201500000367
Created ZW201500000368
Created ZW201500000369
Created ZW201500000370
Created ZW201500000371
Created ZW201500000372
Created ZW201500000373
Created ZW201500000374
Created ZW201500000375
Created ZW201500000376
Created ZW201500000377
Created ZW201500000378
Created ZW201500000379
Created ZW201500000380
Created ZW201500000381
Created ZW201500000382
Created ZW201500000383
Created ZW201500000384
Created ZW201500000385
Created ZW201500000386
Created ZW201500000387
Created ZW201500000388
Created ZW201500000389
Created ZW201500000390
Created ZW201500000391
Created ZW2

Created EG201401371006
Created EG201401371012
Created EG201401381307
Created EG201401381313
Created EG201401390203
Created EG201401390207
Created EG201401400210
Created EG201401400403
Created EG201401410702
Created EG201401410908
Created EG201401420103
Created EG201401420107
Created EG201401430402
Created EG201401430406
Created EG201401440802
Created EG201401440807
Created EG201401450203
Created EG201401450205
Created EG201401460501
Created EG201401460503
Created EG201401470110
Created EG201401470119
Created EG201401480103
Created EG201401480405
Created EG201401490301
Created EG201401490304
Created EG201401500601
Created EG201401500606
Created EG201401510903
Created EG201401510905
Created EG201401521201
Created EG201401521205
Created EG201401531502
Created EG201401531504
Created EG201401541806
Created EG201401541811
Created EG201401552102
Created EG201401552105
Created EG201401562404
Created EG201401562408
Created EG201401572704
Created EG201401572709
Created EG201401590202
Created EG2

Created EG201403107701
Created EG201403118401
Created EG201403119108
Created EG201403119804
Created EG201403120701
Created EG201403121502
Created EG201403122303
Created EG201403130203
Created EG201403131105
Created EG201403142005
Created EG201403142902
Created EG201403153806
Created EG201403154709
Created EG201403165501
Created EG201403166406
Created EG201403177302
Created EG201403178202
Created EG201403180101
Created EG201403180103
Created EG201403190202
Created EG201403190205
Created EG201403200101
Created EG201403200103
Created EG201403210202
Created EG201403210205
Created EG201403220104
Created EG201403220404
Created EG201403230101
Created EG201403230104
Created EG201403240205
Created EG201403240505
Created EG201403250103
Created EG201403250104
Created EG201403260102
Created EG201403260106
Created EG201403270104
Created EG201403270108
Created EG201403280301
Created EG201403281002
Created EG201403290507
Created EG201403291104
Created EG201403300104
Created EG201403300108
Created EG2

Created EG201404850502
Created EG201404860902
Created EG201404861201
Created EG201404870304
Created EG201404870602
Created EG201404880105
Created EG201404880403
Created EG201404890109
Created EG201404890404
Created EG201404900307
Created EG201404900703
Created EG201404910102
Created EG201404910104
Created EG201404920202
Created EG201404920502
Created EG201404930107
Created EG201404930405
Created EG201404940101
Created EG201404940401
Created EG201404950201
Created EG201404950402
Created EG201404960201
Created EG201404960203
Created EG201404970301
Created EG201404970303
Created EG201404980302
Created EG201404980704
Created EG201404991101
Created EG201404991402
Created EG201405000201
Created EG201405000202
Created EG201405010204
Created EG201405010605
Created EG201405020903
Created EG201405021305
Created EG201405030201
Created EG201405030502
Created EG201405040201
Created EG201405040202
Created EG201405050102
Created EG201405050104
Created EG201405060204
Created EG201405060404
Created EG2

Created EG201406550105
Created EG201406560202
Created EG201406560501
Created EG201406570201
Created EG201406570204
Created EG201406580301
Created EG201406580303
Created EG201406590101
Created EG201406590103
Created EG201406600101
Created EG201406600105
Created EG201406610204
Created EG201406611003
Created EG201406620204
Created EG201406621004
Created EG201406630101
Created EG201406630904
Created EG201406640402
Created EG201406641303
Created EG201406650404
Created EG201406651205
Created EG201406660102
Created EG201406660402
Created EG201406670708
Created EG201406671403
Created EG201406680309
Created EG201406680904
Created EG201406690304
Created EG201406691005
Created EG201406700208
Created EG201406700401
Created EG201406710304
Created EG201406710602
Created EG201406720204
Created EG201406720801
Created EG201406730103
Created EG201406730304
Created EG201406740202
Created EG201406740602
Created EG201406750101
Created EG201406750401
Created EG201406760202
Created EG201406760203
Created EG2