# Data Changes Evaluation

In [1]:
import openeo
import time
import logging
# Configure the root logger at INFO level for the rest of the notebook
logging.basicConfig(level=logging.INFO)

In [2]:
# Define constants

# Each entry describes one spatio-temporal extent: bounding box (west, south, east,
# north) in the given CRS plus a begin/end date. The trailing comment names the
# source of the extent; a trailing number distinguishes entries from the same source.
datasets = [
{"west": 10.288696, "south": 45.935871, "east": 12.189331, "north": 46.905246, "crs": "EPSG:4326", "begin": "2017-05-01", "end": "2017-05-31"}, # running example
{"west": 26.330109, "south": -16.023376, "east": 28.171692, "north": -15.253714, "crs": "EPSG:4326", "begin": "2006-03-30", "end": "2006-03-30"}, # http://dx.doi.org/10.3390/rs8050402 (1)
{"west": 26.830673, "south": -15.307366, "east": 27.052460, "north": -15.113227, "crs": "EPSG:4326", "begin": "2007-03-30", "end": "2007-03-30"}, # http://dx.doi.org/10.3390/rs8050402 (2)
{"west": 25.563812, "south": -14.429360, "east": 26.092529, "north": -13.980713, "crs": "EPSG:4326", "begin": "2006-03-29", "end": "2006-03-31"}, # http://dx.doi.org/10.3390/rs8050402 (3)
{"west": -2.449951, "south": 51.771239, "east": -2.239838, "north": 51.890901, "crs": "EPSG:4326", "begin": "2007-07-23", "end": "2007-07-23"}, # http://dx.doi.org/10.1016/j.jag.2014.12.001 (1)
{"west": -2.449951, "south": 51.771239, "east": -2.239838, "north": 51.890901, "crs": "EPSG:4326", "begin": "2005-08-22", "end": "2005-08-22"}, # http://dx.doi.org/10.1016/j.jag.2014.12.001 (2)
{"west": -2.449951, "south": 51.771239, "east": -2.239838, "north": 51.890901, "crs": "EPSG:4326", "begin": "2007-07-23", "end": "2007-07-24"}, # http://dx.doi.org/10.1016/j.jag.2016.12.003 (1)
{"west": 16.506958, "south": 47.529257, "east": 17.188110, "north": 48.022998, "crs": "EPSG:4326", "begin": "2007-07-23", "end": "2007-07-24"}, # "Big Data Infrastructures for Processing Sentinel Data", Wolfgang Wagner
{"west": 104.276733, "south": 8.423470, "east": 106.809082, "north": 11.156845, "crs": "EPSG:4326", "begin": "2007-01-01", "end": "2011-01-01"}, # "The Use of SAR Backscatter Time Series for Characterising Rice Phenology", Duy Nguyen
]

# Connection
# Base URL handed to openeo.connect() when the connection is created.
LOCAL_EODC_DRIVER_URL = "http://openeo.local.127.0.0.1.nip.io"


In [3]:
# Connect to the local EODC driver back end (URL in LOCAL_EODC_DRIVER_URL)
con = openeo.connect(LOCAL_EODC_DRIVER_URL)
# Reset mock-up state and database before the evaluation steps are run
con.resetdb()
# NOTE(review): presumably clears any simulated file update/deletion left over from an
# earlier run (arguments read like "nothing updated, nothing deleted") — confirm in the client.
con.update_file(None, False)
# Show the connection object as the cell output
con

<openeo.rest.rest_connection.RESTConnection at 0x7fc7df304dd8>

In [4]:
# Choose dataset: the running example is the first entry of `datasets`, so the
# extent and date range are read from the constants cell instead of being
# repeated here as literals that could drift out of sync.
example = datasets[0]
processes = con.get_processes()
pgA = processes.get_collection(name="s2a_prd_msil1c")
pgA = processes.filter_daterange(pgA, extent=[example["begin"], example["end"]])
pgA = processes.filter_bbox(
    pgA,
    west=example["west"],
    south=example["south"],
    east=example["east"],
    north=example["north"],
    crs=example["crs"],
)

# Choose processes: NDVI from bands B08 (nir) and B04 (red), followed by min_time
pgA = processes.ndvi(pgA, nir="B08", red="B04")
pgA = processes.min_time(pgA)
# Show the assembled process graph as the cell output
pgA.graph

{'imagery': {'imagery': {'extent': {'crs': 'EPSG:4326',
    'east': 12.189331,
    'north': 46.905246,
    'south': 45.935871,
    'west': 10.288696},
   'imagery': {'extent': ['2017-05-01', '2017-05-31'],
    'imagery': {'name': 's2a_prd_msil1c', 'process_id': 'get_collection'},
    'process_id': 'filter_daterange'},
   'process_id': 'filter_bbox'},
  'nir': 'B08',
  'process_id': 'NDVI',
  'red': 'B04'},
 'process_id': 'min_time'}

In [5]:
# Create job A from process graph A (pgA) through the connection
jobA = con.create_job(pgA.graph)

# Show the identifier assigned to the new job
jobA.job_id

'jb-2476c3f0-b27b-4653-b255-806b91214d28'

In [6]:
# Start job A at the back end; the value returned by start_job() is the cell output
jobA.start_job()

202

In [7]:
# Wait until the job execution has finished.
# Pause between polls: without a delay this loop spins at full speed and
# (presumably) queries the back end on every describe_job access while the
# job is still "submitted".
# NOTE(review): only the "submitted" status is treated as "not finished" — confirm
# the back end has no other intermediate status that should be waited on as well.
desc = jobA.describe_job
while desc["status"] == "submitted":
    time.sleep(1)
    desc = jobA.describe_job
# Data PID identifying job A's input data; shown as the cell output
pidA = jobA.get_data_pid()
pidA

'qu-672574fc-cc86-424f-af8d-fe18445e2ed5'

In [10]:
# Re-execute the query identified by pidA and get the resulting file list from the back end
# NOTE(review): this cell's execution count (In [10]) is higher than that of the following
# cell (In [9]), so the recorded output may come from out-of-order execution — verify with
# a fresh top-to-bottom run.
file_listA = con.get_filelist(pidA)
# Show the "state" field reported for the input files
file_listA["input_files"]["state"]

'EQUAL'

In [9]:
''' 2. Update one of the resulting files of the PID-A query  '''

# Called without arguments, i.e. with the client's defaults.
# NOTE(review): presumably simulates an update of one result file at the mock-up back end — confirm in the client.
con.update_file()


True

In [11]:
''' 3. Get file list of PID-A  '''
# Re-execute the query behind pidA again and get the resulting file list from the back end.
# Stored under a separate name so that file_listA from the earlier cell stays available
# for the comparisons made further down.
file2_listA = con.get_filelist(pidA)
# Show the "state" field reported for the input files
file2_listA["input_files"]["state"]

'EQUAL'

In [12]:
''' 4. Run duplicate of Job A named Job B  '''
# Reuse the process graph defined for job A to create job B, then start it immediately;
# the value returned by start_job() is the cell output.
jobB = con.create_job(pgA.graph)
jobB.start_job()

202

In [13]:
# Wait until the job execution has finished.
# Pause between polls: without a delay this loop spins at full speed and
# (presumably) queries the back end on every describe_job access while the
# job is still "submitted".
# NOTE(review): only the "submitted" status is treated as "not finished" — confirm
# the back end has no other intermediate status that should be waited on as well.
desc = jobB.describe_job
while desc["status"] == "submitted":
    time.sleep(1)
    desc = jobB.describe_job
# Data PID identifying job B's input data; shown as the cell output
pidB = jobB.get_data_pid()
pidB

'qu-ac56c5c6-a9e9-414c-be4c-8041b9037730'

In [21]:
# Fetch job B's data PID once more, re-execute the query behind it and compare:
# the cell evaluates to True when the file list of job B differs from that of job A.
pidB = jobB.get_data_pid()
file_listB = con.get_filelist(pidB)
file_listA != file_listB

True

In [16]:
''' 5. Run duplicate of Job A, by using the data PID of job A named Job C  '''
# Input: the data of job A, addressed through its input data PID (pidA)
collectionC = processes.get_collection(data_pid=pidA)

# Processes: same chain as in graph A — NDVI on B08/B04, then min_time
ndviC = processes.ndvi(collectionC, nir="B08", red="B04")
pgC = processes.min_time(ndviC)

In [17]:
# Create Job C from process graph C
jobC = con.create_job(pgC.graph)
# Pause for 10 seconds before the job is started in the next cell.
# NOTE(review): the reason for the delay is not visible here — presumably it gives the
# back end time to register the job; confirm whether it is still required.
time.sleep(10)

In [18]:
# Start Job C at the back end; the value returned by start_job() is the cell output
jobC.start_job()

202

In [19]:
# Wait until the job execution has finished.
# Pause between polls: without a delay this loop spins at full speed and
# (presumably) queries the back end on every describe_job access while the
# job is still "submitted".
# NOTE(review): only the "submitted" status is treated as "not finished" — confirm
# the back end has no other intermediate status that should be waited on as well.
desc = jobC.describe_job
while desc["status"] == "submitted":
    time.sleep(1)
    desc = jobC.describe_job
# Data PID identifying job C's input data; shown as the cell output
pidC = jobC.get_data_pid()
pidC

'qu-672574fc-cc86-424f-af8d-fe18445e2ed5'

In [23]:
# Re-execute the query behind pidC, fetch its file list from the back end and compare:
# the cell evaluates to True when job C's file list is equal to the one stored for job A.
file_listC = con.get_filelist(pidC)
file_listA == file_listC

True

In [24]:
''' 6. Delete the old file of the updated one '''
# NOTE(review): deleted=True presumably makes the mock-up back end drop the pre-update
# version of the file that was updated in step 2 — confirm in the client.
con.update_file(deleted=True)


True

In [26]:
''' 7. Get Filelist of PID-A '''
# Re-execute the query behind pidA and get the resulting file list from the back end.
# Note: this rebinds file_listA, so the file list fetched before the update/delete steps
# is no longer reachable under that name once this cell has run; cells that compare
# against file_listA give different results if re-run afterwards.
file_listA = con.get_filelist(pidA)
# Show the "state" field reported for the input files
file_listA["input_files"]["state"]

"[{'date': '2017-05-04', 'name': 'S2A_MSIL1C_20170504T101031_N0205_R022_T32TPR_20170504T101349_new', 'timestamp': '2017-05-08', 'path': '/eodc/products/copernicus.eu/s2a_prd_msil1c/2017/05/04/S2A_MSIL1C_20170504T101031_N0205_R022_T32TPR_20170504T101349.zip_new'}]"

In [27]:
''' 8. Run duplicate of Job A, by using the data PID of job A named Job D  '''
# Take input data of job A by using the input data pid A of job A
pgD = processes.get_collection(data_pid=pidA)

# Choose processes.
# The NDVI step must build on pgD: feeding it pgC (the already finished graph C)
# discards the collection created above and stacks NDVI/min_time on top of a graph
# that already contains them, so job D would no longer be a duplicate of job A.
pgD = processes.ndvi(pgD, nir="B08", red="B04")
pgD = processes.min_time(pgD)


In [28]:
# Create Job D from process graph D
jobD = con.create_job(pgD.graph)
# Pause for 5 seconds before the job is started in the next cell.
# NOTE(review): the reason for the delay is not visible here, and it differs from the
# 10-second pause used after creating Job C — confirm whether either is still required.
time.sleep(5)

In [29]:
# Start Job D at the back end; the value returned by start_job() is the cell output
jobD.start_job()

202

In [30]:
# Wait until the job execution has finished.
# Pause between polls: without a delay this loop spins at full speed and
# (presumably) queries the back end on every describe_job access while the
# job is still "submitted".
# NOTE(review): only the "submitted" status is treated as "not finished" — confirm
# the back end has no other intermediate status that should be waited on as well.
desc = jobD.describe_job
while desc["status"] == "submitted":
    time.sleep(1)
    desc = jobD.describe_job
# Data PID identifying job D's input data; shown as the cell output
pidD = jobD.get_data_pid()
pidD

'qu-7227434a-a9de-4e25-837f-c7bb64863aa1'

In [31]:
# True when job D's data PID matches neither job A's nor job B's data PID
pidD not in (pidB, pidA)

True