<a href="https://colab.research.google.com/github/Transega/Continous-Pipeline-Documentation/blob/main/data_preparation_xarray.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook helps prepare datasets for an ML pipeline, using Sentinel-2 time-series data.

# Installation of Packages

In [1]:
%%capture
# Colab-only setup: when the IPython shell reports a google.colab environment,
# install/upgrade xee and install rioxarray. %%capture hides the pip output.
if 'google.colab' in str(get_ipython()):
    !pip install --upgrade xee
    !pip install rioxarray

In [58]:
# Installs geemap package
import subprocess

try:
    import geemap
    import geopandas as gpd
    import numpy as np
    from rasterstats import zonal_stats
    from rasterstats.io import array_to_affine
except ImportError:
    print('geemap package not installed. Installing ...')
    subprocess.check_call(["python", '-m', 'pip', 'install', 'geemap'])
    subprocess.check_call(["python", '-m', 'pip', 'install', 'geopandas'])
    subprocess.check_call(["python", '-m', 'pip', 'install', 'rasterstats'])

# Checks whether this notebook is running on Google Colab
try:
    import google.colab
except:
    import geemap

import ee
import xarray
import rioxarray as rxr
import logging
%matplotlib inline
from ee.ee_exception import EEException
from rasterstats import zonal_stats
from rasterio.features import geometry_mask
import geopandas as gpd
import datetime as dt
import dask.dataframe as dd
import os

geemap package not installed. Installing ...


In [17]:
# Module-level logger; get_ee_geometry uses it to report Earth Engine errors.
log = logging.getLogger(__name__)

# Authenticate Gee

In [18]:
# Earth Engine initialisation.
#
# First try the credentials already available in the environment; if that
# fails for any reason, fall back to the service-account key file below.
cloud_project = "trofmis"
service_account = "applied-research-near-realtime@farm-analytics.iam.gserviceaccount.com" # replace with your service account
gee_key = "/content/farmppk.json" # replace with the path to your key
# Both initialisation paths use the same endpoint so later requests behave the
# same regardless of which credentials succeeded.
ee_high_volume_url = 'https://earthengine-highvolume.googleapis.com'

try:
    ee.Initialize(project=cloud_project, opt_url=ee_high_volume_url)
except Exception as default_auth_error:
    # `except Exception` (not a bare except) so Ctrl-C / kernel interrupts are
    # not swallowed; the reason for the fallback is reported instead of hidden.
    log.warning(
        "Default Earth Engine initialisation failed (%s); "
        "falling back to the service account key.",
        default_auth_error,
    )
    # ee.Authenticate()
    # ee.Initialize(project=cloud_project, opt_url=ee_high_volume_url)
    credentials = ee.ServiceAccountCredentials(service_account, gee_key)
    ee.Initialize(credentials, opt_url=ee_high_volume_url)

# Define some variables

In [20]:
# Land-cover image collection (asset ID below). getImageCollection filters it
# by date and mosaics it to build the `lulc` band added to each Sentinel-2 image.
land_cover_esri_image_collection = ee.ImageCollection("projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS")
                                      # .filter(ee.Filter.bounds(geometry))

In [13]:
# GeoJSON FeatureCollection holding one rectangular polygon (closed ring:
# the first and last positions are identical).
geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "coordinates": [[
                    [35.13446698651714, 0.6058996258973224],
                    [35.13446698651714, 0.33053794164814576],
                    [35.51809749848567, 0.33053794164814576],
                    [35.51809749848567, 0.6058996258973224],
                    [35.13446698651714, 0.6058996258973224],
                ]],
                "type": "Polygon",
            },
        },
    ],
}
# GeoJSON FeatureCollection holding one rectangular polygon; its extent lies
# inside the rectangle stored in `geojson_data`. Used as the default area of
# interest further down the notebook.
smaller_geojson_data = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "coordinates": [[
                    [35.24060275899234, 0.541991429065888],
                    [35.24060275899234, 0.473240381254044],
                    [35.34201454057987, 0.473240381254044],
                    [35.34201454057987, 0.541991429065888],
                    [35.24060275899234, 0.541991429065888],
                ]],
                "type": "Polygon",
            },
        },
    ],
}

In [None]:
# GeoJSON FeatureCollection with a single ten-position polygon ring
# (presumably a coarse outline around Kenya, going by the variable name).
Kenya_aoi = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "coordinates": [[
                    [33.85112722608034, 4.205430873294361],
                    [34.73020013094751, 1.5992946225229474],
                    [33.812599936126134, -1.1734318505414052],
                    [37.39384372249762, -3.118824603200679],
                    [39.153626657061835, -5.185427557134872],
                    [41.85937545232591, -1.5645610539139625],
                    [41.46434900749759, 2.6686386606816086],
                    [42.22032065762431, 4.4381018810724555],
                    [34.80813935757439, 5.854914918549625],
                    [33.85112722608034, 4.205430873294361],
                ]],
                "type": "Polygon",
            },
        },
    ],
}

# Functions

Convert geojson to ee object

In [7]:



def get_ee_geometry(geometry):
    """Convert a GeoJSON mapping into a single Earth Engine geometry.

    Args:
        geometry: A GeoJSON-like dict. A FeatureCollection (has a "features"
            list), a single Feature (has a "geometry" member) or a bare
            geometry dict are accepted.

    Returns:
        The union of all converted geometries as an ``ee.Geometry``, or
        ``None`` when nothing could be converted (e.g. no features).

    Raises:
        ValueError: If a geometry type other than Polygon, MultiPolygon or
            Point is encountered.

    Earth Engine errors raised while converting one feature are logged and
    that feature is skipped, so the result may cover fewer features than the
    input.
    """
    # Normalise the three accepted input shapes to a list of features.
    if "features" in geometry:
        features = geometry["features"]
    elif "geometry" in geometry:
        features = [geometry]
    else:
        features = [{"geometry": geometry}]

    constructors = {
        "Polygon": ee.Geometry.Polygon,
        "MultiPolygon": ee.Geometry.MultiPolygon,
        "Point": ee.Geometry.Point,
    }

    ee_geometry = None
    for feature in features:
        geom = feature["geometry"]
        build = constructors.get(geom["type"])
        if build is None:
            raise ValueError("Only Points and Polygons are supported.")
        try:
            ee_geom = build(geom["coordinates"])
            # Explicit None check: do not rely on the truthiness of an EE object.
            ee_geometry = ee_geom if ee_geometry is None else ee_geometry.union(ee_geom)
        except EEException:
            log.exception("An error occurred while trying to generate an ee object.")
    return ee_geometry


cloud masking

In [11]:
def maskS2clouds(image):
    """Mask cloudy pixels using the QA60 band and rescale the image.

    Pixels are kept only where both the cloud bit (bit 10) and the cirrus bit
    (bit 11) of QA60 are zero. The masked image is multiplied by 0.0001, only
    bands matching 'B.*' are kept, and 'system:time_start' is copied back from
    the input image.
    """
    cloud_bit, cirrus_bit = 10, 11
    qa_band = image.select('QA60')

    no_cloud = qa_band.bitwiseAnd(1 << cloud_bit).eq(0)
    no_cirrus = qa_band.bitwiseAnd(1 << cirrus_bit).eq(0)
    clear_sky = no_cloud.And(no_cirrus)

    rescaled = image.updateMask(clear_sky).multiply(0.0001)
    spectral_only = rescaled.select('B.*')
    return spectral_only.copyProperties(image, ['system:time_start'])

Functions to rename bands and compute spectral indices

In [8]:
def renamebandsS2(image):
    """Select the Sentinel-2 bands listed below and give them readable names."""
    # (original band id, new band name), in the order they are selected.
    band_aliases = (
        ('B3', 'Green'),
        ('B2', 'Blue'),
        ('B4', 'RED'),
        ('B8', 'NIR'),
        ('B11', 'SWIR1'),
        ('B12', 'SWIR2'),
        ('B5', 'Red_Edge'),
        ('B1', 'Aerosols'),
        ('B6', 'Red_Edge_2'),
        ('B7', 'Red_Edge_3'),
        ('B8A', 'Red_Edge_4'),
        ('B9', 'Water_vapor'),
        ('B10', 'Cirrus'),
    )
    source_bands = [band_id for band_id, _ in band_aliases]
    readable_names = [alias for _, alias in band_aliases]
    return image.select(source_bands, readable_names)

def NDMI(image):
    """Append an 'NDMI' band computed as (NIR - SWIR1) / (NIR + SWIR1)."""
    operands = {
        'nir': image.select(['NIR']),
        'swir': image.select(['SWIR1']),
    }
    moisture_index = image.expression('(nir-swir)/(nir+swir)', operands)
    return image.addBands(moisture_index.rename('NDMI'))


def NDWI(image):
    """Append an 'NDWI' band computed as (Green - NIR) / (Green + NIR)."""
    operands = {
        'nir': image.select(['NIR']),
        'green': image.select(['Green']),
    }
    water_index = image.expression('(green-nir)/(green+nir)', operands)
    return image.addBands(water_index.rename('NDWI'))
def add_ci(image):
    """Append a 'CI' band computed as (Red_Edge - RED) / (Red_Edge + RED).

    The original docstring describes this band as a chlorophyll index.
    """
    operands = {
        'Red_Edge': image.select(['Red_Edge']),
        'red': image.select(['RED']),
    }
    chlorophyll_index = image.expression('(Red_Edge - red)/(Red_Edge + red)', operands)
    return image.addBands(chlorophyll_index.rename('CI'))
def NDVI(image):
    """Append an 'NDVI' band: the normalized difference of 'NIR' and 'RED'."""
    vegetation_index = image.normalizedDifference(['NIR', 'RED'])
    named_index = vegetation_index.rename('NDVI')
    return image.addBands(named_index)

def parse_date(date_str):
    """Turn a 'YYYY-MM-DD' string into a `datetime.datetime` (midnight)."""
    day_format = '%Y-%m-%d'
    parsed = dt.datetime.strptime(date_str, day_format)
    return parsed

Nested function for adding lulc band to image collection

In [9]:
def add_lulc(lulcImage):
    """Build a per-image mapper that attaches `lulcImage` as a band named 'lulc'.

    The returned function takes an image, adds the renamed land-cover band to
    it and copies the input image's properties onto the result, which makes it
    suitable for `ImageCollection.map`.
    """

    def add_band(image):
        lulc_band = lulcImage.rename('lulc')
        with_lulc = image.addBands(lulc_band)
        return with_lulc.copyProperties(image)

    return add_band

Sentinel 2 image collection preparation and adding lulc band

In [23]:
# Default area of interest used by the functions below.
geometry = get_ee_geometry(smaller_geojson_data)


def getImageCollection(start_date='2017-01-01', end_date='2024-12-31', geometry=geometry):
    """Build a cloud-masked Sentinel-2 collection with indices and a `lulc` band.

    The 'COPERNICUS/S2_HARMONIZED' collection is filtered to `geometry`, to the
    date range and to scenes with CLOUDY_PIXEL_PERCENTAGE < 70, then cloud
    masked, renamed and extended with NDMI, NDWI, CI and NDVI bands. Each
    calendar year of the result gets a `lulc` band taken from the mosaic of
    `land_cover_esri_image_collection` for that year.

    Args:
        start_date: First day to include, as 'YYYY-MM-DD'.
        end_date: End of the range, as 'YYYY-MM-DD'. It is passed to
            `ee.Filter.date`, whose end is exclusive, so images acquired on
            `end_date` itself are not included.
        geometry: Earth Engine geometry used for the bounds filter.

    Returns:
        The merged `ee.ImageCollection` covering every year in the range.

    Raises:
        ValueError: If `start_date` is not strictly before `end_date`.
    """
    # Years for which a land-cover mosaic is looked up. Years outside this span
    # reuse the nearest one (the original code reused 2022 for 2023 onwards).
    first_lulc_year, last_lulc_year = 2017, 2022

    start_date_dt, end_date_dt = parse_date(start_date), parse_date(end_date)
    if start_date_dt >= end_date_dt:
        raise ValueError(
            f"start_date ({start_date}) must be earlier than end_date ({end_date})."
        )

    s2_image_collection = (
        ee.ImageCollection('COPERNICUS/S2_HARMONIZED')
        .filter(ee.Filter.bounds(geometry))
        .filter(ee.Filter.date(start_date, end_date))
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 70))
        .map(maskS2clouds)
    )
    s2_with_indices = (
        s2_image_collection
        .map(renamebandsS2)
        .map(NDMI)
        .map(NDWI)
        .map(add_ci)
        .map(NDVI)
    )

    # end_date is exclusive, so the last covered day is the day before it.
    first_year = start_date_dt.year
    last_year = (end_date_dt - dt.timedelta(days=1)).year

    all_image_collection = None
    for year in range(first_year, last_year + 1):
        # Half-open [Jan 1, next Jan 1) keeps Dec 31 acquisitions, which an
        # end date of 'YYYY-12-31' would drop because the end is exclusive.
        yearly_images = s2_with_indices.filter(
            ee.Filter.date(f'{year}-01-01', f'{year + 1}-01-01')
        )

        lulc_year = min(max(year, first_lulc_year), last_lulc_year)
        lulc_mosaic = land_cover_esri_image_collection.filterDate(
            f'{lulc_year}-01-01', f'{lulc_year + 1}-01-01'
        ).mosaic()

        yearly_with_lulc = yearly_images.map(add_lulc(lulc_mosaic))
        if all_image_collection is None:
            all_image_collection = yearly_with_lulc
        else:
            all_image_collection = all_image_collection.merge(yearly_with_lulc)

    return all_image_collection



Xarray dataset

In [54]:
def getXarrayImageCollection(ImageCollection, geometry=geometry, crs='EPSG:3857', scale=10):
    """Open an Earth Engine image collection as an xarray dataset.

    Args:
        ImageCollection: The `ee.ImageCollection` to open.
        geometry: Region passed to the 'ee' backend as `geometry`.
        crs: Coordinate reference system requested from the backend.
            Defaults to the previously hard-coded 'EPSG:3857'.
        scale: Value passed to the backend as `scale`. Defaults to the
            previously hard-coded 10.

    Returns:
        The dataset returned by `xarray.open_dataset(..., engine='ee')`.
    """
    return xarray.open_dataset(
        ImageCollection,
        engine='ee',
        crs=crs,
        scale=scale,
        geometry=geometry,
    )

export tiff function

In [53]:
def exportTiff(XarrayImageCollection, outputFolder, sensor='Sentinel_2', crs=None):
    """Write every time step of a dataset to its own Cloud Optimized GeoTIFF.

    Args:
        XarrayImageCollection: xarray dataset with a `time` dimension and
            spatial dimensions named 'X' and 'Y'.
        outputFolder: Directory for the output files; created if missing.
        sensor: Prefix used in the output file names.
        crs: CRS to record in the files. When omitted, the dataset's `crs`
            attribute is used if present (so the files carry the CRS the
            dataset was opened with), otherwise 'EPSG:4326' as before.

    Returns:
        dict: ``{'exported_images': <number of files written>}``.

    Files are named ``{sensor}_{YYYY-MM-DD}_.tif``. When several time steps
    fall on the same day, later ones get a numeric suffix
    (``..._1.tif``, ``..._2.tif``) instead of overwriting the first.
    """
    # Local import: the notebook's import cell only binds `np` on one branch.
    import numpy as np

    if crs is None:
        crs = XarrayImageCollection.attrs.get('crs', 'EPSG:4326')

    os.makedirs(outputFolder, exist_ok=True)

    files_per_stem = {}
    count = 0
    for index, time in enumerate(XarrayImageCollection.time.values):
        # Positional selection yields exactly one slice even if two time
        # steps share the same timestamp.
        image = (
            XarrayImageCollection.isel(time=index)
            .rename({'Y': 'y', 'X': 'x'})
            .transpose('y', 'x')
            .rio.write_crs(crs)
        )

        date = np.datetime_as_string(time, unit='D')
        stem = f'{sensor}_{date}_'
        already_written = files_per_stem.get(stem, 0)
        files_per_stem[stem] = already_written + 1
        suffix = '' if already_written == 0 else str(already_written)
        output_file = f'{stem}{suffix}.tif'

        output_path = os.path.join(outputFolder, output_file)
        image.rio.to_raster(output_path, driver='COG')
        count += 1

    return dict(exported_images=count)

# Implementation

create the image collection

In [55]:
# Pass `geometry` (and/or `start_date` / `end_date`) to this function to change
# the area of interest or the period; the defaults are used here.
s2ImageCollectionWithLuLc = getImageCollection()

xarray dataset from the image collection

In [56]:
# Open the collection as an xarray dataset via getXarrayImageCollection.
ds_array = getXarrayImageCollection(s2ImageCollectionWithLuLc)
# Re-chunk with the 'auto' setting, then display the dataset summary.
ds_array = ds_array.chunk('auto')
ds_array

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 2.82 GiB 126.51 MiB Shape (438, 1129, 765) (255, 255, 255) Dask graph 30 chunks in 2 graph layers Data type float64 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,2.82 GiB,126.51 MiB
Shape,"(438, 1129, 765)","(255, 255, 255)"
Dask graph,30 chunks in 2 graph layers,30 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,127.36 MiB
Shape,"(438, 1129, 765)","(322, 322, 322)"
Dask graph,24 chunks in 2 graph layers,24 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 127.36 MiB Shape (438, 1129, 765) (322, 322, 322) Dask graph 24 chunks in 2 graph layers Data type float32 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,127.36 MiB
Shape,"(438, 1129, 765)","(322, 322, 322)"
Dask graph,24 chunks in 2 graph layers,24 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,127.36 MiB
Shape,"(438, 1129, 765)","(322, 322, 322)"
Dask graph,24 chunks in 2 graph layers,24 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 127.36 MiB Shape (438, 1129, 765) (322, 322, 322) Dask graph 24 chunks in 2 graph layers Data type int32 numpy.ndarray",765  1129  438,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,127.36 MiB
Shape,"(438, 1129, 765)","(322, 322, 322)"
Dask graph,24 chunks in 2 graph layers,24 chunks in 2 graph layers
Data type,int32 numpy.ndarray,int32 numpy.ndarray


Unify the chunks for ease of export

In [34]:
# Make chunk sizes consistent across all variables of the dataset.
ds_array_unified  = ds_array.unify_chunks()

filter data and dask dataframe

In [36]:
# Keep only the time steps between the two dates given to slice().
ds_array_2018 = ds_array_unified.sel(time=slice('2018-01-01', '2018-12-31')) # you can change this filter
# Convert the filtered dataset to a dask dataframe for tabular export.
daskDF_2018 = ds_array_2018.to_dask_dataframe()

export data

In [38]:
# export as csv
# Destination folder for the CSV export below.
csv_folder_path = '/content/drive/MyDrive/Amini/ML' # replace me

# Uncomment to write the 2018 dask dataframe to CSV:
# daskDF_2018.to_csv(f'{csv_folder_path}/_2018_time_series.csv')

In [None]:
# to netcdf file
# ds_array_2018.to_netcdf(f'{csv_folder_path}_2018_timeseries.nc')

In [49]:
# Destination folder passed to exportTiff in the next cell.
geotif_folder = '/content/drive/MyDrive/Amini/ML/geotif' #replace me

In [None]:
# export geotif files
# Writes one file per time step into geotif_folder; the returned dict reports
# how many were written.
# NOTE(review): this exports the full `ds_array`, not the 2018 subset
# `ds_array_2018` created above — confirm that is intended.
geotif_export = exportTiff(ds_array, geotif_folder)
geotif_export