In [1]:
import os
from datetime import datetime, timedelta

import odc
import pandas as pd
import planetary_computer as pc
import plotly.express as px
import pystac
import pystac_client
from odc.stac import stac_load
from pystac.extensions.eo import EOExtension as eo
from pystac_client import Client

# The Planetary Computer subscription key is a credential, so it is read from
# the environment rather than being stored in the notebook. The key that was
# previously hard-coded on this line should be treated as leaked and rotated.
# When the variable is unset, no key is configured here.
_pc_subscription_key = os.environ.get('PC_SDK_SUBSCRIPTION_KEY')
if _pc_subscription_key:
    pc.settings.set_subscription_key(_pc_subscription_key)

In [2]:
# Read the training and test tables from their CSV files.
train_path, test_path = '../data/train.csv', '../data/test.csv'
train_df, test_df = (pd.read_csv(csv_file) for csv_file in (train_path, test_path))

In [3]:
# Show the training frame as this cell's output.
train_df

Unnamed: 0,District,Latitude,Longitude,"Season(SA = Summer Autumn, WS = Winter Spring)","Rice Crop Intensity(D=Double, T=Triple)",Date of Harvest,Field size (ha),Rice Yield (kg/ha)
0,Chau_Phu,10.510542,105.248554,SA,T,15-07-2022,3.40,5500
1,Chau_Phu,10.509150,105.265098,SA,T,15-07-2022,2.43,6000
2,Chau_Phu,10.467721,105.192464,SA,D,15-07-2022,1.95,6400
3,Chau_Phu,10.494453,105.241281,SA,T,15-07-2022,4.30,6000
4,Chau_Phu,10.535058,105.252744,SA,D,14-07-2022,3.30,6400
...,...,...,...,...,...,...,...,...
552,Thoai_Son,10.364419,105.164984,WS,T,12-04-2022,7.80,6640
553,Thoai_Son,10.358094,105.189541,WS,T,12-04-2022,2.00,7200
554,Thoai_Son,10.368014,105.238516,WS,T,12-04-2022,6.20,7200
555,Thoai_Son,10.275419,105.234563,WS,T,20-04-2022,3.00,6400


In [4]:
def _bbox_around(longitude, latitude, size):
    """Return [min_lon, min_lat, max_lon, max_lat] of a square with side `size` centred on the point."""
    half = size / 2
    return [longitude - half, latitude - half, longitude + half, latitude + half]


def _history_window(harvest_date, history):
    """Return a 'YYYY-MM-DD/YYYY-MM-DD' interval covering `history` days up to a 'DD-MM-YYYY' harvest date."""
    harvest_datetime = datetime.strptime(harvest_date, '%d-%m-%Y')
    start_datetime = harvest_datetime - timedelta(days=history)
    return f'{start_datetime.strftime("%Y-%m-%d")}/{harvest_datetime.strftime("%Y-%m-%d")}'


def get_data(row, history=120, resolution=10, surrounding_box=0.1, min_items=35):
    """Search Sentinel-2 L2A scenes around one field and print the loaded dataset.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'Longitude', 'Latitude' and 'Date of Harvest'
        (a 'DD-MM-YYYY' string).
    history : int
        Number of days before the harvest date to include in the search.
    resolution : number
        Requested pixel size; divided by 111320.0 to obtain degrees
        (presumably metres per degree -- TODO confirm the intended unit).
    surrounding_box : float
        Side length, in degrees, of the square search box centred on the field.
    min_items : int
        The dataset is only loaded when the search returns strictly more
        than this many items (previously a hard-coded 35).

    Returns
    -------
    None
        The item count and the dataset are printed, not returned.
        NOTE(review): the loaded dataset is discarded after printing; returning
        it would change what callers such as ``DataFrame.apply`` receive, so it
        is left as-is here.

    Raises
    ------
    ValueError
        From ``datetime.strptime`` when the harvest date is not 'DD-MM-YYYY'.
    """
    bbox = _bbox_around(row['Longitude'], row['Latitude'], surrounding_box)
    time_period = _history_window(row['Date of Harvest'], history)

    # A new catalog client is opened on every call, as before.
    catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(collections=["sentinel-2-l2a"], bbox=bbox, datetime=time_period)
    items = list(search.get_all_items())

    if len(items) <= min_items:
        # Too few scenes in the window: nothing is loaded or printed.
        return None

    print(len(items))
    data = stac_load(
        items,
        bands=["red", "green", "blue", "nir", "rededge", "B05", "B06", "B07", "SCL"],
        crs="EPSG:4326", # Latitude-Longitude
        resolution=resolution / 111320.0, # Degrees
        chunks={"x": 2048, "y": 2048},
        dtype="uint16",
        patch_url=pc.sign,
        bbox=bbox
    )

    print(data)
    print()
    return None

In [5]:
# Call get_data once per training row (axis=1 passes each row to the function).
# NOTE(review): get_data only prints, so the recorded result is a Series of None.
train_df.apply(get_data, axis=1)

46
<xarray.Dataset>
Dimensions:      (latitude: 1115, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.57 10.57 10.57 ... 10.47 10.47 10.47
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-03-26T03:15:41.024000 ... 2022-07...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), met

46
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.67 10.67 10.67 ... 10.57 10.57 10.57
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-04-10T03:15:39.024000 ... 2022-08...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.64 10.64 10.64 ... 10.54 10.54 10.54
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-04-10T03:15:39.024000 ... 2022-08...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=n

46
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.58 10.58 10.58 ... 10.48 10.48 10.48
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-03-31T03:15:39.024000 ... 2022-07...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

49
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 24)
Coordinates:
  * latitude     (latitude) float64 10.6 10.59 10.59 10.59 ... 10.5 10.5 10.5
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-12-11T03:21:29.024000 ... 2022-04...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), m

49
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 24)
Coordinates:
  * latitude     (latitude) float64 10.68 10.68 10.68 ... 10.58 10.58 10.58
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-12-11T03:21:29.024000 ... 2022-04...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

49
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 24)
Coordinates:
  * latitude     (latitude) float64 10.64 10.64 10.64 ... 10.54 10.54 10.54
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-12-11T03:21:29.024000 ... 2022-04...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

46
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.46 10.46 10.46 ... 10.36 10.36 10.36
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-03-31T03:15:39.024000 ... 2022-07...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

49
<xarray.Dataset>
Dimensions:      (latitude: 1115, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.44 10.44 10.44 ... 10.34 10.34 10.34
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-11-26T03:20:51.024000 ... 2022-03...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), met

49
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.46 10.46 10.46 ... 10.36 10.36 10.36
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-12-01T03:21:09.024000 ... 2022-03...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

46
<xarray.Dataset>
Dimensions:      (latitude: 1114, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.38 10.38 10.38 ... 10.28 10.28 10.28
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2022-03-31T03:15:39.024000 ... 2022-07...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1114, 1114), met

47
<xarray.Dataset>
Dimensions:      (latitude: 1115, longitude: 1114, time: 23)
Coordinates:
  * latitude     (latitude) float64 10.33 10.33 10.33 ... 10.23 10.23 10.23
  * longitude    (longitude) float64 105.1 105.1 105.1 ... 105.2 105.2 105.2
    spatial_ref  int32 4326
  * time         (time) datetime64[ns] 2021-12-26T03:21:41.024000 ... 2022-04...
Data variables:
    red          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    green        (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    blue         (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    nir          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    rededge      (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), meta=np.ndarray>
    B05          (time, latitude, longitude) uint16 dask.array<chunksize=(1, 1115, 1114), met

0      None
1      None
2      None
3      None
4      None
       ... 
552    None
553    None
554    None
555    None
556    None
Length: 557, dtype: object