In [1]:
import pystac
import pystac_client
import odc
from pystac_client import Client
from pystac.extensions.eo import EOExtension as eo
from odc.stac import stac_load
import planetary_computer as pc
# Credentials must not live in the notebook: read the Planetary Computer
# subscription key from the PC_SDK_SUBSCRIPTION_KEY environment variable.
# When the variable is unset no key is configured and requests are sent
# without one.
# NOTE(review): a key was previously committed on this line and should be
# treated as leaked and rotated.
import os

_pc_subscription_key = os.environ.get("PC_SDK_SUBSCRIPTION_KEY")
if _pc_subscription_key:
    pc.settings.set_subscription_key(_pc_subscription_key)

In [2]:
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
# Working resolution and its approximate equivalent in degrees (EPSG:4326),
# using 111320 m per degree.
resolution = 20  # meters per pixel
scale = resolution / 111320.0  # degrees per pixel

# RGBA display colour for each Sentinel-2 scene-classification (SCL) code;
# the row index is the SCL class value.
scl_colormap = np.asarray(
    (
        (252, 40, 228, 255),   # 0  - NODATA - MAGENTA
        (255, 0, 4, 255),      # 1  - Saturated or Defective - RED
        (0, 0, 0, 255),        # 2  - Dark Areas - BLACK
        (97, 97, 97, 255),     # 3  - Cloud Shadow - DARK GREY
        (3, 139, 80, 255),     # 4  - Vegetation - GREEN
        (192, 132, 12, 255),   # 5  - Bare Ground - BROWN
        (21, 103, 141, 255),   # 6  - Water - BLUE
        (117, 0, 27, 255),     # 7  - Unclassified - MAROON
        (208, 208, 208, 255),  # 8  - Cloud - LIGHT GREY
        (244, 244, 244, 255),  # 9  - Definitely Cloud - WHITE
        (195, 231, 240, 255),  # 10 - Thin Cloud - LIGHT BLUE
        (222, 157, 204, 255),  # 11 - Snow or Ice - PINK
    ),
    dtype=np.uint8,
)

In [4]:
def create_cloud_mask(xx):
    """Build a boolean mask that is True where a pixel should be kept.

    A pixel is kept when its ``SCL`` value is none of: 0 (no data),
    1 (saturated/defective), 3 (cloud shadow), 6 (water), 8 / 9 (cloud)
    or 10 (thin cloud).

    Parameters
    ----------
    xx : object exposing an ``SCL`` attribute that supports element-wise
        ``!=`` and ``&`` (e.g. the dataset returned by ``stac_load``).

    Returns
    -------
    Element-wise boolean mask with the same shape as ``xx.SCL``.
    """
    scl = xx.SCL
    keep = scl != 0
    # AND together one "not this class" test per excluded SCL code.
    for excluded_code in (1, 3, 6, 8, 9, 10):
        keep = keep & (scl != excluded_code)
    return keep
def further_remove_cloud(items):
    """Drop, in place, every item whose ``eo:cloud_cover`` exceeds 60.

    Parameters
    ----------
    items : list of objects with a ``properties`` mapping that contains the
        ``'eo:cloud_cover'`` key.

    Returns
    -------
    The same list object that was passed in, with the cloudy items removed
    and the order of the remaining items preserved.
    """
    # Slice assignment rewrites the caller's list instead of rebinding a new one.
    items[:] = [
        entry for entry in items
        if not (entry.properties['eo:cloud_cover'] > 60)
    ]
    return items

In [7]:
# Side length, in degrees, of the square window sampled around each location
# (get_ndvi_data extends half of it on every side of the point).
box_size_deg = 8.96e-4

# Table of sample locations; later cells read its 'Latitude and Longitude'
# and 'Class of Land' columns.
crop_presence_data = pd.read_csv("Crop_Location_Data.csv")
def get_ndvi_data(latlong, time):
    """Return the highest and lowest box-mean NDVI seen at a location.

    For every Sentinel-2 L2A acquisition that intersects a small box around
    the point (and has at most 60% scene cloud cover), the unmasked pixels of
    the ``nir`` and ``red`` bands are averaged over the box and combined into
    one NDVI value, ``(nir - red) / (nir + red)``.

    Parameters
    ----------
    latlong : str
        Coordinate text of the form ``"(lat, long)"``.
    time : str
        Date range passed as the ``datetime`` argument of the STAC search,
        e.g. ``"2021-12-01/2022-04-30"``.

    Returns
    -------
    tuple
        ``(max_ndvi, min_ndvi)`` over all acquisitions with a valid value, or
        ``(0, 0)`` when there is none.
    """
    parts = latlong.replace('(', '').replace(')', '').replace(' ', '').split(',')
    lat = float(parts[0])
    long = float(parts[1])
    half_box = box_size_deg / 2
    # STAC bounding boxes are ordered (min_x, min_y, max_x, max_y).
    bbox_of_interest = (long - half_box, lat - half_box, long + half_box, lat + half_box)

    catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
    search = catalog.search(collections=["sentinel-2-l2a"], bbox=bbox_of_interest, datetime=time)
    # NOTE(review): newer pystac-client releases appear to deprecate
    # get_all_items() in favour of item_collection(); confirm against the
    # installed version before switching.
    items = further_remove_cloud(list(search.get_all_items()))
    if not items:
        # Nothing left to load; use the same fallback as "no valid NDVI".
        return 0, 0

    xx = stac_load(
        items,
        bands=["red", "green", "blue", "nir", "SCL"],
        crs="EPSG:4326",  # Latitude-Longitude
        resolution=scale,  # Degrees
        chunks={"x": 2048, "y": 2048},
        dtype="uint16",
        patch_url=pc.sign,
        bbox=bbox_of_interest,
    )

    # Keep the masked data as floats: where() marks rejected pixels with NaN,
    # and casting NaN back to uint16 is platform-dependent and can put those
    # pixels back into the average. The NaN-skipping mean excludes them.
    cleaned_data = xx.where(create_cloud_mask(xx))
    mean_clean = cleaned_data.mean(dim=['longitude', 'latitude']).compute()

    ndvi = ((mean_clean.nir - mean_clean.red) / (mean_clean.nir + mean_clean.red)).to_numpy()
    # Acquisitions whose box was fully masked (or summed to zero) yield NaN.
    ndvi = ndvi[~np.isnan(ndvi)]
    if ndvi.size == 0:
        return 0, 0
    return ndvi.max(), ndvi.min()

In [None]:
# Query the NDVI extremes for every training location (one STAC search per
# row) and store them as two new feature columns.
time = "2021-12-01/2022-04-30"

ndvi_extremes = [
    get_ndvi_data(coordinates, time)
    for coordinates in tqdm(crop_presence_data['Latitude and Longitude'])
]
max_ndvis = [highest for highest, _ in ndvi_extremes]
min_ndvis = [lowest for _, lowest in ndvi_extremes]

crop_presence_data['max_ndvi'] = max_ndvis
crop_presence_data['min_ndvi'] = min_ndvis

 34%|███▍      | 203/600 [06:20<11:39,  1.76s/it]

In [10]:
# Binary label: 1 where 'Class of Land' is exactly "Rice", 0 otherwise.
# The comparison is vectorised so it does not depend on the frame having a
# default 0..n-1 index (the earlier loop fed index labels to positional .iloc).
target = (crop_presence_data['Class of Land'] == "Rice").astype("int64").tolist()
crop_presence_data['target'] = target
crop_presence_data

Unnamed: 0,Latitude and Longitude,Class of Land,max_ndvi,min_ndvi,target
0,"(10.323727047081501, 105.2516346045924)",Rice,0.900975,0.014293,1
1,"(10.322364360592521, 105.27843410554115)",Rice,0.888940,0.010550,1
2,"(10.321455902933202, 105.25254306225168)",Rice,0.881617,0.030606,1
3,"(10.324181275911162, 105.25118037576274)",Rice,0.877016,-0.014808,1
4,"(10.324635504740822, 105.27389181724476)",Rice,0.897256,-0.025047,1
...,...,...,...,...,...
595,"(10.013942985253381, 105.67361318732796)",Non Rice,0.863565,0.274916,0
596,"(10.01348875642372, 105.67361318732796)",Non Rice,0.841273,0.293173,0
597,"(10.013034527594062, 105.67361318732796)",Non Rice,0.839577,0.302854,0
598,"(10.012580298764401, 105.67361318732796)",Non Rice,0.876839,0.279089,0


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fixed seed so the train/test split and the fitted tree are reproducible.
RANDOM_STATE = 42

# Features and label come from crop_presence_data, the frame the earlier cells
# populated with max_ndvi / min_ndvi / target (no `crop_data` is defined in
# this notebook, so the previous name failed on a fresh kernel).
X = crop_presence_data[['max_ndvi', 'min_ndvi']]
y = crop_presence_data[['target']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
dtc = DecisionTreeClassifier(random_state=RANDOM_STATE)
dtc = dtc.fit(X_train, y_train)

In [None]:
from sklearn import tree

# Print the fitted tree's decision rules as plain text.
text_representation = tree.export_text(decision_tree=dtc)
print(text_representation)

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Score the classifier on the held-out split: show the raw predictions next
# to the true labels, then the accuracy and F1 score.
predictions = dtc.predict(X_test)
print(predictions)
print(y_test)
ac, f1 = accuracy_score(y_test, predictions), f1_score(y_test, predictions)
print(ac, f1)

In [None]:
# Build the submission file: compute the NDVI features for every location in
# test.csv, predict with the fitted tree, and write id/label pairs.
test = pd.read_csv("test.csv")
result = []  # kept for compatibility; not used in this cell
time = "2021-12-01/2022-04-30"
max_ndvis = []
min_ndvis = []
for coordinates in tqdm(test['id']):
    # get_ndvi_data returns exactly two values (max, min); unpacking four here
    # raised ValueError on the first row.
    max_ndvi, min_ndvi = get_ndvi_data(coordinates, time)
    max_ndvis.append(max_ndvi)
    min_ndvis.append(min_ndvi)

test['max_ndvi'] = max_ndvis
test['min_ndvi'] = min_ndvis
x = test[['max_ndvi', 'min_ndvi']]
test_pred = dtc.predict(x)

# Map the numeric prediction back to the label text expected in the output.
target = ["Rice" if label == 1 else "Non Rice" for label in test_pred]
test['target'] = target

# The feature columns are working data only and are not part of the submission.
test.drop(columns=['max_ndvi', 'min_ndvi'], inplace=True)
test.to_csv("submissionoriginal.csv", index=False)