<a href="https://colab.research.google.com/github/climasig/climasig.github.io/blob/main/CropHarvest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CropHarvest Demo

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nasaharvest/cropharvest/blob/main/demo.ipynb)

**Authors**: Gabriel Tseng, Ivan Zvonkov

**Description**: This notebook demonstrates the capabilities of the CropHarvest package by training and testing a model on a subset of the data and then running inference using the trained model.

In [1]:
# Download from PyPI
!pip install cropharvest -q

# Download from TestPyPI
#!pip install -i https://test.pypi.org/simple/ cropharvest --extra-index-url https://pypi.python.org/simple -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.8/809.8 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m994.8/994.8 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m46.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# List the installed packages and keep only the line(s) mentioning cropharvest,
# which shows the version that was installed.
!pip freeze | grep cropharvest

In [None]:
from cropharvest.datasets import CropHarvest
from cropharvest.inference import Inference
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier

import requests
import tempfile

# Directory (relative to the working directory) where the datasets are stored.
DATA_DIR = "data"

# Create the directory with pathlib rather than `!mkdir`: a plain `mkdir`
# prints an error when the directory already exists (e.g. when this cell is
# re-run), and a shell call is not portable across platforms.
Path(DATA_DIR).mkdir(parents=True, exist_ok=True)

## Load datasets

In [None]:
# Build the benchmark datasets, using DATA_DIR as the data location.
# NOTE(review): presumably this downloads the data into DATA_DIR on first use — confirm.
evaluation_datasets = CropHarvest.create_benchmark_datasets(DATA_DIR)
# Bare last expression: show the returned datasets as the cell output.
evaluation_datasets

## Split Togo data into X and y

In [None]:
# Take the last benchmark dataset; this notebook treats it as the Togo dataset.
# NOTE(review): relies on the ordering returned by create_benchmark_datasets — confirm.
togo_dataset = evaluation_datasets[-1]
# Get the dataset as arrays, with the features flattened (flatten_x=True).
X, y = togo_dataset.as_array(flatten_x=True)

# Sanity checks on the expected sizes: 1290 rows in X and y, 216 columns in X.
assert X.shape[0] == 1290
assert y.shape[0] == 1290
assert X.shape[1] == 216

# Display both shapes as the cell output.
X.shape, y.shape

## Train a Random Forest model on the Togo dataset

In [None]:
# random_state is fixed so that the fitted forest is the same on every run.
model = RandomForestClassifier(random_state=0)
# Fit on the arrays extracted from the Togo dataset.
model.fit(X, y)

## Make predictions on the Togo test set

In [None]:
# Collect, for every test instance yielded by the Togo dataset, the predicted
# probability of the class in column 1 of predict_proba.
test_preds, test_instances = [], []
for _, test_instance in togo_dataset.test_data(flatten_x=True):
    test_preds.append(model.predict_proba(test_instance.x)[:, 1])
    test_instances.append(test_instance)

# Fail with a clear message instead of an IndexError if nothing was yielded.
assert test_instances, "togo_dataset.test_data() yielded no test instances"

# Evaluate once and reuse the result for both the printout and the checks
# (previously evaluate_predictions was called twice on the same inputs).
metrics = test_instances[0].evaluate_predictions(test_preds[0])

print(
    f"For the Random Forest classifier, "
    f"{metrics}, "
)

# Regression thresholds for the default (seeded) model.
assert metrics["f1_score"] > 0.73, "Default model f1-score should be greater than 0.73"
assert metrics["auc_roc"] > 0.88, "Default model AUC-ROC should be greater than 0.88"

## Get test file for inference

In [None]:
# Name of the sample GeoTIFF stored in the cropharvest repository's test folder.
test_file = "98-togo_2019-02-06_2020-02-01.tif"

# Save the file in the system temporary directory.
temp_dir = tempfile.gettempdir()
p = Path(temp_dir) / test_file
response = requests.get(
    f"https://github.com/nasaharvest/cropharvest/blob/main/test/cropharvest/{test_file}?raw=true",
    # Without a timeout, requests can wait indefinitely on a stalled connection.
    timeout=60,
)
# Stop here on an HTTP error (e.g. 404) rather than writing the error page
# to disk and failing later when the file is opened as a raster.
response.raise_for_status()
with p.open("wb") as f:
    f.write(response.content)

## Run inference

In [None]:
# Run the trained model over the downloaded file; no normalization is applied.
preds = Inference(model=model, normalizing_dict=None).run(p)

# Check size
# `.sizes` is the mapping from dimension name to length; indexing
# `Dataset.dims` like a mapping is deprecated in recent xarray releases.
assert preds.sizes["lat"] == 17
assert preds.sizes["lon"] == 17

# Check all predictions between 0 and 1
# Compare plain floats: `assert <Dataset> >= 0` only tests whether the
# resulting Dataset has data variables, so it passes regardless of the values.
preds_values = preds.to_array()
assert float(preds_values.min()) >= 0
assert float(preds_values.max()) <= 1

preds

## [Optional] Visualize model prediction

In [None]:
!pip install matplotlib -q

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Take the first entry of the predictions converted to an array.
preds_np = preds.to_array()[0]

# Draw the prediction grid on an explicit Axes, using the lon/lat coordinates.
fig, ax = plt.subplots()
ax.pcolormesh(preds_np.lon, preds_np.lat, preds_np.data)
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude");