<a href="https://colab.research.google.com/github/boothmanrylan/historicalFiresQC/blob/main/historicalFiresQCMain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clone Repository and Load Modules

In [None]:
import tensorflow as tf
import os
from google.colab import drive, auth
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import ee
import glob

drive.mount('/content/drive/')
%cd /content/drive/My\ Drive/

auth.authenticate_user()

ee.Authenticate()
ee.Initialize()

# if expected files don't exist then clone from github
! if [ ! -d ./historicalFiresQC ]; then git clone https://github.com/boothmanrylan/historicalFiresQC.git; fi

%cd historicalFiresQC

! git checkout .
! git pull

! pip install q -r ./requirements.txt

import model as Model
import data as Data
import assessment as Assessment
import visualize as Visualize
import main as Main

# Reload Modules

Run this cell only if the historicalFiresQC modules have changed since you started using this Colab notebook. It pulls the changes and reloads the modules, so the changes take effect without restarting the runtime.

In [None]:
! git pull

from importlib import reload

Model = reload(Model)
Data = reload(Data)
Assessment = reload(Assessment)
Visualize = reload(Visualize)
Main = reload(Main)

# Set Parameters

In [None]:
# Keyword arguments forwarded to Main.main(**params).
params = dict(
    # --- Google Cloud Storage locations ---
    bucket='gs://boothmanrylan',               # google cloud storage bucket
    data_folder='historicalFiresQCInputData',  # folder inside the bucket that contains the data
    model_folder='historicalFiresQCModels',    # folder inside the bucket where models are saved

    # --- Annotations and prediction target ---
    annotation_type='level_slice',  # how annotations were made: level_slice or bounding_box
    output='burn',                  # what to predict: all, burn_age, burn

    # --- Input patches ---
    shape=(128, 128),  # size of one input image, without band or batch dimensions
    kernel=32,         # edge buffer around each patch
    batch_size=64,     # number of images to consider at once

    # --- Optional extra model inputs ---
    stack_image=False,                # whether the previous image is also given to the model
    include_previous_burn_age=False,  # whether the previous burn age is also given to the model
    include_previous_class=False,     # presumably the same for the previous class -- TODO confirm
    burn_age_function='scale',        # function applied to burn age: scale, log, sigmoid, None

    # --- Training ---
    learning_rate=1e-4,
    epochs=50,
    steps_per_epoch=100,    # number of batches run through the model in one epoch
    train_model=False,      # if false only inference happens
    load_model=True,        # if true previously trained model weights are loaded
    # Options: basic, weigher, or reference_point.
    # NOTE(review): "weigher" may be a typo for "weighted" -- confirm against Main.
    loss_function='basic',

    # --- Outputs ---
    store_predictions=False,  # if true model predictions are stored in model_folder
    augment_data=False,
    assess_model=False,
)

# Run Model

In [None]:
# Call Main.main with every entry of `params` as a keyword argument.
# Later cells index the result by these keys: 'assessment', 'train_dataset',
# 'val_dataset', 'model', 'burn_age_function', 'data_folder', 'model_folder'
# and 'model_number'.
output = Main.main(**params)

# Visualize Results

In [None]:
# Bare last expression, so the notebook renders the value stored under 'assessment'.
output['assessment'] # display the accuracy assessment table

In [None]:
# Upper bound for annotation values handed to the visualizer:
#   'burn_age' -> the burn-age function applied to 3650
#                 (presumably ten years in days -- TODO confirm)
#   'burn'     -> 1
#   otherwise  -> None
max_annot = (
    output['burn_age_function'](3650) if params['output'] == 'burn_age'
    else 1 if params['output'] == 'burn'
    else None
)

In [None]:
# Pass the 'train_dataset' entry of `output`, together with the model, to
# Visualize.visualize. The three input flags are forwarded unchanged from
# `params`; max_burn_age is the burn-age function applied to 3650
# (presumably ten years in days -- TODO confirm).
Visualize.visualize(
    output['train_dataset'],
    model=output['model'],
    stacked_image=params['stack_image'],
    include_prev_burn_age=params['include_previous_burn_age'],
    include_prev_class=params['include_previous_class'],
    max_annot=max_annot,
    max_burn_age=output['burn_age_function'](3650)
)

In [None]:
# Same call as for the training data, but on the 'val_dataset' entry of `output`.
# The three input flags are forwarded unchanged from `params`; max_burn_age is
# the burn-age function applied to 3650 (presumably ten years in days -- TODO confirm).
Visualize.visualize(
    output['val_dataset'],
    model=output['model'],
    stacked_image=params['stack_image'],
    include_prev_burn_age=params['include_previous_burn_age'],
    include_prev_class=params['include_previous_class'],
    max_annot=max_annot,
    max_burn_age=output['burn_age_function'](3650)
)


# Upload Results
You must run main with `store_predictions` set to True before running this cell.

In [None]:
import time
import json
from IPython.utils import io

ee_user = 'users/boothmanrylan'
ee_folder = 'historicalFiresQCResults'

all_files = !gsutil ls {output['data_folder']}
mixers = [x for x in all_files if 'json' in x]
mixers.sort()

all_files = !gsutil ls {output['model_folder']}
tfrecords = [x for x in all_files if '.tfrecord' in x]
tfrecords.sort()

model_number = output['model_number']

image_collection = os.path.join(ee_user, ee_folder, f'{model_number:05d}')

!earthengine create collection {image_collection}
uploads = {}
for i, (t, m) in enumerate(zip(tfrecords, mixers)):
    f = m.replace(output['data_folder'] + '/', '').replace('-mixer.json', '')
    asset_id = os.path.join(image_collection, f)
    print(f'Uploading to {asset_id}')
    with io.capture_output() as captured:
        !earthengine upload image --asset_id={asset_id} {t} {m}
    task_id = captured.stdout.split(' ')[-1].strip()
    uploads[asset_id] = [task_id, t, m]

successes = []
while len(successes) < len(tfrecords):
    time.sleep(60)
    for asset, task in uploads.items():
        if asset in successes: continue
        with io.capture_output() as captured:
            !earthengine task info {task[0]}
        status = captured.stdout.split('State: ')[-1].split(' ')[0].strip()
        if status == 'COMPLETED':
            print(f'Successfully uploaded {asset}')
            successes.append(asset)
        elif status == 'FAILED':
            if 'Cannot read mixer' in captured.stdout:
                print(f'Rerunning upload of {asset}')
                with io.capture_output() as captured2:
                    !earthengine upload image --asset_id{asset} {task[1]} {task[2]}
                new_task_id = captured2.stdout.split(' ')[-1].strip()
                uploads[asset] = [new_task_id, task[1], task[2]]
            elif 'Cannot overwrite asset' in captured.stdout:
                print(f'{asset} already uploaded')
                successes.append(asset)
            else:
                print(f'{asset} failed for unknown reasons; treating as success')
                successes.append(asset)
        elif status == 'RUNNING':
            print(f'{asset} still running')
        else:
            print(f'{asset} has unknown status skipping')
            successes.append(asset)
    print(f'Successfully uploaded: {len(successes)} files')

In [None]:
# Map every asset id to the id of its most recent upload task, then display it.
# (`uploads` values are [task id, tfrecord path, mixer path].)
task_ids = {asset: task[0] for asset, task in uploads.items()}
task_ids

In [None]:
# Ad-hoc status check for a single Earth Engine task.
# NOTE(review): the task id is hard-coded, presumably left over from an earlier
# session -- replace it with an id from your own run, or remove this cell.
!earthengine task info OBNMKAS5ACYUNZQDF6DPYS3Y