In [None]:
"""
This file is used to split the test dataset into subsets according to spread speed.
"""

In [None]:
!pip install pandas numpy tensorflow

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
# Mount Google Drive at /content/drive (uses the google.colab helper imported
# above); the dataset and output paths used later in this notebook live under
# this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
import numpy as np
import os

# Function to write a list of TFRecords to a specified file
def write_tfrecord(file_path, dataset):
    """Serialize every example in *dataset* into a single TFRecord file.

    Args:
        file_path: Destination path of the TFRecord file. Missing parent
            directories are created.
        dataset: Iterable of objects exposing ``SerializeToString()``
            (the script passes lists of ``tf.train.Example``).
    """
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The context manager closes (and thereby flushes) the writer even when
    # serializing or writing one of the examples raises.
    with tf.io.TFRecordWriter(file_path) as writer:
        for example in dataset:
            writer.write(example.SerializeToString())

# Function to parse a TFRecord into a TensorFlow Example object
def parse_example(record):
    """Decode one raw dataset element into a ``tf.train.Example`` message.

    Args:
        record: Object whose ``.numpy()`` yields the serialized bytes of an
            Example (the script passes elements of a ``TFRecordDataset``).

    Returns:
        The populated ``tf.train.Example``.
    """
    serialized_bytes = record.numpy()
    message = tf.train.Example()
    message.ParseFromString(serialized_bytes)
    return message

# Function to count the number of features in a TFRecord example
def count_features(example):
    """Return how many named features *example* carries.

    Args:
        example: Object exposing a ``features.feature`` mapping
            (feature name -> feature value).

    Returns:
        The number of entries in that mapping.
    """
    feature_map = example.features.feature
    return len(feature_map)

# Function to classify fire change between two masks: PrevFireMask and FireMask
def classify_fire_change(prev_fire_mask, fire_mask):
    """Label how the fire changed between the previous and the current mask.

    Both arguments are features whose ``float_list.value`` holds 4096 floats;
    each is reshaped to 64x64. Pixels equal to -1 are treated as invalid and
    ignored, and pixels greater than 0 count as active fire.

    A debug line with both counts and their ratio is printed on every call.

    Args:
        prev_fire_mask: Feature holding the earlier fire mask.
        fire_mask: Feature holding the later fire mask.

    Returns:
        One of "Fire extinguished", "Quickly reduced", "Slowly reduced",
        "Quickly spread" or "Slowly spread". Equal counts (including the
        case where both are zero) fall through to "Slowly spread".

    Raises:
        ValueError: If a mask cannot be reshaped to 64x64.
    """
    def _active_pixels(mask_feature):
        # Reshape first so a wrongly sized mask fails loudly, then drop the
        # invalid (-1) pixels before counting the burning ones.
        grid = np.array(mask_feature.float_list.value).reshape((64, 64))
        usable = grid[grid != -1]
        return np.sum(usable > 0.0)

    prev_fire_count = _active_pixels(prev_fire_mask)
    current_fire_count = _active_pixels(fire_mask)

    # The ratio is only reported in the debug line; it is 0 when there was
    # no fire to begin with, which avoids dividing by zero.
    if prev_fire_count > 0:
        ratio = current_fire_count / prev_fire_count
    else:
        ratio = 0

    print(f"Debug - prev_fire_count: {prev_fire_count}, current_fire_count: {current_fire_count}, ratio: {ratio:.2f}")

    # Guard clauses, checked from "gone" through "shrinking" to "growing".
    if prev_fire_count > 0 and current_fire_count == 0:
        return "Fire extinguished"
    if current_fire_count < prev_fire_count * 0.5:
        return "Quickly reduced"
    if current_fire_count < prev_fire_count:
        return "Slowly reduced"
    if current_fire_count > prev_fire_count * 1.5:
        return "Quickly spread"
    return "Slowly spread"

# Function to parse and classify a raw TFRecord
def _parse_and_classify_fn(raw_record):
    """Deserialize a raw record and classify its fire change.

    Args:
        raw_record: Raw dataset element accepted by ``parse_example``.

    Returns:
        A 4-tuple ``(example, prev_fire_mask, fire_mask, classification)``:
        the parsed example, its 'PrevFireMask' and 'FireMask' features, and
        the label produced by ``classify_fire_change`` for those two masks.
    """
    parsed = parse_example(raw_record)

    # Both masks live in the same feature map; look it up once.
    feature_map = parsed.features.feature
    previous_mask = feature_map['PrevFireMask']
    current_mask = feature_map['FireMask']

    label = classify_fire_change(previous_mask, current_mask)
    return parsed, previous_mask, current_mask, label

# ---------------------------------------------------------------------------
# Classify every test record by fire-spread behaviour and export one TFRecord
# file per class.
# ---------------------------------------------------------------------------

# Records are only considered when they carry exactly this many features
# (specific to this dataset).
EXPECTED_FEATURE_COUNT = 13
# Shape the flat PrevFireMask / FireMask features are reshaped to.
MASK_SHAPE = (64, 64)

# Initialize lists to hold records for each fire classification
fire_extinguished_records = []
quickly_reduced_records = []
slowly_reduced_records = []
slowly_spread_records = []
quickly_spread_records = []

# Classification label -> (bucket, output file name). Insertion order is the
# order in which the files are written and the counts are reported. Looking
# the label up here (instead of a catch-all `else`) means an unexpected label
# surfaces as an error rather than being silently filed as "Quickly spread".
records_by_classification = {
    "Fire extinguished": (fire_extinguished_records, 'fire_extinguished_data.tfrecords'),
    "Quickly reduced": (quickly_reduced_records, 'quickly_reduced_data.tfrecords'),
    "Slowly reduced": (slowly_reduced_records, 'slowly_reduced_data.tfrecords'),
    "Slowly spread": (slowly_spread_records, 'slowly_spread_data.tfrecords'),
    "Quickly spread": (quickly_spread_records, 'quickly_spread_data.tfrecords'),
}

# Load the dataset using the specified file pattern
file_pattern = '/content/drive/My Drive/northamerica_2012-2023/test/*'
output_dir = '/content/drive/MyDrive/new_wildfire_subsets_by_spreadspeed'
raw_dataset = tf.data.TFRecordDataset(tf.io.gfile.glob(file_pattern))

# Bookkeeping so dropped records are visible in the final summary.
skipped_count = 0
error_count = 0

print("Starting to parse the dataset...")
for raw_record in raw_dataset:
    try:
        # Parse each record exactly once; the same Example object is used for
        # the feature-count check, the classification and the export.
        example = parse_example(raw_record)

        # Skip records that do not have the expected number of features
        if count_features(example) != EXPECTED_FEATURE_COUNT:
            skipped_count += 1
            continue

        # Classify the fire change based on the PrevFireMask and FireMask
        prev_fire_mask = example.features.feature['PrevFireMask']
        fire_mask = example.features.feature['FireMask']
        classification = classify_fire_change(prev_fire_mask, fire_mask)

        # Reshape the masks for the per-record statistics printed below
        prev_fire_mask_array = np.array(prev_fire_mask.float_list.value).reshape(MASK_SHAPE)
        fire_mask_array = np.array(fire_mask.float_list.value).reshape(MASK_SHAPE)

        # Count the number of active fire pixels (value of 1) in both masks
        count_1_prev = np.sum(prev_fire_mask_array == 1)
        count_1_fire = np.sum(fire_mask_array == 1)

        # Calculate the total number of valid pixels (non -1) and their ratios
        total_non_minus_1_prev = np.sum(prev_fire_mask_array != -1)
        total_non_minus_1_fire = np.sum(fire_mask_array != -1)
        ratio_prev = count_1_prev / total_non_minus_1_prev if total_non_minus_1_prev > 0 else 0
        ratio_fire = count_1_fire / total_non_minus_1_fire if total_non_minus_1_fire > 0 else 0

        # Print classification and ratio details for each record
        print(f"Record classification: {classification}")
        print(f"PrevFireMask - Count of 1 values: {count_1_prev}, Total non -1 values: {total_non_minus_1_prev}, Ratio: {ratio_prev:.2f}")
        print(f"FireMask - Count of 1 values: {count_1_fire}, Total non -1 values: {total_non_minus_1_fire}, Ratio: {ratio_fire:.2f}")

        # Append the example to the bucket that matches its classification
        bucket, _ = records_by_classification[classification]
        bucket.append(example)
    except Exception as e:
        # Best-effort processing: report the failing record and keep going
        error_count += 1
        print(f"Error parsing record: {e}")
        print(f"Record: {raw_record}")

# Save each classification to a separate TFRecord file and print the number of records
for label, (records, file_name) in records_by_classification.items():
    write_tfrecord(os.path.join(output_dir, file_name), records)
    print(f"{label} records: {len(records)}")

# Report records that never reached a bucket so data loss is not silent
print(f"Skipped records (feature count != {EXPECTED_FEATURE_COUNT}): {skipped_count}")
print(f"Records that failed to process: {error_count}")

print("Data has been classified and saved into respective TFRecord files.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
PrevFireMask - Count of 1 values: 73, Total non -1 values: 4096, Ratio: 0.02
FireMask - Count of 1 values: 50, Total non -1 values: 4096, Ratio: 0.01
Debug - prev_fire_count: 2, current_fire_count: 17, ratio: 8.50
Record classification: Quickly spread
PrevFireMask - Count of 1 values: 2, Total non -1 values: 4096, Ratio: 0.00
FireMask - Count of 1 values: 17, Total non -1 values: 4096, Ratio: 0.00
Debug - prev_fire_count: 11, current_fire_count: 108, ratio: 9.82
Record classification: Quickly spread
PrevFireMask - Count of 1 values: 11, Total non -1 values: 4096, Ratio: 0.00
FireMask - Count of 1 values: 108, Total non -1 values: 4096, Ratio: 0.03
Debug - prev_fire_count: 1, current_fire_count: 13, ratio: 13.00
Record classification: Quickly spread
PrevFireMask - Count of 1 values: 1, Total non -1 values: 4096, Ratio: 0.00
FireMask - Count of 1 values: 13, Total non -1 values: 4096, Ratio: 0.00
Debug - prev_fire_count: 16