In [None]:
"""
This notebook splits the test dataset into sub-test sets by fire size (large, medium, and small), based on the ratio of 1-valued pixels to non -1 pixels in each sample's PrevFireMask.
"""

In [None]:
!pip install pandas numpy tensorflow




In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive


In [None]:

# Mount Google Drive at /content/drive; the input and output paths used later
# in this notebook all live under that mount point.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import tensorflow as tf
import numpy as np

# Function to write a list of TFRecords to a file
def write_tfrecord(file_path, dataset):
    """Serialize every example in ``dataset`` into a single TFRecord file.

    Args:
        file_path: Destination path handed to ``tf.io.TFRecordWriter``.
        dataset: Iterable of objects exposing ``SerializeToString()``; this
            notebook passes lists of ``tf.train.Example``. An empty iterable
            produces a file with no records.

    The writer is used as a context manager so the file is flushed and closed
    even when serializing or writing one of the examples raises.
    """
    with tf.io.TFRecordWriter(file_path) as writer:
        for example in dataset:
            writer.write(example.SerializeToString())

# Function to parse a TFRecord into a TensorFlow Example object
def parse_example(record):
    """Decode one raw dataset record into a ``tf.train.Example``.

    Args:
        record: Object whose ``numpy()`` method returns the serialized
            example bytes (an element of the ``TFRecordDataset``).

    Returns:
        A new ``tf.train.Example`` populated from the serialized bytes.
    """
    serialized_bytes = record.numpy()
    decoded = tf.train.Example()
    decoded.ParseFromString(serialized_bytes)
    return decoded

# Function to count the number of features in a TFRecord example
def count_features(example):
    """Return how many named features ``example`` carries.

    Args:
        example: Object exposing a ``features.feature`` mapping
            (a ``tf.train.Example`` in this notebook).

    Returns:
        The number of keys in the feature mapping.
    """
    feature_map = example.features.feature
    return len(feature_map)

# Function to classify the fire size based on the PrevFireMask feature
def classify_fire(prev_fire_mask, medium_threshold=0.05, large_threshold=0.1, verbose=True):
    """Classify a sample's fire size from its PrevFireMask feature.

    The classification is driven by the ratio of 1-valued entries to all
    entries that are not -1. The mask is treated as a flat collection of
    values, so any tile size is accepted (counting does not depend on the
    2-D layout).

    Args:
        prev_fire_mask: Feature object whose ``float_list.value`` holds the
            mask values.
        medium_threshold: Ratio strictly above which the sample is at least
            a "Medium fire". Defaults to 0.05.
        large_threshold: Ratio strictly above which the sample is a
            "Large fire". Defaults to 0.1.
        verbose: When True (the default), print the intermediate counts
            for tracing.

    Returns:
        One of ``"Large fire"``, ``"Medium fire"`` or ``"Small fire"``.
        A mask with no non -1 entries (including an empty mask) has ratio 0
        and is therefore classified as ``"Small fire"``.
    """
    mask_values = np.array(prev_fire_mask.float_list.value)

    # Tally each of the three values the mask is expected to contain.
    count_minus_1 = np.sum(mask_values == -1)
    count_0 = np.sum(mask_values == 0)
    count_1 = np.sum(mask_values == 1)

    # -1 entries are excluded from the denominator; guard the all -1 case
    # so the division never sees a zero denominator.
    total_non_minus_1 = np.sum(mask_values != -1)
    ratio_1 = count_1 / total_non_minus_1 if total_non_minus_1 > 0 else 0

    if verbose:
        print(f"Debug - count_minus_1: {count_minus_1}, count_0: {count_0}, count_1: {count_1}, total_non_minus_1: {total_non_minus_1}, ratio_1: {ratio_1:.2f}")

    # Thresholds are exclusive: a ratio exactly equal to a threshold falls
    # into the smaller class.
    if ratio_1 > large_threshold:
        return "Large fire"
    if ratio_1 > medium_threshold:
        return "Medium fire"
    return "Small fire"

# Function to parse the raw record and classify the fire
def _parse_and_classify_fn(raw_record):
    """Decode a raw record and classify it by its PrevFireMask feature.

    Args:
        raw_record: Serialized record accepted by ``parse_example``.

    Returns:
        A 4-tuple ``(example, prev_fire_mask, fire_mask, classification)``:
        the decoded example, its 'PrevFireMask' and 'FireMask' features, and
        the label produced by ``classify_fire`` for the PrevFireMask.
    """
    decoded = parse_example(raw_record)
    feature_map = decoded.features.feature

    # Look the two masks up in the same order as before: previous, then current.
    previous_mask = feature_map['PrevFireMask']
    current_mask = feature_map['FireMask']

    return decoded, previous_mask, current_mask, classify_fire(previous_mask)

# --- Configuration -----------------------------------------------------------
# Input shards to classify and the directory that receives one TFRecord file
# per fire-size class.
file_pattern = '/content/drive/My Drive/northamerica_2012-2023/test/*'
OUTPUT_DIR = '/content/drive/MyDrive/new_wildfire_subsets_by_firesize'
# Only records with exactly this many features are classified; others are skipped.
EXPECTED_FEATURE_COUNT = 13
# Layout the PrevFireMask values are reshaped to; a record whose mask does not
# fit this shape raises and is reported as a failed record.
MASK_SHAPE = (64, 64)

# --- Buckets for each fire classification --------------------------------------
# NOTE: classify_fire in this notebook only returns "Small fire", "Medium fire"
# or "Large fire", so the "no fire" and "very large" buckets stay empty. They
# are kept so the same five output files are still produced.
no_fire_records = []
small_fire_records = []
medium_fire_records = []
large_fire_records = []
very_large_fire_records = []

records_by_classification = {
    "No fire": no_fire_records,
    "Small fire": small_fire_records,
    "Medium fire": medium_fire_records,
    "Large fire": large_fire_records,
}

# Create the destination up front so a missing or unwritable directory is
# reported before the (slow) parsing pass instead of after it.
tf.io.gfile.makedirs(OUTPUT_DIR)

# --- Load the dataset ------------------------------------------------------------
input_files = tf.io.gfile.glob(file_pattern)
if not input_files:
    print(f"Warning: no input files match {file_pattern!r}; every subset will be empty.")
raw_dataset = tf.data.TFRecordDataset(input_files)

# --- Parse and classify ----------------------------------------------------------
skipped_records = 0  # records whose feature count != EXPECTED_FEATURE_COUNT
failed_records = 0   # records that raised while being parsed or classified

print("Starting to parse the dataset...")
for record_index, raw_record in enumerate(raw_dataset):
    try:
        # Parse each record exactly once and reuse the Example everywhere below.
        example = parse_example(raw_record)

        if count_features(example) != EXPECTED_FEATURE_COUNT:
            skipped_records += 1
            continue

        # Classify the fire based on the PrevFireMask
        prev_fire_mask = example.features.feature['PrevFireMask']
        classification = classify_fire(prev_fire_mask)

        # Recompute the statistics for the per-record report; the reshape also
        # validates that the mask has the expected number of values.
        prev_fire_mask_array = np.array(prev_fire_mask.float_list.value).reshape(MASK_SHAPE)
        count_1 = np.sum(prev_fire_mask_array == 1)
        total_non_minus_1 = np.sum(prev_fire_mask_array != -1)
        ratio_1 = count_1 / total_non_minus_1 if total_non_minus_1 > 0 else 0

        print(f"Record classification: {classification}")
        print(f"Count of 1 values: {count_1}")
        print(f"Total non -1 values: {total_non_minus_1}")
        print(f"Ratio of 1 values to non -1 values: {ratio_1:.2f}")

        # Any label that is not one of the four known ones lands in the
        # "very large" bucket, as before.
        records_by_classification.get(classification, very_large_fire_records).append(example)
    except Exception as e:
        # Best effort: report the failure and continue with the next record.
        # Only the record's position is printed; dumping the serialized bytes
        # would flood the cell output.
        failed_records += 1
        print(f"Error parsing record: {e}")
        print(f"Record index: {record_index}")

# --- Save each classification to its own TFRecord file ---------------------------
subsets_to_write = (
    ("No fire", "no_fire_data.tfrecords", no_fire_records),
    ("Small fire", "small_fire_data.tfrecords", small_fire_records),
    ("Medium fire", "medium_fire_data.tfrecords", medium_fire_records),
    ("Large fire", "large_fire_data.tfrecords", large_fire_records),
    ("Very large fire", "very_large_fire_data.tfrecords", very_large_fire_records),
)
for label, filename, records in subsets_to_write:
    write_tfrecord(f"{OUTPUT_DIR}/{filename}", records)
    print(f"{label} records: {len(records)}")

print(f"Skipped records (feature count != {EXPECTED_FEATURE_COUNT}): {skipped_records}")
print(f"Failed records: {failed_records}")
print("Data has been classified and saved into respective TFRecord files.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Record classification: Small fire
Count of 1 values: 19
Total non -1 values: 4096
Ratio of 1 values to non -1 values: 0.00
Debug - count_minus_1: 0, count_0: 3988, count_1: 108, total_non_minus_1: 4096, ratio_1: 0.03
Record classification: Small fire
Count of 1 values: 108
Total non -1 values: 4096
Ratio of 1 values to non -1 values: 0.03
Debug - count_minus_1: 0, count_0: 4087, count_1: 9, total_non_minus_1: 4096, ratio_1: 0.00
Record classification: Small fire
Count of 1 values: 9
Total non -1 values: 4096
Ratio of 1 values to non -1 values: 0.00
Debug - count_minus_1: 1, count_0: 4057, count_1: 38, total_non_minus_1: 4095, ratio_1: 0.01
Record classification: Small fire
Count of 1 values: 38
Total non -1 values: 4095
Ratio of 1 values to non -1 values: 0.01
Debug - count_minus_1: 0, count_0: 3988, count_1: 108, total_non_minus_1: 4096, ratio_1: 0.03
Record classification: Small fire
Count of 1 values: 108
Total non -1 