Note: input_tracking goes up to 20218, so the last 10000 would start at 10218, which has value 1845.0.

In [12]:
import numpy as np
import re
import sys
import os
import pandas as pd

# Goal: take every line of fireareas10000.txt and append it to the matching row of
# input_tracking.txt in ../01-dataset-fn-sub15/.
# input_tracking.txt begins with a header row, so it is expected to contain exactly
# one more line than fireareas10000.txt.
input_tracking_path = '../01-dataset-fn-sub15/input_tracking.txt'
fireareas_path = '../fireareas10000.txt'

# Load the tracking rows (header included), one list entry per line, newlines kept.
with open(input_tracking_path, 'r') as f:
    lines = list(f)
# Load the fire-area values the same way.
with open(fireareas_path, 'r') as f:
    fireareas_lines = list(f)

# Report (but do not abort on) a line-count mismatch between the two files.
if len(fireareas_lines) + 1 != len(lines):
    print(f"Error: {len(lines)} lines in input_tracking.txt, {len(fireareas_lines)} lines in fireareas10000.txt")


In [14]:
# count the comma separated values in this string 10000,489.4,-1781.1,14,38,61,15.5,9.1,4.8,7.6,10.5,81,4,2,22,79.4,80.4,165.9
def count_comma_separated_values(s):
    """Return the number of comma-separated fields in the string *s*.

    The count is the number of commas plus one, so an empty string (or any
    string without a comma) counts as a single field. No quoting or escaping
    rules are applied; every ',' is treated as a separator.
    """
    # str.count does the same job as a regex search for a literal comma.
    return s.count(',') + 1

# Column-count sanity checks on entries of `lines`. In a notebook cell only the value
# of the last expression is displayed, so the first result is computed and discarded.
count_comma_separated_values(lines[0])  # should be 18, the number of columns in the header
# NOTE(review): the earlier note here said "should be 1, the fire area column", but
# `lines` appears to hold rows of input_tracking.txt and the recorded cell output is
# 18 -- confirm which file/row this check was meant for.
count_comma_separated_values(lines[10000])

18

In [9]:
# Append the matching fire area to the end of every data row of input_tracking.txt.
# The first line is the header; it is written back unchanged because it already
# names the firearea column.
#
# All output rows are assembled in memory BEFORE the file is opened for writing:
# opening with 'w' truncates the file, so an IndexError caused by too few fire-area
# lines must be raised while the file on disk is still intact.
updated_lines = lines[:1]  # header row (empty list if the file had no lines)
updated_lines += [
    # data row i+1 pairs with fire-area line i, since the fire-area file has no header
    row.strip() + ',' + fireareas_lines[i].strip() + '\n'
    for i, row in enumerate(lines[1:])
]

# NOTE: this appends unconditionally; re-reading the file and running this again
# would add a second fire-area value to every row.
with open(input_tracking_path, 'w') as f:
    f.writelines(updated_lines)

In [None]:
# get lines 1024 onward from input_tracking.txt
import numpy as np
import re
import sys
import os
import pandas as pd

def get_fire_areas_for_10000(input_file='input_tracking.txt', skiprows=1023):
    """Return the first column of *input_file*, skipping the first *skiprows* lines.

    The file is parsed as tab-separated text without a header row, and the
    values of its first column are returned as a plain Python list.

    Args:
        input_file: Path of the file to read. Defaults to 'input_tracking.txt'
            in the current working directory.
        skiprows: Number of leading lines to skip before parsing. The default
            of 1023 starts reading at line 1024.

    Returns:
        A list with the first-column values, or an empty list when no data
        remains after the skipped lines.

    Raises:
        SystemExit: If *input_file* does not exist (exit status 1).

    NOTE(review): other code in this file treats input_tracking.txt as
    comma-separated; with sep='\\t' a comma-separated row is read as one
    string column -- confirm the expected file format.
    """
    # Read the input tracking file
    if not os.path.exists(input_file):
        print(f"Error: {input_file} does not exist.")
        sys.exit(1)

    no_data_message = f"No data found in the input tracking file after line {skiprows + 1}."

    try:
        df = pd.read_csv(input_file, sep='\t', header=None, skiprows=skiprows)
    except pd.errors.EmptyDataError:
        # read_csv raises instead of returning an empty frame when nothing is
        # left to parse after the skipped lines.
        print(no_data_message)
        return []

    # Check if the DataFrame is empty
    if df.empty:
        print(no_data_message)
        return []

    # Extract the first column as a plain list
    return df.iloc[:, 0].values.tolist()

# Add fire areas to the end of each data line of the tracking file,
# where the lines are structured like so: run,xign,yign,fuel,slp,asp,ws,wd,m1,m10,m100,cc,ch,cbh,cbd,lhc,lwc,firearea
# NOTE: the default target is input_tracking.txt in the working directory; pass
# input_file explicitly to update ../01-dataset-fn-sub15/input_tracking.txt.
def add_area_to_end_of_lines(fire_areas, input_file='input_tracking.txt'):
    """Append one fire-area value to every data line of *input_file*, in place.

    The first line of the file is treated as a header and written back
    unchanged; each following line is stripped of surrounding whitespace and
    gets ``,<fire area>`` appended.

    Args:
        fire_areas: Sequence of values, one per data line, in file order.
        input_file: Path of the file to rewrite. Defaults to
            'input_tracking.txt' in the current working directory.

    Returns:
        None. If the number of fire areas differs from the number of data
        lines, an error is printed and the file is left untouched.

    Raises:
        SystemExit: If *input_file* does not exist (exit status 1).
    """
    if not os.path.exists(input_file):
        print(f"Error: {input_file} does not exist.")
        sys.exit(1)

    with open(input_file, 'r') as f:
        lines = f.readlines()

    # Check if the number of fire areas matches the number of lines
    if len(fire_areas) != len(lines) - 1:  # Exclude header line
        print(f"Error: Expected {len(lines) - 1} fire areas, but got {len(fire_areas)}.")
        print("Error: The number of fire areas does not match the number of data lines.")
        return

    # Rewrite the file: keep the header (it would otherwise be lost when the
    # file is truncated), then add the fire area to each data line.
    with open(input_file, 'w') as f:
        f.writelines(lines[:1])
        f.writelines(
            f'{line.strip()},{area}\n'
            for line, area in zip(lines[1:], fire_areas)
        )

# Read the fire areas once (the file does not need to be parsed twice) and append
# them to the tracking file.
fire_areas = get_fire_areas_for_10000()
add_area_to_end_of_lines(fire_areas)
