In [1]:
import pandas as pd
import datetime
import numpy as np

# Template preprocessing

Read in the template that has the burn activity of the different fires over time, and get the fire IDs represented.

In [2]:
# Load the burn-activity table; missing cells become 0 so every column is numeric-safe.
template = pd.read_csv('temporalBurnArea.csv').fillna(0)
# Emissions are handled as whole numbers from here on.
template['epm_kg'] = template['epm_kg'].astype(int)

# Distinct fire IDs, in order of first appearance in the file.
fires = template['FireID'].unique()
# Accumulator for the per-tracer copies built in a later cell.
output_data = []

print(fires)
print(template['epm_kg'].dtype)

[20 33 35 37  9 24 21 22 25 27 26 31 32 34]
int64


The template needs some work. It was generated in LibreOffice by selecting only certain rows from a CSV file that held emissions for each fire ID. However, not every hour contains data for every fire. We need to ensure that every fire is represented once and only once for each hour of the simulation: when finished, the template should contain exactly one record per fire per timestep. First, we need to get the start and end times.

In [3]:
def get_bounds(frame=None):
    """Return the time span covered by the burn-activity table.

    The first and last rows (by position) are read and converted to
    ``datetime`` objects from their Year/Month/Day/Hour/Minute columns.
    The rows are assumed to be in chronological order; this is not checked.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to inspect. Defaults to the module-level ``template``.

    Returns
    -------
    tuple
        ``(start, last, delta_hours)`` where ``start`` and ``last`` are
        ``datetime.datetime`` objects and ``delta_hours`` is the whole number
        of hours between them.

    Raises
    ------
    ValueError
        If the table has no rows.
    """
    if frame is None:
        frame = template
    if len(frame) == 0:
        raise ValueError("cannot compute bounds of an empty template")

    def _timestamp(rec):
        # Columns may be floats after fillna(), hence the int() casts.
        return datetime.datetime(int(rec.Year),
                                 int(rec.Month),
                                 int(rec.Day),
                                 int(rec.Hour),
                                 int(rec.Minute), 0)

    # Positional access: correct regardless of what the index labels are.
    start = _timestamp(frame.iloc[0])
    last = _timestamp(frame.iloc[-1])

    delta = last - start
    delta_hours = int((delta.days * 24) + (delta.seconds / 3600))
    return start, last, delta_hours

# Evaluate the bounds once; the trailing bare name displays the hour count.
start, last, delta_hours = get_bounds()
delta_hours


217

In [4]:
def lookup_from_template(index, frame=None):
    """Return the first record matching a simulation hour and fire ID.

    The match is on Year, Month, Day, Hour and FireID; the Minute column is
    not part of the comparison.

    Parameters
    ----------
    index : sequence
        ``index[0]`` is the simulation time (an object with ``year``,
        ``month``, ``day`` and ``hour`` attributes) and ``index[1]`` is the
        fire ID.
    frame : pandas.DataFrame, optional
        Table to search. Defaults to the module-level ``template``.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame holding the first matching row (in row order), or
        ``None`` when nothing matches.
    """
    if frame is None:
        frame = template

    sim_time = index[0]
    fire = index[1]

    matching = ((frame.Year == sim_time.year) &
                (frame.Month == sim_time.month) &
                (frame.Day == sim_time.day) &
                (frame.Hour == sim_time.hour) &
                (frame.FireID == fire))

    # Positions of the matching rows; scanning the mask once is enough.
    hits = np.flatnonzero(matching.values)
    if hits.size == 0:
        return None
    return frame.iloc[[hits[0]]]

def lookup_fireid(the_fire, frame=None):
    """Find and return the first record pertaining to the given fire ID.

    Parameters
    ----------
    the_fire
        Value compared against the ``FireID`` column.
    frame : pandas.DataFrame, optional
        Table to search. Defaults to the module-level ``template``.

    Returns
    -------
    pandas.DataFrame
        A one-row frame holding the first row (in row order) for that fire.

    Raises
    ------
    IndexError
        If no row carries the requested fire ID.
    """
    if frame is None:
        frame = template

    hits = np.flatnonzero((frame.FireID == the_fire).values)
    if hits.size == 0:
        raise IndexError("no record found for fire id %r" % (the_fire,))
    # `hits` holds row positions, so select positionally (.iloc), not by label.
    return frame.iloc[[hits[0]]]
    

# Build the full (timestamp, fire id) grid for the output, hour-major:
# every fire is listed for hour 0, then every fire for hour 1, and so on.
# NOTE(review): range(delta_hours) stops one hour short of `last`, so the
# final hour in the template is not part of the grid -- confirm this is intended.
hour_stamps = [start + datetime.timedelta(hours=h) for h in range(delta_hours)]
indices = [(stamp, fire_id) for stamp in hour_stamps for fire_id in fires]
        
        
        

Here is where we do a couple of things. First, we create a new template from the data in the CSV file (read in previously). We do this a little laboriously because there are some instances where there are duplicate emissions for the same fire in the same time period. By using a loop, we can make sure there is only one value. A loop also allows us to find all those fire+time combinations which have no emissions data in the CSV file. Once these are known, we can pad out the template with dummy values for each timestep where they are missing. We do not want to shut the fire off completely, but instead turn it down to a minimum value.

Note that when we turn the fire off, we set its emission to 2 "units per hour". In the particle dump output, you can identify these "off" particles by their mass of 2/60/(*number of processors*).

In [5]:

# Build the complete template: exactly one row per (timestamp, fire id) pair.
# Rows are collected in a list and concatenated once at the end; growing a
# DataFrame with DataFrame.append inside the loop is quadratic, and that
# method no longer exists in pandas 2.x.
min_emission_val = 2

template_rows = []
for i in indices:
    thisval = lookup_from_template(i)
    if thisval is None:
        # No record for this fire at this hour: pad with a zero-emission
        # placeholder located at the fire's first recorded coordinates.
        firstfire = lookup_fireid(i[1])
        thisval = pd.DataFrame({"Year": int(i[0].year), "Month": int(i[0].month), "Day": int(i[0].day),
                                "Hour": int(i[0].hour), "Minute": 0, "Duration": 100,
                                "Latitude": firstfire.Latitude, "Longitude": firstfire.Longitude, "Height": 10,
                                "epm_kg": int(0),
                                "Area": 0,
                                "Power": 0,
                                "FireID": i[1]})
    template_rows.append(thisval)

# ignore_index renumbers the rows 0..n-1 in grid order.
full_template = pd.concat(template_rows, ignore_index=True)

# Every zero emission (padded rows and zeros already present in the data) is
# raised to the floor value so that no fire is ever fully switched off.
full_template.loc[full_template.epm_kg == 0, 'epm_kg'] = min_emission_val

# Sparsifying the matrix

Now expand the template out, yielding one record for each pollutant per timestep per fireID. HYSPLIT's mechanism for this allows you to specify any amount of pollutant release for each source location. To do this, you repeat the _entire_ list of source locations for each pollutant. We are only going to allow a single fire to release a single type of pollutant. Hence we walk through the list of fire IDs and, for each one, emit a copy of the template in which every _other_ fire is turned down to a set minimum value. This is a different set minimum value than above, as this represents the value of a pollutant emitted by a fire which should not be emitting that tracer at all.

Note we have chosen to set this "should not be emitting that tracer at all" value to 1 mass unit per hour. You can identify these particles in the particle dump by looking for a mass of 1./60/(*number of processors*).

In [6]:
# One full copy of the template per fire (= per tracer); within each copy only
# the matching fire keeps its emission. The list is rebuilt here so that
# re-running this cell does not append a second set of copies.
# NOTE(review): the accompanying text describes a floor of 1 mass unit per hour
# for fires that should not emit a tracer, but this cell writes 0 -- confirm
# which value is intended.
output_data = []
for fire in fires:
    tracer_copy = full_template.copy()
    tracer_copy.loc[tracer_copy['FireID'] != fire, 'epm_kg'] = 0
    output_data.append(tracer_copy)

In [7]:
# Stack the per-tracer copies into one table, renumbering the rows, and
# discard the FireID column, which is not wanted in the written output.
output_df = (
    pd.concat(output_data, sort=False, ignore_index=True)
    .drop(columns='FireID')
)

In [8]:
# Quick look at the emission column of the stacked table.
output_df.loc[:, 'epm_kg']

0           2
1           0
2           0
3           0
4           0
         ... 
42527       0
42528       0
42529       0
42530       0
42531    2931
Name: epm_kg, Length: 42532, dtype: int64

In [9]:
# Rows of the stacked table that still carry a non-zero emission.
output_df.loc[output_df['epm_kg'].ne(0)]

Unnamed: 0,Year,Month,Day,Hour,Minute,Duration,Latitude,Longitude,Height,epm_kg,Area,Power
0,2017,8,20,6,0,100,46.37079,-114.4978,10,2,0,0
14,2017,8,20,7,0,100,46.37079,-114.4978,10,2,0,0
28,2017,8,20,8,0,100,46.37079,-114.4978,10,2,0,0
42,2017,8,20,9,0,100,46.37079,-114.4978,10,2,0,0
56,2017,8,20,10,0,100,46.37079,-114.4978,10,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
42475,2017,8,29,2,0,100,45.98461,-115.0838,10,2931,0,0
42489,2017,8,29,3,0,100,45.98461,-115.0838,10,5862,0,0
42503,2017,8,29,4,0,100,45.98461,-115.0838,10,5862,0,0
42517,2017,8,29,5,0,100,45.98461,-115.0838,10,2931,0,0


In [10]:
# For comparison: rows of the original template with a non-zero emission.
template.loc[template['epm_kg'].ne(0)]

Unnamed: 0,Year,Month,Day,Hour,Minute,Duration,Latitude,Longitude,Height,epm_kg,Area,Power,FireID
62,2017,8,22,20,0,100,46.37079,-114.4978,10,8715,0,0,20
63,2017,8,22,21,0,100,46.37079,-114.4978,10,14525,0,0,20
64,2017,8,22,22,0,100,46.37079,-114.4978,10,11620,0,0,20
65,2017,8,22,23,0,100,46.37079,-114.4978,10,11620,0,0,20
66,2017,8,23,0,0,100,46.37079,-114.4978,10,8715,0,0,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1450,2017,8,29,5,0,100,46.08342,-115.0062,10,3746,0,0,35
1452,2017,8,29,6,0,100,46.37918,-114.4868,10,9498,0,0,20
1454,2017,8,29,6,0,100,44.37884,-115.5267,10,27342,0,0,32
1456,2017,8,29,6,0,100,45.98461,-115.0838,10,2931,0,0,34


In [11]:
# Write the stacked table as space-separated text with no header or row labels;
# floating-point columns are formatted as %8.5f.
output_df.to_csv(
    'multiple_output',
    sep=' ',
    header=False,
    index=False,
    float_format='%8.5f',
)

In [12]:
# Source locations only: one row per record of the full template.
# Building the frame from a list of Series and transposing gives the columns a
# common dtype (Height is displayed as 10.0), so float_format applies to all three.
location_columns = ('Latitude', 'Longitude', 'Height')
just_locations = pd.DataFrame([full_template[name] for name in location_columns]).T
just_locations.to_csv(
    'fire_locations',
    sep=' ',
    header=False,
    index=False,
    float_format='%8.5f',
)

In [13]:
# Display the location table that was written to 'fire_locations'.
just_locations

Unnamed: 0,Latitude,Longitude,Height
0,46.37079,-114.4978,10.0
1,44.57615,-114.6479,10.0
2,46.08297,-115.0099,10.0
3,46.15411,-114.8688,10.0
4,48.97658,-115.4183,10.0
...,...,...,...
3033,47.23333,-113.4089,10.0
3034,47.08519,-113.7367,10.0
3035,45.38332,-115.1659,10.0
3036,44.37884,-115.5267,10.0
