In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
%matplotlib inline

In [2]:
# Read the historical wildfire workbook (file name indicates 2006-2021) into a DataFrame.
fires = pd.read_excel(io="fp-historical-wildfire-data-2006-2021.xlsx")

In [3]:
# Summarise the raw wildfire table: row count, column names, non-null counts and dtypes.
fires.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22914 entries, 0 to 22913
Data columns (total 50 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   fire_year                     22914 non-null  int64         
 1   fire_number                   22914 non-null  object        
 2   fire_name                     604 non-null    object        
 3   current_size                  22914 non-null  float64       
 4   size_class                    22914 non-null  object        
 5   fire_location_latitude        22914 non-null  float64       
 6   fire_location_longitude       22914 non-null  float64       
 7   fire_origin                   22905 non-null  object        
 8   general_cause_desc            22914 non-null  object        
 9   industry_identifier_desc      408 non-null    object        
 10  responsible_group_desc        9031 non-null   object        
 11  activity_class              

In [4]:
# Keep only Size Class E fires; every other size class (including Class A,
# 0 to 0.1 ha) is dropped by this filter.
# NOTE(review): Class E looks like the largest size class (the sorted listing
# further down bottoms out at roughly 200 ha) - confirm against the data dictionary.
class_E_fires = fires.loc[fires['size_class'] == 'E']

In [5]:
# Check how many Class E fires remain and which columns are populated for them.
class_E_fires.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 325 entries, 208 to 22819
Data columns (total 50 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   fire_year                     325 non-null    int64         
 1   fire_number                   325 non-null    object        
 2   fire_name                     184 non-null    object        
 3   current_size                  325 non-null    float64       
 4   size_class                    325 non-null    object        
 5   fire_location_latitude        325 non-null    float64       
 6   fire_location_longitude       325 non-null    float64       
 7   fire_origin                   325 non-null    object        
 8   general_cause_desc            325 non-null    object        
 9   industry_identifier_desc      2 non-null      object        
 10  responsible_group_desc        36 non-null     object        
 11  activity_class              

In [6]:
# Restrict the study to fire years 2014 through 2021 (inclusive).
# Lower bound: the Petrinex NGL and Marketable Gas Volumes data is listed here as
# covering 2014 through 2023. Upper bound: the wildfire workbook ends in 2021.
# NOTE(review): this note used to describe the Petrinex coverage as "the last 4 years",
# which contradicts the 2014-2023 range - confirm the actual coverage.
relevant_fires = class_E_fires[class_E_fires['fire_year'].isin(range(2014, 2022))]

In [7]:
# List the retained fires by current_size, largest first.
relevant_fires.loc[:, ['fire_name', 'current_size']].sort_values('current_size', ascending=False)

Unnamed: 0,fire_name,current_size
6376,Horse River Fire,485123.6
2333,Chuckegg Creek Fire,334772.6
2905,McMillan Complex,222837.2
8087,,68000.0
2357,Jackpot Creek Fire,64659.1
...,...,...
8534,,209.6
8570,North End Complex,208.7
6122,,204.3
5287,,201.5


## Consider a Case Study
### Horse River Fire

In [8]:
# Isolate the case-study fire by its name.
horse_river_fire = relevant_fires.loc[
    relevant_fires['fire_name'] == 'Horse River Fire'
]

In [57]:
# Show the full record for the Horse River Fire.
horse_river_fire

Unnamed: 0,fire_year,fire_number,fire_name,current_size,size_class,fire_location_latitude,fire_location_longitude,fire_origin,general_cause_desc,industry_identifier_desc,...,distance_from_water_source,first_bucket_drop_date,bh_fs_date,bh_hectares,uc_fs_date,uc_hectares,to_fs_date,to_hectares,ex_fs_date,ex_hectares
6376,2016,MWF009,Horse River Fire,485123.6,E,56.670117,-111.464233,Provincial Land,Under Investigation,,...,,NaT,2016-06-13 08:49:00,589617.0,2016-07-04 09:30:00,589552.0,NaT,,2017-08-02 17:55:00,485123.6


In [9]:
# Pull out the dates used to bound the fire period.
# NOTE(review): uc_fs_date is presumably the "under control" status date - confirm.
horse_river_fire.loc[:, ['fire_year', 'fire_start_date', 'uc_fs_date']]

Unnamed: 0,fire_year,fire_start_date,uc_fs_date
6376,2016,2016-05-01 00:00:00,2016-07-04 09:30:00


In [10]:
# Monthly production files spanning the fire period (May through July 2016).
data_during_fire = [
    'NGL_2016-05-AB.CSV',
    'NGL_2016-06-AB.CSV',
    'NGL_2016-07-AB.CSV',
]

In [40]:
# Load every monthly file listed in data_during_fire and stack them into a single
# during-fire frame. ignore_index=True gives the combined frame one continuous
# RangeIndex instead of repeating each file's own row numbers.
# This cell must define NGL_gas_volumes_during_fire: the later cleaning and
# aggregation cells read it, and nothing else in the notebook creates it.
NGL_gas_volumes_during_fire = pd.concat(
    [pd.read_csv(file_name) for file_name in data_during_fire],
    axis=0,
    ignore_index=True,
)

In [41]:
# Baseline for comparison: the April 2016 file, i.e. the month before the fire's start date.
NGL_gas_volumes_before_fire = pd.read_csv(filepath_or_buffer='NGL_2016-04-AB.CSV')

In [42]:
# According to the Map of Designated Oil and Gas Fields, the fields closest to the
# fire are Newby, Resdeln, Hangingstone, Saleski, House, Divide, Granor, Liege and Ells.
# Their field codes are written as floats, matching the float64 dtype of the
# production data's Field column.
# NOTE(review): the code-to-name pairing is not shown here - verify against the field list.
close_field_codes = [
    640.0,
    1228.0,
    444.0,
    811.0,
    473.0,
    1053.0,
    396.0,
    538.0,
    1319.0,
]

In [43]:
# Discard pre-fire rows that carry no Field code; they cannot be matched to a field.
NGL_gas_volumes_before_fire = NGL_gas_volumes_before_fire.dropna(subset=['Field'])

In [44]:
# Same clean-up for the during-fire data: discard rows that carry no Field code.
NGL_gas_volumes_during_fire = NGL_gas_volumes_during_fire.dropna(subset=['Field'])

In [45]:
# Inspect the pre-fire production data after dropping rows without a Field:
# remaining row count, columns, non-null counts and dtypes.
NGL_gas_volumes_before_fire.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 122830 entries, 1705 to 124535
Data columns (total 26 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   ReportingFacilityID    122818 non-null  object 
 1   ReportingFacilityName  122818 non-null  object 
 2   OperatorBAID           122818 non-null  object 
 3   OperatorName           122818 non-null  object 
 4   ProductionMonth        122830 non-null  object 
 5   WellID                 122830 non-null  object 
 6   WellLicenseNumber      122830 non-null  object 
 7   Field                  122830 non-null  float64
 8   Pool                   116063 non-null  float64
 9   Area                   5344 non-null    object 
 10  Hours                  122830 non-null  int64  
 11  GasProduction          122830 non-null  float64
 12  OilProduction          122830 non-null  float64
 13  CondensateProduction   122830 non-null  float64
 14  WaterProduction        122830 non

In [46]:
# Narrow the pre-fire data to the columns used in the comparison, then drop rows
# with missing Hours. The .info() output above shows Hours fully populated, so the
# second step acts as a safeguard here.
NGL_gas_volumes_before_fire = (
    NGL_gas_volumes_before_fire
    .loc[:, ['WellID', 'Field', 'Hours', 'GasProduction', 'Energy']]
    .dropna(subset=['Hours'])
)

In [47]:
# Narrow the during-fire data to the columns used in the comparison, then drop rows
# with missing Hours.
NGL_gas_volumes_during_fire = (
    NGL_gas_volumes_during_fire
    .loc[:, ['WellID', 'Field', 'Hours', 'GasProduction', 'Energy']]
    .dropna(subset=['Hours'])
)

In [48]:
# Flag each row by whether its Field code is one of the fields near the fire.
# Close_Field is True for codes in close_field_codes and False otherwise.
NGL_gas_volumes_before_fire = NGL_gas_volumes_before_fire.assign(
    Close_Field=NGL_gas_volumes_before_fire['Field'].isin(close_field_codes)
)
NGL_gas_volumes_during_fire = NGL_gas_volumes_during_fire.assign(
    Close_Field=NGL_gas_volumes_during_fire['Field'].isin(close_field_codes)
)

In [49]:

# Preview the cleaned during-fire frame, including the new Close_Field flag.
NGL_gas_volumes_during_fire

Unnamed: 0,WellID,Field,Hours,GasProduction,Energy,Close_Field
1716,ABWI100012405027W400,551.0,0,0.0,0,False
1717,ABWI100040207311W602,948.0,0,0.0,0,False
1718,ABWI100043501519W402,560.0,0,0.0,0,False
1719,ABWI100070104424W400,816.0,0,0.0,0,False
1720,ABWI100073502208W400,85.0,0,0.0,4,False
...,...,...,...,...,...,...
370547,ABWI109142108518W500,187.0,740,38.3,1099,False
370548,ABWI110142108518W500,187.0,743,52.6,1509,False
370549,ABWI100082606624W500,56.0,149,105.0,4015,False
370550,ABWI100081506206W400,386.0,272,527.3,17609,False


In [50]:
# Pre-fire summary: mean and standard deviation of Hours, GasProduction and Energy,
# computed separately for rows in fields near the fire (Close_Field True) and all others.
comparing_fields_before_fire = (
    NGL_gas_volumes_before_fire
    .groupby('Close_Field')[['Hours', 'GasProduction', 'Energy']]
    .agg(['mean', 'std'])
)

In [54]:
# During-fire summary: mean and standard deviation of Hours, GasProduction and Energy,
# computed separately for rows in fields near the fire (Close_Field True) and all others.
comparing_fields_during_fire = (
    NGL_gas_volumes_during_fire
    .groupby('Close_Field')[['Hours', 'GasProduction', 'Energy']]
    .agg(['mean', 'std'])
)

In [55]:
# Show the pre-fire (April 2016) summary for close vs. other fields.
comparing_fields_before_fire

Unnamed: 0_level_0,Hours,Hours,GasProduction,GasProduction,Energy,Energy
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Close_Field,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
False,692.145993,93.553829,77.020604,317.499672,2727.050395,11559.971598
True,708.938889,64.786729,93.792778,99.196655,2949.316667,3177.037622


In [56]:
# Show the during-fire summary for close vs. other fields, for comparison with the pre-fire table.
comparing_fields_during_fire

Unnamed: 0_level_0,Hours,Hours,GasProduction,GasProduction,Energy,Energy
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Close_Field,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
False,702.694117,101.320076,75.558624,302.756112,2679.602408,11050.384494
True,719.418787,74.852959,96.545793,97.718881,3073.003914,3159.223664
