In [None]:
import pandas as pd
import numpy as np
from math import sqrt, pi

In [None]:
# Load the two inputs of this cell: the fire table (iterated as `fire_row`
# further down) and the facility table that receives the monthly flags.
updated_df, st50a_df = (
    pd.read_csv(csv_name)
    for csv_name in ('updated_file.csv', 'ST50A_cleaned.csv')
)


def calc_distance(lat1, lon1, lat2, lon2, radius_m=6371 * 1000):
    """Return the great-circle distance between two points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float or numpy array
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or numpy array
        Latitude and longitude of the second point(s), in decimal degrees.
    radius_m : float, optional
        Radius of the sphere. Defaults to 6371 km expressed in metres, the
        value commonly used as the mean Earth radius.

    Returns
    -------
    float or numpy array
        Distance in the same unit as ``radius_m`` (metres by default).
        Inputs broadcast against each other following numpy rules, so one
        point can be compared against a whole array of points in one call.
    """
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_m * c


# Analysis window: one 0/1 indicator column per calendar month ('YYYY-MM').
start_date = pd.to_datetime('2007-02-01')
end_date = pd.to_datetime('2021-10-31')
month_range = pd.date_range(start_date, end_date, freq='MS')

for month in month_range:
    st50a_df[month.strftime('%Y-%m')] = 0

# Fire-side quantities do not depend on the facility, so they are computed
# once here instead of once per (facility, fire) pair.
fire_lat = updated_df['fire_location_latitude'].to_numpy(dtype=float)
fire_lon = updated_df['fire_location_longitude'].to_numpy(dtype=float)
# Radius of a circle with the fire's area plus a fixed 16.0934 km buffer.
# The factor 10000 presumably converts hectares to square metres -- TODO
# confirm the unit of `current_size`.
# Note: a negative size yields a NaN radius here (never matched) rather than
# the ValueError math.sqrt would raise.
fire_radius = np.sqrt((updated_df['current_size'].to_numpy(dtype=float) * 10000) / pi) + (16.0934 * 1000)
fire_dates = updated_df['fire_start_date']
has_start_date = fire_dates.notna().to_numpy()

known_columns = set(st50a_df.columns)
month_label_cache = {}  # raw start-date value -> 'YYYY-MM' label, parsed once

facility_coords = zip(st50a_df.index,
                      st50a_df['Location Latitude'],
                      st50a_df['Location Longitude'])
for index, facility_lat, facility_lon in facility_coords:
    # Distance from this facility to every fire in one vectorised call.
    distance = calc_distance(facility_lat, facility_lon, fire_lat, fire_lon)
    # NaN distances or radii compare False, so incomplete rows never match.
    within_reach = has_start_date & (distance <= fire_radius)

    for raw_date in fire_dates[within_reach].unique():
        if raw_date not in month_label_cache:
            month_label_cache[raw_date] = pd.to_datetime(raw_date).strftime('%Y-%m')
        fire_start_date = month_label_cache[raw_date]
        # Fires that started outside the analysis window have no column.
        if fire_start_date in known_columns:
            st50a_df.at[index, fire_start_date] = 1

st50a_df.to_csv('ST50A_updated_with_fire_info_pipe.csv', index=False)


In [None]:

df = pd.read_csv('pipeline_affected.csv')

capacity_col = 'Avg Capacity (1000 m3/d)'

# Coerce non-numeric capacities to NaN, then treat them as 0.
# The result is assigned back explicitly: calling fillna(inplace=True) on a
# column selection is a chained assignment that pandas warns about and that
# does not modify `df` when Copy-on-Write is enabled.
df[capacity_col] = pd.to_numeric(df[capacity_col], errors='coerce').fillna(0)

# Monthly indicator columns are recognised by their 'YYYY-MM' style names.
month_columns = [col for col in df.columns if col.startswith('20')]

# Total capacity of the rows flagged (== 1) in each month.
raw_gas_sums = {
    month: df.loc[df[month] == 1, capacity_col].sum()
    for month in month_columns
}

monthly_data_df = pd.DataFrame(list(raw_gas_sums.items()), columns=['Month', capacity_col])
# NOTE(review): the totals are stored as display strings with thousands
# separators (e.g. "1,234.56"); anything reading this CSV has to strip the
# commas before using the column numerically.
monthly_data_df[capacity_col] = monthly_data_df[capacity_col].apply(lambda x: f"{x:,.2f}")
monthly_data_df.to_csv('monthly_affected_pipes.csv', index=False)

print(monthly_data_df)


In [None]:
df = pd.read_csv('ST50A_updated_with_fire_info_update.csv')
# Unparseable gas volumes become NaN and are then counted as 0.
df['Raw Gas E3m3/d'] = pd.to_numeric(df['Raw Gas E3m3/d'], errors='coerce').fillna(0)

# Facilities with these licence statuses are left out of the totals.
excluded_statuses = ['Abandoned', 'RecExempt', 'Cancelled', 'RecCertified']
is_excluded = df['Licence Status'].isin(excluded_statuses)
df_filtered = df[~is_excluded]

month_columns = [col for col in df.columns if col.startswith('20')]

# One long-format frame per month: raw gas summed per facility subtype over
# the facilities flagged (== 1) in that month.
aggregated_data = [
    df_filtered[df_filtered[month] == 1]
    .groupby('Facility Subtype')['Raw Gas E3m3/d']
    .sum()
    .reset_index(name='Raw_Gas_Sum_E3m3/d')
    .assign(Month=month)
    for month in month_columns
]

# Stack the monthly frames, then spread subtypes into columns; a subtype with
# no flagged facility in a given month gets 0.
aggregated_df = pd.concat(aggregated_data)
pivot_df = aggregated_df.pivot(
    index='Month',
    columns='Facility Subtype',
    values='Raw_Gas_Sum_E3m3/d',
).fillna(0)
pivot_df.to_csv('monthly_gas_summary_by_subtype.csv', index=True)



In [None]:
base_df = pd.read_csv('final_merged_base_alberta_wildfire_with_subtype_gas_summary.csv')
# Normalise the join key to 'YYYY-MM' strings on both sides.
base_df['Month'] = pd.to_datetime(base_df['Month']).dt.to_period('M').astype(str)

# monthly_affected_pipes.csv stores its totals formatted with thousands
# separators (e.g. "1,234.56"). Parsing them as numbers here keeps the merged
# column numeric, so the fillna(0) below does not mix an integer into a
# column of strings.
monthly_pipes_df = pd.read_csv('monthly_affected_pipes.csv', thousands=',')
monthly_pipes_df['Month'] = pd.to_datetime(monthly_pipes_df['Month']).dt.to_period('M').astype(str)
merged_df = pd.merge(base_df, monthly_pipes_df, on='Month', how='left')

# Months present in the base table but absent from the pipes table get 0.
# Select the value columns by name rather than by position so the join key is
# skipped wherever it sits in the file.
for col in monthly_pipes_df.columns.drop('Month'):
    if col in merged_df:
        merged_df[col] = merged_df[col].fillna(0)

merged_df.to_csv('final_merged_base_alberta_wildfire_with_gas_and_pipes_summary.csv', index=False)

In [None]:
monthly_counts_df = pd.read_csv('monthly_affected_plants_count.csv')

base_df = pd.read_csv('base_alberta_wildfire_1.csv')
# Derive the 'YYYY-MM' join key from the base table's Date column.
date_stamps = pd.to_datetime(base_df['Date'])
base_df['Month'] = date_stamps.dt.to_period('M').astype(str)

# Left join keeps every base row; months without a count are filled with 0.
merged_df = base_df.merge(
    monthly_counts_df,
    how='left',
    left_on='Month',
    right_on='Month',
)
plant_counts = merged_df['Affected_Plants_Count'].fillna(0)
merged_df['Affected_Plants_Count'] = plant_counts
merged_df.to_csv('merged_base_alberta_wildfire_with_plant_counts.csv', index=False)

In [None]:
merged_df = pd.read_csv('merged_base_alberta_wildfire_with_plant_counts.csv')

# Put 'Month' first, keep the remaining columns in their existing order, and
# leave out 'Date' and 'labels'.
skipped = ('Month', 'Date', 'labels')
remaining = [name for name in merged_df.columns if name not in skipped]
cols = ['Month'] + remaining
merged_df = merged_df.loc[:, cols]
merged_df.to_csv('adjusted_merged_base_alberta_wildfire.csv', index=False)

In [None]:
# Inputs for this cell: the fire table and the table whose rows get flagged.
updated_df, st50a_df = map(pd.read_csv, ('updated_file.csv', 'ST50A_cleaned.csv'))


def calc_distance(lat1, lon1, lat2, lon2, radius_m=6371 * 1000):
    """Return the great-circle distance between two points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float or numpy array
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or numpy array
        Latitude and longitude of the second point(s), in decimal degrees.
    radius_m : float, optional
        Radius of the sphere. Defaults to 6371 km expressed in metres, the
        value commonly used as the mean Earth radius.

    Returns
    -------
    float or numpy array
        Distance in the same unit as ``radius_m`` (metres by default).
        Inputs broadcast against each other following numpy rules, so one
        point can be compared against a whole array of points in one call.
    """
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_m * c


# Analysis window: one 0/1 indicator column per calendar month ('YYYY-MM').
start_date = pd.to_datetime('2007-02-01')
end_date = pd.to_datetime('2021-10-31')
month_range = pd.date_range(start_date, end_date, freq='MS')

for month in month_range:
    st50a_df[month.strftime('%Y-%m')] = 0

# Fire-side quantities do not depend on the row being flagged, so they are
# computed once here instead of once per (row, fire) pair.
fire_lat = updated_df['fire_location_latitude'].to_numpy(dtype=float)
fire_lon = updated_df['fire_location_longitude'].to_numpy(dtype=float)
# Radius of a circle with the fire's area plus a fixed 3000 m buffer.
# The factor 10000 presumably converts hectares to square metres -- TODO
# confirm the unit of `current_size`.
# Note: a negative size yields a NaN radius here (never matched) rather than
# the ValueError math.sqrt would raise.
fire_radius = np.sqrt((updated_df['current_size'].to_numpy(dtype=float) * 10000) / pi) + 3000
fire_dates = updated_df['fire_start_date']
has_start_date = fire_dates.notna().to_numpy()

known_columns = set(st50a_df.columns)
month_label_cache = {}  # raw start-date value -> 'YYYY-MM' label, parsed once

site_coords = zip(st50a_df.index, st50a_df['Latitude'], st50a_df['Longitude'])
for index, site_lat, site_lon in site_coords:
    # Distance from this row's location to every fire in one vectorised call.
    distance = calc_distance(site_lat, site_lon, fire_lat, fire_lon)
    # NaN distances or radii compare False, so incomplete rows never match.
    within_reach = has_start_date & (distance <= fire_radius)

    for raw_date in fire_dates[within_reach].unique():
        if raw_date not in month_label_cache:
            month_label_cache[raw_date] = pd.to_datetime(raw_date).strftime('%Y-%m')
        fire_start_date = month_label_cache[raw_date]
        # Fires that started outside the analysis window have no column.
        if fire_start_date in known_columns:
            st50a_df.at[index, fire_start_date] = 1

st50a_df.to_csv('ST50A_updated_with_fire_info_pipe.csv', index=False)


In [None]:
# Inputs for this cell. The second table is bound to `st50a_df` because the
# matching code below refers to it by that name.
fire_csv, site_csv = 'updated_file.csv', 'monthly_averages.csv'
updated_df = pd.read_csv(fire_csv)
st50a_df = pd.read_csv(site_csv)


def calc_distance(lat1, lon1, lat2, lon2, radius_m=6371 * 1000):
    """Return the great-circle distance between two points (haversine formula).

    Parameters
    ----------
    lat1, lon1 : float or numpy array
        Latitude and longitude of the first point(s), in decimal degrees.
    lat2, lon2 : float or numpy array
        Latitude and longitude of the second point(s), in decimal degrees.
    radius_m : float, optional
        Radius of the sphere. Defaults to 6371 km expressed in metres, the
        value commonly used as the mean Earth radius.

    Returns
    -------
    float or numpy array
        Distance in the same unit as ``radius_m`` (metres by default).
        Inputs broadcast against each other following numpy rules, so one
        point can be compared against a whole array of points in one call.
    """
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = np.sin(delta_phi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2.0) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return radius_m * c


# Analysis window: one 0/1 indicator column per calendar month ('YYYY-MM').
start_date = pd.to_datetime('2007-02-01')
end_date = pd.to_datetime('2021-10-31')
month_range = pd.date_range(start_date, end_date, freq='MS')

for month in month_range:
    st50a_df[month.strftime('%Y-%m')] = 0

# Fire-side quantities do not depend on the row being flagged, so they are
# computed once here instead of once per (row, fire) pair.
fire_lat = updated_df['fire_location_latitude'].to_numpy(dtype=float)
fire_lon = updated_df['fire_location_longitude'].to_numpy(dtype=float)
# Radius of a circle with the fire's area; no extra buffer in this cell.
# The factor 10000 presumably converts hectares to square metres -- TODO
# confirm the unit of `current_size`.
# Note: a negative size yields a NaN radius here (never matched) rather than
# the ValueError math.sqrt would raise.
fire_radius = np.sqrt((updated_df['current_size'].to_numpy(dtype=float) * 10000) / pi)
fire_dates = updated_df['fire_start_date']
has_start_date = fire_dates.notna().to_numpy()

known_columns = set(st50a_df.columns)
month_label_cache = {}  # raw start-date value -> 'YYYY-MM' label, parsed once

site_coords = zip(st50a_df.index, st50a_df['Latitude'], st50a_df['Longitude'])
for index, site_lat, site_lon in site_coords:
    # Distance from this row's location to every fire in one vectorised call.
    distance = calc_distance(site_lat, site_lon, fire_lat, fire_lon)
    # NaN distances or radii compare False, so incomplete rows never match.
    within_reach = has_start_date & (distance <= fire_radius)

    for raw_date in fire_dates[within_reach].unique():
        if raw_date not in month_label_cache:
            month_label_cache[raw_date] = pd.to_datetime(raw_date).strftime('%Y-%m')
        fire_start_date = month_label_cache[raw_date]
        # Fires that started outside the analysis window have no column.
        if fire_start_date in known_columns:
            st50a_df.at[index, fire_start_date] = 1

# NOTE(review): absolute Google Drive path; the other cells write to relative
# paths. Confirm this destination is intended before running elsewhere.
st50a_df.to_csv('/content/drive/MyDrive/Wildfires/ST50A_updated_with_fire_info_pipe.csv', index=False)


In [None]:
def _radius_from_size(size):
    """Radius of the circle whose area is ``size * 10000``."""
    return sqrt((size * 10000) / pi)


df = pd.read_csv('updated_file.csv')
df['fire_radius'] = df['current_size'].apply(_radius_from_size)

# Export only the fire identifiers together with the derived radius.
exported_columns = ['fire_year', 'fire_number', 'fire_radius']
output_df = df[exported_columns]
output_df.to_csv('fire_radius_info.csv', index=False)


In [None]:
data_path = 'final_merged_base_alberta_wildfire_with_gas_and_pipes_summary.csv'
df = pd.read_csv(data_path)

# Split the month stamp into numeric Year and Month columns. The stamps are
# parsed before 'Month' is overwritten with the month number.
month_stamps = pd.to_datetime(df['Month'])
df['Year'] = month_stamps.dt.year
df['Month'] = month_stamps.dt.month

features = [
    'A', 'B', 'C', 'D', 'E',
    'Year', 'Month',
    'Gas Plant Frac', 'Gas Plant Sweet', 'Gp Acid Gas Flaring',
    'Gp Acid Gas Flaring%', 'Gp Acid Gas Inj', 'Gp Mainline Strdle',
    'Gp Sulphur Rcvry', 'Avg Capacity (1000 m3/d)',
    'prod_%_diff'
]

# Columns read as text (object dtype) have their commas removed and are cast
# to float so they can take part in the correlation.
text_features = [name for name in features if df[name].dtype == object]
for name in text_features:
    df[name] = df[name].str.replace(',', '').astype(float)

# Pairwise correlations between all features; keep the column for the target
# and list it from highest to lowest.
corr_matrix = df[features].corr()
target_column = corr_matrix['prod_%_diff']
correlations = target_column.sort_values(ascending=False)

print("Correlation with 'prod_%_diff':")
print(correlations)
