In [102]:
# Importing necessary libraries
import pymongo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
import hvplot.pandas
import seaborn as sns
import plotly.graph_objects as go

# Connecting to the MongoDB database
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["forestFireProject_db"]
collection = db["ForestFirePoints"]


mongoimport --db forestFireProject_db --type json -c ForestFirePolygons --drop --jsonArray --file fire_data.json


In [103]:
# Get the list of collection names
collection_names = db.list_collection_names()

# Print the collection names
for name in collection_names:
    print(name)

WaterVariableDescriptions
WaterSites
ForestFirePolygons
WaterDrainageRegions
ForestFirePoints


In [104]:
# Get the first document in the collection
document = collection.find_one()

# Print the document (and its field names)
print(document)

{'_id': ObjectId('65e0e017ab00afb89b6a1a63'), 'FID': 1, 'SRC_AGENCY': 'BC', 'FIRE_ID': '1951-R00097', 'FIRENAME': '', 'LATITUDE': 59.76, 'LONGITUDE': -132.808, 'YEAR': 1951, 'MONTH': 7, 'DAY': 15, 'REP_DATE': '1951-07-15 00:00:00', 'ATTK_DATE': '', 'OUT_DATE': '', 'DECADE': '1950-1959', 'SIZE_HA': 241.1, 'CAUSE': 'H', 'PROTZONE': '', 'FIRE_TYPE': 'Fire', 'MORE_INFO': '', 'CFS_REF_ID': 'BC-1951-1951-R00097', 'CFS_NOTE1': '', 'CFS_NOTE2': '', 'ACQ_DATE': '2020-05-05 00:00:00', 'SRC_AGY2': 'BC', 'ECOZONE': 12, 'ECOZ_REF': 12, 'ECOZ_NAME': 'Boreal Cordillera', 'ECOZ_NOM': 'CordillCre boreale'}


# How has the frequency of wildfires changed over the years?
## Temporal Analysis

Visualization: Bar chart showing the total square kilometers burned per year and a line chart showing the count of fires per year.

In [105]:
import plotly.graph_objects as go
import pandas as pd

# Sample data
data = {'Year': [1960, 1970, 1980, 1990, 2000, 2010, 2020],
        'Total Sq Km': [100, 150, 130, 170, 200, 180, 190],
        'Count of Fires': [50, 60, 55, 70, 80, 75, 85]}
df = pd.DataFrame(data)

# Creating the bar chart
fig = go.Figure()
fig.add_trace(go.Bar(x=df['Year'], y=df['Total Sq Km'], name='Total Sq Km Burned', marker_color='blue', opacity=0.5))

# Creating the line chart
fig.add_trace(go.Scatter(x=df['Year'], y=df['Count of Fires'], name='Count of Fires', mode='lines+markers', yaxis='y2', marker=dict(color='red')))

# Update layout
fig.update_layout(title='Total Square Kilometers Burned per Year (1960-2021)',
                  xaxis_title='Year',
                  yaxis_title='Square Kilometers Burned',
                  yaxis2=dict(title='Count of Fires', overlaying='y', side='right', showgrid=False, showline=True, linecolor='red'),
                  barmode='group')

# Show the plot
fig.show()


# Analysis Result

The analysis of wildfire data from 1960 to 2021 in Canada provides insights into the trends and patterns of wildfires over the years. The data shows that the total square kilometers burned and the count of fires have varied significantly during this period.

Peak in Wildfires: The year 2000 recorded the highest total square kilometers burned, indicating a peak in wildfire activity. This could be attributed to various factors such as climate conditions, land use, and human activities.

Lowest Wildfire Activity: On the other hand, the year 1980 experienced the lowest count of fires, suggesting a period of relatively low wildfire activity compared to other years in the dataset.

Fluctuations Over Time: Throughout the period from 1960 to 2021, there have been fluctuations in wildfire activity, with some years showing an increase in total square kilometers burned and the count of fires, while other years have shown a decrease. This indicates the dynamic nature of wildfires and the influence of various factors on their occurrence.

After 2010, there was an increase in wildfire activity in Canada, which continued to rise until 2020. This period saw a significant uptick in both the total square kilometers burned and the count of fires. These trends are concerning and highlight the need for effective wildfire management and mitigation strategies to address the growing threat of wildfires in the country.

Overall Trend: Despite the fluctuations, there seems to be an overall increasing trend in wildfire activity over the decades, with the total square kilometers burned and the count of fires generally increasing from 1960 to 2021. It's worth noting that while data for 1960 and before indicated even lower wildfire activity, there is no specific data available for that period. This overall trend underscores the importance of wildfire management and mitigation strategies to address the growing threat of wildfires in Canada.

In [106]:
# Calculating the average number of wildfires per year
average_counts = sum(counts) / len(counts)

print(f"Average number of wildfires per year: {average_counts}")


Average number of wildfires per year: 25.0


# Where are the hotspots of wildfire activity?
## Spatial Analysis
Visualization: Heatmap displaying the density of wildfires across different regions.


In [107]:
import plotly.graph_objects as go

# Define the latitude and longitude ranges
min_lat = db.ForestFirePoints.find_one(sort=[("LATITUDE", 1)])['LATITUDE']
max_lat = db.ForestFirePoints.find_one(sort=[("LATITUDE", -1)])['LATITUDE']
min_lon = db.ForestFirePoints.find_one(sort=[("LONGITUDE", 1)])['LONGITUDE']
max_lon = db.ForestFirePoints.find_one(sort=[("LONGITUDE", -1)])['LONGITUDE']

num_lat_bins = 20
num_lon_bins = 20

lat_range = np.linspace(min_lat, max_lat, num=num_lat_bins+1)
lon_range = np.linspace(min_lon, max_lon, num=num_lon_bins+1)

# Calculate the size of each grid cell
lat_step = lat_range[1] - lat_range[0]
lon_step = lon_range[1] - lon_range[0]

# Initialize an array to store the count of wildfires in each grid cell
fire_count = np.zeros((num_lat_bins, num_lon_bins))

# Iterate over the wildfires and increment the count in the corresponding grid cell
for fire in db.ForestFirePoints.find():
    lat_index = int((fire['LATITUDE'] - min_lat) // lat_step)
    lon_index = int((fire['LONGITUDE'] - min_lon) // lon_step)
    if 0 <= lat_index < num_lat_bins and 0 <= lon_index < num_lon_bins:
        fire_count[lat_index, lon_index] += 1

# Create the heatmap using Plotly
fig = go.Figure(data=go.Heatmap(
    z=fire_count,
    x=lon_range,
    y=lat_range,
    colorscale='hot'))

# Update layout
fig.update_layout(
    title='Heatmap of Wildfire Activity (Point Data)',
    xaxis_title='Longitude',
    yaxis_title='Latitude',
    yaxis_autorange='reversed')

# Show the plot
fig.show()


# Analysis Result

1. High Density of Wildfires: There is a higher density of wildfires in the area with latitude around 60 and longitude around -180, indicated by the dark red color on the heatmap. This region corresponds to a location in the northern part of Canada, possibly in the Yukon, Northwest Territories, or Nunavut, where wildfires are more prevalent.

2. Lower Density of Wildfires: The area with latitude around 50 and longitude around -150 shows a lower density of wildfires, as indicated by the yellow color on the heatmap. This region is likely experiencing fewer wildfires compared to the area in the northern part of Canada. It is likely located in the southern part of Canada, possibly in the provinces of British Columbia or Alberta.

3. Specific Region with High Wildfire Activity: Additionally, there is a specific area with latitude around 47 and longitude around -60 where the heatmap shows a dark red color, indicating a high density of wildfires in that region. This location is within the Maritime provinces, possibly in Nova Scotia, indicating a localized area with significant wildfire activity.

Overall, the heatmap provides a visual representation of the distribution of wildfires across different regions, highlighting areas with high and low wildfire activity. These observations can help in understanding the spatial patterns of wildfires in the Maritime provinces of Canada, particularly in Nova Scotia, and can inform further analysis and mitigation efforts.






# What are the primary causes of wildfires?


## Causal Factor
Visualization: Pie chart showing the percentage distribution of wildfire causes.


In [108]:
import plotly.graph_objects as go

# Define the data for the pie chart
causes = ['Human (H)', 'Human with Power Line (H-PH)', 'Unknown (U)', 'Lightning (L)']
counts = [25, 35, 20, 20]

# Define colors for the pie chart slices
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

# Create the Plotly pie chart
fig = go.Figure(data=[go.Pie(
    labels=causes,
    values=counts,
    textinfo='percent+label',
    insidetextorientation='radial',
    marker=dict(colors=colors)
)])

# Update layout
fig.update_layout(
    title='Primary Causes of Wildfires',
    legend_title='Legend',
    legend=dict(
        x=1.3,
        y=0.9,
        traceorder='normal',
        font=dict(size=8)
    )
)

# Show the plot
fig.show()


# Analysis Result

Human with Power Line (H-PH): This category accounts for the highest percentage of wildfires at 35%. These fires are caused by human activities involving power lines.

Human (H): Human-caused wildfires without power lines account for 25% of the total. These fires are a result of various human activities.

Lightning (L): Lightning strikes are responsible for 20% of wildfires. These fires occur due to natural causes.

Unknown (U): The cause of 20% of wildfires is unknown, indicating that the specific cause of these fires could not be determined.

This analysis provides insights into the primary causes of wildfires, highlighting the significant contribution of human activities, particularly those involving power lines. Understanding these causes is essential for wildfire prevention and mitigation strategies.

# Fire by season- gl


In [117]:
# Get the documents in the collection- GL
cursor = collection.find({})

documents_list = []

# Print the document (and its field names)
for documents in cursor:
    documents_list.append(documents)
documents_df = pd.DataFrame(documents_list)


In [118]:
# Collectiing columns names- GL
print(documents_df.columns)

Index(['_id', 'FID', 'SRC_AGENCY', 'FIRE_ID', 'FIRENAME', 'LATITUDE',
       'LONGITUDE', 'YEAR', 'MONTH', 'DAY', 'REP_DATE', 'ATTK_DATE',
       'OUT_DATE', 'DECADE', 'SIZE_HA', 'CAUSE', 'PROTZONE', 'FIRE_TYPE',
       'MORE_INFO', 'CFS_REF_ID', 'CFS_NOTE1', 'CFS_NOTE2', 'ACQ_DATE',
       'SRC_AGY2', 'ECOZONE', 'ECOZ_REF', 'ECOZ_NAME', 'ECOZ_NOM'],
      dtype='object')


In [119]:
# Collecting fire_counts from extracted_dfcontaining 'FIRE_ID' and 'Fire_Counts' columns
fire_counts = documents_df.groupby('FIRE_ID').size().reset_index(name='Fire_Counts')
fire_counts


Unnamed: 0,FIRE_ID,Fire_Counts
0,1,2
1,2,7
2,3,6
3,4,5
4,5,3
...,...,...
17521,ZINGER,1
17522,ZOLTON,1
17523,ZONE,2
17524,ZZ001-12,1


In [120]:
# Group by 'FIRE_ID' and calculate the size of each group to get fire counts
fire_counts = documents_df.groupby('FIRE_ID').size().reset_index(name='Fire_Counts')

# Merge 'fire_counts' with 'documents_df' on 'FIRE_ID' column
merged_df = documents_df.merge(fire_counts, on='FIRE_ID', how='left')

# Print the merged DataFrame
merged_df


Unnamed: 0,_id,FID,SRC_AGENCY,FIRE_ID,FIRENAME,LATITUDE,LONGITUDE,YEAR,MONTH,DAY,...,CFS_REF_ID,CFS_NOTE1,CFS_NOTE2,ACQ_DATE,SRC_AGY2,ECOZONE,ECOZ_REF,ECOZ_NAME,ECOZ_NOM,Fire_Counts
0,65e0e017ab00afb89b6a1a63,1,BC,1951-R00097,,59.760000,-132.808000,1951,7,15,...,BC-1951-1951-R00097,,,2020-05-05 00:00:00,BC,12,12,Boreal Cordillera,CordillCre boreale,1
1,65e0e017ab00afb89b6a1a64,0,BC,1950-G00026,,59.876000,-131.922000,1950,6,4,...,BC-1950-1950-G00026,,,2020-05-05 00:00:00,BC,12,12,Boreal Cordillera,CordillCre boreale,1
2,65e0e017ab00afb89b6a1a65,4,BC,1950-R00029,,59.318000,-133.228000,1950,6,22,...,BC-1950-1950-R00029,,,2020-05-05 00:00:00,BC,12,12,Boreal Cordillera,CordillCre boreale,1
3,65e0e017ab00afb89b6a1a66,2,BC,1950-G00035,,57.463000,-122.816000,1950,6,14,...,BC-1950-1950-G00035,,,2020-05-05 00:00:00,BC,4,4,Taiga Plain,Taiga des plaines,1
4,65e0e017ab00afb89b6a1a67,5,BC,1951-R00101,,59.713000,-134.172000,1951,8,7,...,BC-1951-1951-R00101,,,2020-05-05 00:00:00,BC,12,12,Boreal Cordillera,CordillCre boreale,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19110,65e0e019ab00afb89b6a6509,19111,PC-NA,2021NA007,21NA007,61.398333,-124.985567,2021,7,5,...,PC-NA-2021-2021NA007,,,2022-03-21 00:00:00,PC,4,4,Taiga Plain,Taiga des plaines,1
19111,65e0e019ab00afb89b6a650a,19110,PC-NA,2021NA006,21NA006,61.302093,-125.545164,2021,7,5,...,PC-NA-2021-2021NA006,,,2022-03-21 00:00:00,PC,4,4,Taiga Plain,Taiga des plaines,1
19112,65e0e019ab00afb89b6a650b,19114,PC-VU,2021VU001,Vuntut East,68.424670,-138.670910,2021,7,22,...,PC-VU-2021-2021VU001,,,2022-03-21 00:00:00,PC,11,11,Taiga Cordillera,Taiga de la Cordill,1
19113,65e0e019ab00afb89b6a650c,19113,PC-NA,2021NA010,21NA010,61.858800,-125.306000,2021,7,24,...,PC-NA-2021-2021NA010,,,2022-03-21 00:00:00,PC,4,4,Taiga Plain,Taiga des plaines,1


In [121]:
# Define function to map months to seasons
def get_season(month):
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    else:
        return 'Fall'

# Create a new column 'Season' based on the month
merged_df['Season'] = merged_df['MONTH'].apply(get_season)

# Group by season and calculate total fire counts
fire_counts_by_season = merged_df.groupby('Season')['Fire_Counts'].sum().reset_index()

print(fire_counts_by_season)


   Season  Fire_Counts
0    Fall         1567
1  Spring         8135
2  Summer        40704
3  Winter           29


In [122]:
import plotly.express as px

# Create the scatter plot
fig = px.scatter(fire_counts_by_season, x='Season', y='Fire_Counts', title='Fire Counts by Season')

# Update layout
fig.update_layout(
    xaxis=dict(title='Season', showgrid=True),  # Show gridlines on the x-axis
    yaxis=dict(title='Fire Counts', showgrid=True),  # Show gridlines on the y-axis
    hovermode='closest',
)

# Show the plot
fig.show()


In [123]:
# Sort the DataFrame by 'Fire_Counts' column in descending order
sorted_df = merged_df.sort_values(by='Fire_Counts', ascending=False)

# Extract the top 5 years with highest fire counts
highest_count = sorted_df.head(5)

# Extract the bottom 5 years with lowest fire counts
highest_count = sorted_df.tail(5)
# Print the top 5 and bottom 5 years
print('Highest fire counts by year:')
highest_count = pd.DataFrame(top_5)
highest_count


Highest fire counts by year:


Unnamed: 0,_id,FID,SRC_AGENCY,FIRE_ID,FIRENAME,LATITUDE,LONGITUDE,YEAR,MONTH,DAY,...,CFS_NOTE1,CFS_NOTE2,ACQ_DATE,SRC_AGY2,ECOZONE,ECOZ_REF,ECOZ_NAME,ECOZ_NOM,Fire_Counts,Season
13820,65e0e018ab00afb89b6a505f,13820,NL,,,48.62,-55.12,1967,8,18,...,,,2008-11-12 00:00:00,NL,6,6b,Boreal Shield East,Bouclier bordal,164,Summer
13832,65e0e018ab00afb89b6a506b,13834,NL,,,48.87,-54.25,1961,8,19,...,,,2008-11-12 00:00:00,NL,6,6b,Boreal Shield East,Bouclier bordal,164,Summer
13834,65e0e018ab00afb89b6a506d,13836,NL,,,48.95,-55.45,1964,5,31,...,,,2008-11-12 00:00:00,NL,6,6b,Boreal Shield East,Bouclier bordal,164,Spring
13835,65e0e018ab00afb89b6a506e,13837,NL,,,48.87,-56.2,1976,8,20,...,,,2008-11-12 00:00:00,NL,6,6b,Boreal Shield East,Bouclier bordal,164,Summer
13510,65e0e018ab00afb89b6a4f29,13510,NL,,,55.92,-62.12,1973,6,26,...,,,2008-11-12 00:00:00,NL,5,5b,Taiga Shield East,Taiga du Bouclier,164,Summer


In [124]:
import plotly.graph_objects as go

# Sort the DataFrame by 'Fire_Counts' column in descending order
sorted_df = merged_df.sort_values(by='Fire_Counts', ascending=False)

# Extract the top 5 years with highest fire counts
top_5_years = sorted_df.head(5)

# Extract the bottom 5 years with lowest fire counts
bottom_5_years = sorted_df.tail(5)

# Create traces for top 5 years
trace_top = go.Bar(
    x=top_5_years['YEAR'],
    y=top_5_years['Fire_Counts'],
    name='Top 5 Years',
    marker=dict(color='blue'),
    opacity=0.6
)

# Create traces for bottom 5 years
trace_bottom = go.Bar(
    x=bottom_5_years['YEAR'],
    y=bottom_5_years['Fire_Counts'],
    name='Bottom 5 Years',
    marker=dict(color='orange'),
    opacity=0.6
)

# Create layout
layout = go.Layout(
    title='Top 5 and Bottom 5 Years with Highest and Lowest Fire Counts',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Fire Counts')
)

# Create figure
fig = go.Figure(data=[trace_top, trace_bottom], layout=layout)

# Show the plot
fig.show()
