In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
%matplotlib inline

In [2]:
import requests  # Library for making HTTP requests to download data
from io import StringIO  # Library for handling string data as file-like objects

# URL of the Historical_Wildfires.csv dataset
URL = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/Historical_Wildfires.csv"

# Download the file. requests applies no timeout by default, so pass one
# explicitly: otherwise a stalled connection would hang this cell forever.
response = requests.get(URL, timeout=30)
response.raise_for_status()  # Fail loudly on an HTTP error instead of parsing an error page as CSV

# Wrap the decoded response body in a file-like object that pandas can read
csv_content = StringIO(response.text)

# Parse the CSV text into a pandas DataFrame
df = pd.read_csv(csv_content)

print('Data read into a pandas dataframe!')
df.head()  # Display the first few rows of the DataFrame

Data read into a pandas dataframe!


In [3]:
# Inspect the column names of the loaded DataFrame as a plain Python list
list(df.columns)

In [4]:
# Verify the data type of each variable.
# Show the dtypes Series itself rather than a bare list: the Series keeps the
# column names next to their types, so each type can be matched to its variable.
df.dtypes

In [5]:
# Extract 'Year' and 'Month' from 'Date' and add them to the dataframe as separate columns.
# Parse the 'Date' column only once and reuse the result for both derived columns.
dates = pd.to_datetime(df['Date'])
df['Year'] = dates.dt.year
df['Month'] = dates.dt.month

In [6]:
# Verify the columns again.
# Display the dtypes Series (column name -> dtype) so the columns present in
# the dataframe are visible by name; a bare list of dtypes would hide them.
df.dtypes

In [7]:
# Show change in average estimated fire area over time using pandas to plot

plt.figure(figsize=(12, 6))

# Group the data by 'Year' and take the mean of 'Estimated_fire_area';
# the result is a Series indexed by year.
df_new = df.groupby('Year')['Estimated_fire_area'].mean()

# A Series plots its index on the x-axis and its values on the y-axis by
# itself. The x=/y= keywords are only used when plotting a DataFrame, so
# they are not passed here.
df_new.plot()
plt.xlabel('Year')
plt.ylabel('Average Estimated Fire Area (km²)')
plt.title('Estimated Fire Area over Time')
plt.show()

In [8]:
# The yearly plot shows a peak between 2010 and 2013. To narrow the finding
# down, average the estimated fire area per (Year, Month) pair instead.
df_new = df.groupby(['Year', 'Month'])['Estimated_fire_area'].mean()

# Draw the monthly averages on an explicitly created figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
df_new.plot(ax=ax)
ax.set_xlabel('Year and Month')
ax.set_ylabel('Average Estimated Fire Area (km²)')
ax.set_title('Estimated Fire Area by Month Over Time')
plt.show()

In [9]:
# Look at the distribution of mean estimated fire brightness per region using a Seaborn bar plot.

# Identify the regions in the dataset (unique() applies to a Series only).
# Only the last expression of a cell is displayed automatically, so print
# this intermediate result explicitly or it would be silently discarded.
print(df['Region'].unique())

# Bar plot of 'Mean_estimated_fire_brightness' for each 'Region'
plt.figure(figsize=(10, 6))
sns.barplot(data=df, x='Region', y='Mean_estimated_fire_brightness')
plt.xlabel('Region')
plt.ylabel('Mean Estimated Fire Brightness (Kelvin)')
plt.title('Distribution of Mean Estimated Fire Brightness across Regions')
plt.show()

In [10]:
# Pie chart: share of the pixel count for presumed vegetation fires contributed by each region

# Total 'Count' per region (Series indexed by region name)
region_counts = df.groupby('Region')['Count'].sum()

fig, ax = plt.subplots(figsize=(10, 6))
# Wedges are labelled with the region names; autopct prints each wedge's percentage
ax.pie(region_counts, labels=region_counts.index, autopct='%1.1f%%')
ax.set_title('Percentage of Pixels for Presumed Vegetation Fires by Region')
ax.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle
plt.show()

In [11]:
# Histogram of the mean estimated fire brightness over the whole dataset
fig, ax = plt.subplots(figsize=(10, 6))
# Bucket the values into 20 bins
ax.hist(df['Mean_estimated_fire_brightness'], bins=20)
ax.set_xlabel('Mean Estimated Fire Brightness (Kelvin)')
ax.set_ylabel('Count')
ax.set_title('Histogram of Mean Estimated Fire Brightness')
plt.show()

In [12]:
# Seaborn histogram of mean estimated fire brightness, split by region:
#   data=df                             -> source DataFrame
#   x='Mean_estimated_fire_brightness'  -> column shown on the x-axis
#   hue='Region'                        -> one colour-coded distribution per region
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=df, x='Mean_estimated_fire_brightness', hue='Region', ax=ax)
ax.set_xlabel('Mean Estimated Fire Brightness (Kelvin)')
ax.set_ylabel('Count')
ax.set_title('Distribution of Mean Estimated Fire Brightness Across Regions')
plt.show()

In [13]:
# Scatter plot of mean estimated fire radiative power (y-axis) against mean confidence (x-axis) using Seaborn
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Mean_confidence', y='Mean_estimated_fire_radiative_power')
# Label each axis after the column actually plotted on it:
# x is 'Mean_confidence', y is 'Mean_estimated_fire_radiative_power'.
plt.xlabel('Mean Confidence')
plt.ylabel('Mean Estimated Fire Radiative Power (MW)')
plt.title('Mean Estimated Fire Radiative Power vs. Mean Confidence')
plt.show()

In [14]:
# Coordinates used to mark the seven regions on the map of Australia with Folium.
# Each row is (region code, latitude, longitude).
_region_rows = [
    ('NSW', -31.8759835, 147.2869493),
    ('QL', -22.1646782, 144.5844903),
    ('SA', -30.5343665, 135.6301212),
    ('TA', -42.035067, 146.6366887),
    ('VI', -36.5986096, 144.6780052),
    ('WA', -25.2303005, 121.0187246),
    ('NT', -19.491411, 132.550964),
]

# Transpose the rows into the column-oriented dict that the DataFrame is built from
_codes, _lats, _lons = zip(*_region_rows)
region_data = {'region': list(_codes), 'Lat': list(_lats), 'Lon': list(_lons)}

reg = pd.DataFrame(region_data)
reg