In [3]:
!pip install folium

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
%matplotlib inline

# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

# Install a process-wide filter that hides every warning category.
import warnings
warnings.simplefilter("ignore")

Collecting folium
  Using cached folium-0.14.0-py2.py3-none-any.whl (102 kB)
Collecting branca>=0.6.0
  Using cached branca-0.6.0-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.6.0 folium-0.14.0
[0m

In [None]:
# Restore `s3_private_path` from IPython's %store database. It must have been
# saved beforehand with `%store s3_private_path` (not done in the cells shown here).
# The value is used below as the string prefix for the CSV reads.
# NOTE(review): this comment used to say "public S3 bucket" while the variable
# is named "private" -- confirm which bucket is intended.
%store -r s3_private_path

In [5]:


# Read the three raw CSV files that sit under the restored S3 prefix.
# NOTE(review): parse_dates=True only asks pandas to parse the *index*; no
# index_col is given, so the date columns presumably stay unparsed here and are
# converted explicitly in later cells -- confirm.
fires_df, weather_df, conditions_df = (
    pd.read_csv(f"{s3_private_path}{csv_name}", parse_dates=True)
    for csv_name in ('fires.csv', 'weather.csv', 'conditions.csv')
)

FileNotFoundError: sagemaker-us-east-1-857283526476/widfire-risk/data/csv/fires.csv

In [None]:
# Preview the first rows of the raw fires table as loaded from CSV.
fires_df.head()

In [None]:
# Preview the first rows of the raw weather table as loaded from CSV.
weather_df.head()

In [None]:
# Preview the first rows of the raw conditions table as loaded from CSV.
conditions_df.head()

In [None]:
# Work on copies so the frames loaded from CSV stay untouched by later cells.
fires, weather, conditions = (
    raw_frame.copy() for raw_frame in (fires_df, weather_df, conditions_df)
)

In [None]:
# (rows, columns) of the fires working copy.
fires.shape 

In [None]:
# (rows, columns) of the weather working copy.
weather.shape

In [None]:
# (rows, columns) of the conditions working copy.
conditions.shape

In [None]:
# Print the column list with dtypes and non-null counts for the fires table.
fires.info()

In [None]:
# Print the column list with dtypes and non-null counts for the weather table.
weather.info()

In [None]:
# Print the column list with dtypes and non-null counts for the conditions table.
conditions.info()

In [None]:
# Number of distinct values in each fires column.
fires.nunique()

In [None]:
# Column labels of the fires table.
fires.columns

In [None]:
# Number of distinct values in each weather column.
weather.nunique()

In [None]:
# Number of distinct values in each conditions column.
conditions.nunique()

In [None]:
# Descriptive statistics for the fires table (pandas `describe` defaults).
fires.describe()

In [None]:
# Descriptive statistics for the weather table (pandas `describe` defaults).
weather.describe()

In [None]:
# Descriptive statistics for the conditions table (pandas `describe` defaults).
conditions.describe()

In [None]:
# Parse the genuine timestamp columns of the fires table into datetimes.
fire_datetime_columns = [
    'ContainmentDateTime',
    'ControlDateTime',
    'FireDiscoveryDateTime',
    'FireOutDateTime',
]
for column_name in fire_datetime_columns:
    fires[column_name] = pd.to_datetime(fires[column_name])

# 'EstimatedCostToDate' is a cost ("to date" = so far), not a timestamp: it is
# plotted as a cost against acreage further down. Running it through
# pd.to_datetime would reinterpret the amounts as epoch offsets, so it is kept
# numeric instead. Non-numeric entries raise rather than being silently dropped.
fires['EstimatedCostToDate'] = pd.to_numeric(fires['EstimatedCostToDate'])

In [None]:
# Show the dtype of every fires column to check the conversions above took effect.
print(fires.dtypes)

In [None]:
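# Disabled alternative kept for reference. NOTE: `filter(like='Date')` matches every
# column whose name contains "Date", which includes the cost column
# 'EstimatedCostToDate', and the result of `.apply` is not assigned back to `fires`.
# fires.filter(like='Date').apply(pd.to_datetime)

# print(fires.dtypes)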

In [None]:
# Missing-value count for each fires column.
fires.isna().sum()

In [None]:
# Missing-value count for each weather column.
weather.isna().sum()

In [None]:
# Convert the weather observation date column to pandas datetimes.
weather['DATE'] = weather['DATE'].pipe(pd.to_datetime)

In [None]:
# Show the dtype of every weather column to confirm DATE is now a datetime.
print(weather.dtypes)

In [None]:
# Number of fire records per FireCause value (rows with a missing cause are
# excluded by groupby's default handling of NaN keys).
fires.groupby('FireCause').size()

In [None]:
# Bar chart of how many fire records fall under each cause.
cause_count_axes = sns.countplot(data=fires, x='FireCause')
cause_count_axes

In [None]:
# Numeric weather columns only, with the two coordinate columns removed.
numerical_features_w = (
    weather
    .select_dtypes(include=["number"])
    .drop(columns=["LONGITUDE", "LATITUDE"])
)
numerical_features_w.head()

In [None]:
# Numeric fires columns only, with the two coordinate columns removed.
numerical_features_f = (
    fires
    .select_dtypes(include=["number"])
    .drop(columns=["InitialLongitude", "InitialLatitude"])
)
numerical_features_f.head()

In [None]:
# Split the discovery timestamp into year / month / day helper columns.
fires['Year'] = fires['FireDiscoveryDateTime'].dt.year
fires['month'] = fires['FireDiscoveryDateTime'].dt.month
fires['day'] = fires['FireDiscoveryDateTime'].dt.day

# Count the number of fires in each year (rows without a discovery date are
# left out by value_counts).
fires_by_year = fires['Year'].value_counts()

# Plot the counts as one bar per year. A histogram with `bins=len(fires_by_year)`
# spreads equal-width bins between the first and last year, so bars do not line
# up with calendar years and any gap year shifts fires into the wrong bin.
yearly_counts = fires_by_year.sort_index()
year_labels = yearly_counts.index.astype(int).astype(str)  # '2020', not '2020.0'
plt.bar(year_labels, yearly_counts.values)
plt.xticks(rotation=45)
plt.xlabel('Year')
plt.ylabel('Number of fires')
plt.show()

In [None]:
# Keep only the fires for which both initial coordinates are known.
fire_coordinate_columns = ['InitialLatitude', 'InitialLongitude']
fires = fires.dropna(axis=0, how='any', subset=fire_coordinate_columns)

In [None]:
# Create a map centered on the US
mapb = folium.Map(location=[39.8283, -98.5795], zoom_start=4)

# Optional: add a marker for each fire location. Left disabled -- presumably
# because one marker per row is slow on the full table (confirm before enabling).
# The target must be `mapb`; a bare `map` is Python's builtin, not this object.
# for index, row in fires.iterrows():
#     folium.Marker([row['InitialLatitude'], row['InitialLongitude']]).add_to(mapb)

# Show the map: the last expression of the cell is what gets rendered.
mapb

In [None]:
# Fire damage: one bubble per fire at its initial coordinates, sized by final acreage.
#
# matplotlib's `s` is a marker *area* in points**2, so passing raw acre counts
# lets a single large fire cover the whole figure. Sizes are therefore rescaled
# so the largest fire gets `largest_marker_area`, with a floor of 1 so small
# fires stay visible. Rows with a missing acreage keep a NaN size.
largest_marker_area = 300
final_acres = fires['FinalAcres']
largest_fire_acres = final_acres.max()
if largest_fire_acres > 0:
    marker_areas = (final_acres / largest_fire_acres * largest_marker_area).clip(lower=1)
else:
    # All-missing or all-zero acreage: nothing sensible to scale by.
    marker_areas = final_acres

plt.scatter(fires['InitialLongitude'], fires['InitialLatitude'], s=marker_areas, alpha=0.5)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Wildfires by Location and Size')
plt.show()

In [None]:
# Box plot of how long each fire lasted, from discovery until it was out.
# `.dt.days` keeps only the whole-day component; fires missing either
# timestamp have no duration and are dropped before plotting.
fires['duration'] = fires['FireOutDateTime'].sub(fires['FireDiscoveryDateTime'])
duration_in_days = fires['duration'].dt.days.dropna()
plt.boxplot(duration_in_days)
plt.title('Wildfire Duration')
plt.ylabel('Duration in days')
plt.show()

In [None]:
# Cost against burned area, using only fires where both values are present.
subset = fires.loc[:, ['EstimatedCostToDate', 'FinalAcres']].dropna()

# Acres on the x-axis, cost on the y-axis.
plt.scatter(x=subset['FinalAcres'], y=subset['EstimatedCostToDate'])
plt.title('Relationship Between Wildfire Size and Cost')
plt.xlabel('Final Acres')
plt.ylabel('Estimated Cost To Date')
plt.show()

In [None]:
# Seasonal view: fires per discovery month, stacked by cause.
# Only rows that have both a cause and a discovery timestamp are counted.
subset = fires[['FireCause', 'FireDiscoveryDateTime']].dropna()

# Rows = month number, columns = cause, values = number of fires.
discovery_month = subset['FireDiscoveryDateTime'].dt.month
grouped = subset.groupby([discovery_month, 'FireCause']).size().unstack()

grouped.plot(kind='bar', stacked=True)
plt.title('Number of Wildfires by Month and Cause')
plt.xlabel('Month')
plt.ylabel('Number of Wildfires')
plt.show()

In [None]:
# Keep only the weather rows for which both coordinates are known.
weather_coordinate_columns = ['LATITUDE', 'LONGITUDE']
weather = weather.dropna(axis=0, how='any', subset=weather_coordinate_columns)

In [None]:
# Drop the timezone from the discovery timestamps; tz_localize(None) keeps the
# wall-clock values and only removes the tz information.
# NOTE(review): presumably done so the column can be matched against
# weather['DATE'] in the merge below -- confirm that DATE is tz-naive.
fires['FireDiscoveryDateTime'] = fires['FireDiscoveryDateTime'].dt.tz_localize(None) # remove timezone

In [None]:
# Attach weather observations to fires with a left join, so every fire row is kept.
# The join requires *exact* equality of timestamp, longitude and latitude.
# NOTE(review): if DATE is a daily date and the weather coordinates are station
# positions, almost nothing will match and every weather column will be NaN --
# consider joining on the calendar date and the nearest station instead.
merged_df = pd.merge(
    fires,
    weather,
    how='left',
    left_on=['FireDiscoveryDateTime', 'InitialLongitude', 'InitialLatitude'],
    right_on=['DATE', 'LONGITUDE', 'LATITUDE'],
    indicator='_weather_match',  # temporary column, removed again below
)

# Surface the match rate so an all-NaN join cannot go unnoticed, then drop the
# helper column so `merged_df` has exactly the columns of the plain merge.
matched_share = merged_df['_weather_match'].eq('both').mean()
merged_df = merged_df.drop(columns='_weather_match')
print(
    f"Weather matched for {matched_share:.1%} of merged rows "
    f"({len(merged_df)} merged rows from {len(fires)} fires)"
)

In [None]:
# Preview the first rows of the fires/weather join.
merged_df.head()