In [3]:
# data organization libraries
import numpy as np
import pandas as pd

# data visualization libraries
import plotly.express as px
import matplotlib.pyplot as plt

# modeling libraries
import sklearn as sk


In [17]:
# Load the forest-fires dataset from the local CSV file and print a schema
# summary (column names, dtypes, non-null counts).
fire_df = pd.read_csv(filepath_or_buffer='forestfires.csv')

fire_df.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [18]:
# Preview the first five rows of the raw data.
fire_df.head(n=5)

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [20]:
# Keep only the grid location, the date fields, the basic weather readings and
# the burned area. Columns are selected by name rather than by position so a
# change in the CSV column order raises a KeyError instead of silently picking
# the wrong columns.
simp_cols = ['X', 'Y', 'month', 'day', 'temp', 'wind', 'rain', 'area']
simp_fire = fire_df.loc[:, simp_cols]

simp_fire.head()

Unnamed: 0,X,Y,month,day,temp,wind,rain,area
0,7,5,mar,fri,8.2,6.7,0.0,0.0
1,7,4,oct,tue,18.0,0.9,0.0,0.0
2,7,4,oct,sat,14.6,1.3,0.0,0.0
3,8,6,mar,fri,8.3,4.0,0.2,0.0
4,8,6,mar,sun,11.4,1.8,0.0,0.0


In [27]:


# Months whose rows are excluded from the analysis.
# NOTE(review): presumably these months have too few records to be useful --
# only 5 of the 517 rows are removed -- confirm the rationale.
months_to_remove = ['nov', 'jan', 'may']

# Flag the rows that fall in an excluded month, then drop them by index label.
in_removed_month = simp_fire['month'].isin(months_to_remove)
dropped = simp_fire.drop(index=simp_fire.index[in_removed_month])

dropped.head()

Unnamed: 0,X,Y,month,day,temp,wind,rain,area
0,7,5,mar,fri,8.2,6.7,0.0,0.0
1,7,4,oct,tue,18.0,0.9,0.0,0.0
2,7,4,oct,sat,14.6,1.3,0.0,0.0
3,8,6,mar,fri,8.3,4.0,0.2,0.0
4,8,6,mar,sun,11.4,1.8,0.0,0.0


In [34]:
# Apply the same month filter to the full-width frame (all 13 columns):
# flag the rows in an excluded month, then drop them by index label.
excluded_rows = fire_df['month'].isin(months_to_remove)
clean_fires = fire_df.drop(index=fire_df.index[excluded_rows])

clean_fires.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [35]:
# Recode the textual month/day columns as integers.
# Days are numbered Sunday = 0 ... Saturday = 6.
weekdaystr2num = {
    'sun': 0,
    'mon': 1,
    'tue': 2,
    'wed': 3,
    'thu': 4,
    'fri': 5,
    'sat': 6
}

# Months are numbered January = 1 ... December = 12.
monthstr2num = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12
}

##### START #####
# Series.map is used instead of Series.replace: swapping strings for integers
# with replace() relies on pandas silently downcasting the object column, which
# is deprecated (FutureWarning since pandas 2.2). The dict.get(v, v) fallback
# keeps replace()'s behaviour of leaving unmapped values untouched, so
# re-running this cell on already-recoded data changes nothing.
clean_fires['day'] = clean_fires['day'].map(lambda v: weekdaystr2num.get(v, v))
clean_fires['month'] = clean_fires['month'].map(lambda v: monthstr2num.get(v, v))
##### END #####

clean_fires.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,3,5,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,10,2,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,10,6,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,3,5,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,3,0,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [36]:
# Persist the filtered, numerically recoded frame without the index column.
clean_fires.to_csv(path_or_buf='clean_fires.csv', index=False)

In [28]:
# Number of rows left after removing the excluded months.
dropped.shape[0]

512

In [29]:
# Pairwise Pearson correlations between the numeric columns.
# `dropped` still contains the string columns `month` and `day`; on
# pandas >= 2.0 a bare DataFrame.corr() raises ValueError on them, so the
# frame is narrowed to numeric dtypes explicitly first.
dropped.select_dtypes(include='number').corr()

Unnamed: 0,X,Y,temp,wind,rain,area
X,1.0,0.541952,-0.055997,0.015908,0.065443,0.063649
Y,0.541952,1.0,-0.024425,-0.018801,0.033155,0.045778
temp,-0.055997,-0.024425,1.0,-0.239738,0.069408,0.096509
wind,0.015908,-0.018801,-0.239738,1.0,0.06112,0.011785
rain,0.065443,0.033155,0.069408,0.06112,1.0,-0.007426
area,0.063649,0.045778,0.096509,0.011785,-0.007426,1.0


In [32]:
# Persist the simplified, month-filtered frame without the index column.
dropped.to_csv(path_or_buf='simple_fire.csv', index=False)


In [33]:
# Preview the first five rows of the simplified frame that was saved.
dropped.head(n=5)

Unnamed: 0,X,Y,month,day,temp,wind,rain,area
0,7,5,mar,fri,8.2,6.7,0.0,0.0
1,7,4,oct,tue,18.0,0.9,0.0,0.0
2,7,4,oct,sat,14.6,1.3,0.0,0.0
3,8,6,mar,fri,8.3,4.0,0.2,0.0
4,8,6,mar,sun,11.4,1.8,0.0,0.0
