## Forest Fire Prediction using Logistic Regression

In [1]:
# importing required libraries

import numpy as np
import pandas as pd
import seaborn as sns

import warnings
# NOTE(review): called without a category or module filter, this hides every warning
# raised for the rest of the session (including any emitted while fitting the model
# further down) — confirm that blanket suppression is really wanted.
warnings.filterwarnings('ignore')

In [2]:
# load the forest-fire CSV; the path is relative, so the notebook must be run from
# the directory that contains the `dataset/` folder
df = pd.read_csv('dataset/Forest_fire.csv')

In [3]:
# preview the top five rows to sanity-check the load
df.head(n=5)

Unnamed: 0,Area,Oxygen,Temperature,Humidity,Fire Occurrence
0,Jharkand,40,45,20,1
1,Bangalore,50,30,10,1
2,Ecuador,10,20,70,0
3,a,60,45,70,1
4,Bangalore,30,48,10,1


In [4]:
# per-column summary: non-null counts and dtypes (useful for spotting missing values
# and non-numeric columns before building the feature matrix)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Area             29 non-null     object
 1   Oxygen           39 non-null     int64 
 2   Temperature      39 non-null     int64 
 3   Humidity         39 non-null     int64 
 4   Fire Occurrence  39 non-null     int64 
dtypes: int64(4), object(1)
memory usage: 1.6+ KB


In [5]:
# materialise the DataFrame as a NumPy array (one row per sample, one column per field)
data = df.to_numpy()

In [6]:
# show the first five rows of the array to check the column layout before slicing
data[:5]

array([['Jharkand', 40, 45, 20, 1],
       ['Bangalore', 50, 30, 10, 1],
       ['Ecuador', 10, 20, 70, 0],
       ['a', 60, 45, 70, 1],
       ['Bangalore', 30, 48, 10, 1]], dtype=object)

### Splitting Data into X and Y

In [7]:
X = data[1:, 1:-1]

In [8]:
y = data[1:, -1]

In [10]:
# the slice of the mixed-type array is object-typed; cast the features to integers
X = X.astype(int)

In [9]:
# cast the labels to integers as well, so they are no longer object-typed
y = y.astype(int)

In [12]:
# peek at the first five feature rows after the integer cast
print(X[:5])

[[50 30 10]
 [10 20 70]
 [60 45 70]
 [30 48 10]
 [50 15 30]]


In [13]:
# peek at the first five labels after the integer cast
print(y[:5])

[1 0 1 1 0]


### Train Test Split

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# hold out 30% of the samples for testing; random_state is fixed so the split is
# the same on every run
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [19]:
# report the dimensions of each split, one line per array
for label, split in (
    ("Shape of X_train", X_train),
    ("Shape of X_test", X_test),
    ("Shape of y_train", y_train),
    ("Shape of y_test", y_test),
):
    print(label, split.shape)

Shape of X_train (26, 3)
Shape of X_test (12, 3)
Shape of y_train (26,)
Shape of y_test (12,)


### Logistic Regression

In [20]:
from sklearn.linear_model import LogisticRegression

In [21]:
# logistic-regression classifier created with no arguments, i.e. library defaults
lr = LogisticRegression()

In [22]:
# fit on the training split only; the test split is kept aside for evaluation
lr.fit(X_train, y_train)

LogisticRegression()

In [23]:
# learned weights of the fitted model, one per feature column of X_train
lr.coef_

array([[ 0.27014258,  0.57960909, -0.05174004]])

In [24]:
# learned bias (intercept) term of the fitted model
lr.intercept_

array([-25.82185769])

In [29]:
# predicted class labels for the held-out test split (kept for the evaluation cells);
# the bare last line displays the array
y_test_pred = lr.predict(X_test)
y_test_pred

In [32]:
# predicted class labels for the training split, used later to compare training
# performance against test performance; the bare last line displays the array
y_train_pred = lr.predict(X_train)
y_train_pred

array([0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1])

In [None]:
# Build one hand-written sample to try the model on.
# presumably ordered to match the feature columns of X — confirm
testInputs = list(map(int, "45 32 60".split(' ')))
# wrap the 1-D sample in a list so it is passed to the model as a batch of one row
final = [np.asarray(testInputs)]

In [28]:
# class probabilities for the custom sample built in `final`
# (the result is only stored here; nothing is displayed by this cell)

predictedOP = lr.predict_proba(final)

### Model Evaluation

In [33]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [34]:
# confusion matrix on the test split (true labels passed first, predictions second)
confusion_matrix(y_test, y_test_pred)

array([[6, 0],
       [0, 6]], dtype=int64)

In [35]:
# confusion matrix on the training split (true labels passed first, predictions second)
confusion_matrix(y_train, y_train_pred)

array([[13,  0],
       [ 0, 13]], dtype=int64)

In [36]:
# accuracy on the test split: share of test samples whose prediction matches the label
accuracy_score(y_test, y_test_pred)

1.0

In [37]:
# accuracy on the training split: share of training samples predicted correctly
accuracy_score(y_train, y_train_pred)

1.0

- This is a very small dataset, so the 100% accuracy reported above should not be taken as a reliable estimate of how the model will perform on unseen data.

### Generating Model File

In [38]:
import pickle

In [39]:
# Persist the fitted model to disk. The context manager closes the file handle
# (and flushes the data) even if pickling fails; the bare open() call leaked it.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

In [40]:
# Reload the saved model to check that the file round-trips.
# NOTE: unpickling can execute arbitrary code — only load pickle files you created
# yourself or otherwise trust.
# The context manager closes the file handle, which the bare open() call leaked.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)