In [12]:
#importing various libraries
import os
import pickle

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import SMOTE
from keras.layers import Dense
from keras.models import Sequential
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [3]:
#function to preprocess the data
def preprocessing(file_name, complete_data = False, sample_size = 30000, random_state = None):
    """Load a fire-detection CSV and turn it into model-ready arrays.

    Parameters
    ----------
    file_name : str or path-like
        CSV handed to ``pd.read_csv``. The code below requires the columns
        ``acq_date``, ``acq_time``, ``version``, ``satellite``, ``daynight``,
        ``confidence``, ``scan``, ``track``, ``latitude`` and ``longitude``.
    complete_data : bool, default False
        When true, work on a random sample of at most ``sample_size`` rows
        (the path ``train_data`` uses); otherwise keep every row.
    sample_size : int, default 30000
        Upper bound on the number of sampled rows when ``complete_data`` is
        true. Files with fewer rows are used in full instead of raising.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample``; ``None`` keeps the previous
        unseeded behaviour.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix taken from positions 1-5 and 7 of the reduced frame.
    y : numpy.ndarray
        Binary label, 1 where ``confidence > 65`` and 0 otherwise.
    coordinates : pandas.DataFrame
        ``latitude`` / ``longitude`` of the rows that were kept.
    """
    # Merge the acquisition date and time into a single leading 'datetime' column.
    # NOTE(review): combining columns through a dict in parse_dates is deprecated
    # in recent pandas releases -- confirm against the pandas version in use.
    dataset = pd.read_csv(file_name, parse_dates={'datetime': ['acq_date', 'acq_time']})
    dataset = dataset.drop(columns=['version'])

    # Encode the categorical columns as integers.
    # NOTE(review): the encoder is re-fitted on every file, so the integer codes
    # are only consistent between files that contain the same set of categories.
    le = LabelEncoder()
    for name in ['satellite', 'daynight']:
        dataset[name] = le.fit_transform(dataset[name])

    # Binary target: detections with confidence above 65 count as fire.
    dataset['fire'] = (dataset.confidence > 65) * 1

    if complete_data:
        # Cap the sample at the number of available rows so small files do not
        # make DataFrame.sample raise.
        n_rows = min(sample_size, len(dataset))
        sampdata = dataset.sample(n=n_rows, random_state=random_state)
    else:
        sampdata = dataset.copy(deep=True)

    # Combine the correlated scan/track features into their row-wise mean.
    sampdata['scan_track'] = sampdata[['scan', 'track']].mean(axis=1)

    # Keep the coordinates separately before they are dropped from the features.
    coordinates = sampdata.filter(['latitude', 'longitude'])

    # Drop the columns that are no longer required.
    sampdata = sampdata.drop(columns=['latitude', 'longitude', 'confidence', 'scan', 'track'])

    # NOTE(review): the features are selected by position, which presumably skips
    # 'datetime' (0) and 'fire' (6) -- this depends on the CSV column order; verify.
    X = sampdata.iloc[:, [1, 2, 3, 4, 5, 7]].values
    # 'fire' is the second-to-last column at this point; select it by name so the
    # label does not depend on column position.
    y = sampdata['fire'].values

    return X, y, coordinates

In [4]:
#handling the training data: split, scale and rebalance
def train_data(file_name):
    """Build the training and validation sets used by the model.

    Parameters
    ----------
    file_name : str or path-like
        CSV forwarded to ``preprocessing`` with ``complete_data=True``.

    Returns
    -------
    X_train_res, y_train_res
        Scaled training features and labels after SMOTE oversampling.
    X_test, y_test
        Scaled validation features and their untouched labels (20% split).
    coordinates : pandas.DataFrame
        Coordinates returned by ``preprocessing`` for all rows it kept.
    """
    # Receive the processed data from the preprocessing function.
    X, y, coordinates = preprocessing(file_name, complete_data = True)

    # Hold out 20% of the rows for validation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

    # Fit the scaler on the training split only and reuse its statistics for the
    # validation split; re-fitting on the validation split would scale the two
    # sets differently and leak validation statistics into the evaluation.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Oversample the minority class of the training split only.
    # fit_resample replaces fit_sample, which was removed from imbalanced-learn.
    sm = SMOTE(random_state=2)
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train.ravel())

    return X_train_res, y_train_res, X_test, y_test, coordinates

In [5]:
def model_train(file_name):
    """Train the gradient-boosting classifier, print its scores and save it.

    Parameters
    ----------
    file_name : str or path-like
        CSV forwarded to ``train_data``.

    Side effects
    ------------
    Prints accuracy and F1 for the training and validation sets, pickles the
    fitted model to ``model_output/finalized_model.sav`` and writes the
    coordinates to ``coordinates.csv`` inside the ``output`` directory.
    Both directories are created when missing.
    """
    # Receive the training and validation data.
    X_train_res, y_train_res, X_test, y_test, coordinates = train_data(file_name)

    # Single source for the learning rate so the printed value cannot drift
    # from the one the model actually uses.
    learning_rate = 0.15

    # Create and fit the model.
    gb = GradientBoostingClassifier(n_estimators=20, learning_rate = learning_rate, max_features=4, max_depth = 9, random_state = 2, subsample = 0.7)
    gb.fit(X_train_res, y_train_res)
    y_train_pred = gb.predict(X_train_res)
    y_test_pred = gb.predict(X_test)

    # Print the results.
    print("Learning rate: ", learning_rate)
    print("Accuracy score (training): {0:.3f}".format(gb.score(X_train_res, y_train_res)))
    print("Accuracy score (validation): {0:.3f}".format(gb.score(X_test, y_test)))
    print("F1 score (training): {0:.3f}".format(f1_score(y_train_res, y_train_pred)))
    print("F1 score (validation): {0:.3f}".format(f1_score(y_test, y_test_pred)))

    # Save the model; the context manager closes the file even if pickling fails.
    filename = 'model_output/finalized_model.sav'
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'wb') as model_file:
        pickle.dump(gb, model_file)

    # Save the coordinates. os.path.join keeps 'output' a directory on every
    # platform; a literal backslash is only a path separator on Windows.
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)
    coordinates.to_csv(os.path.join(output_dir, 'coordinates.csv'))

In [10]:
def live_data(file_name):
    """Score a CSV of detections with the saved model and export the results.

    Parameters
    ----------
    file_name : str or path-like
        CSV forwarded to ``preprocessing`` with ``complete_data=False``, so
        every row is scored.

    Side effects
    ------------
    Prints accuracy and F1 against the confidence-derived label and writes
    ``predicted.csv`` and ``coordinates.csv`` into the ``output`` directory,
    which is created when missing.
    """
    # Receive the processed data from the preprocessing function.
    X, y, coordinates = preprocessing(file_name, complete_data = False)

    # NOTE(review): this scaler is fitted on the live batch itself, not on the
    # data the model was trained with, so the features may be scaled differently
    # from training. Persisting and reusing the training scaler would fix that.
    sc = StandardScaler()
    X = sc.fit_transform(X)

    # Load the trained model. The original location is tried first; if it is
    # missing, fall back to the path model_train writes to.
    filename = 'finalized_model.sav'
    if not os.path.exists(filename):
        trained_path = os.path.join('model_output', 'finalized_model.sav')
        if os.path.exists(trained_path):
            filename = trained_path
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files produced by a trusted run of model_train.
    with open(filename, 'rb') as model_file:
        model = pickle.load(model_file)

    # Predict fire/no-fire (1/0).
    y_pred = model.predict(X)

    # Accuracy and F1 score.
    print("Accuracy score : {0:.3f}".format(model.score(X, y)))
    print("F1 score : {0:.3f}".format(f1_score(y, y_pred)))

    # Store the predictions and coordinates as CSV files. os.path.join keeps
    # 'output' a directory on every platform, not only on Windows.
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)
    df = pd.DataFrame({'Fire': y_pred})
    df.to_csv(os.path.join(output_dir, 'predicted.csv'))
    coordinates.to_csv(os.path.join(output_dir, 'coordinates.csv'))

In [11]:
# Score the detections in MODIS.csv with the saved model; this prints the
# accuracy and F1 score and writes the prediction and coordinate CSV files.
live_data('MODIS.csv')

Accuracy score : 0.814
F1 score : 0.827


In [17]:
# def model_trainnn(file_name):
    
#     #receiving training and validation data
#     X_train_res, y_train_res, X_test, y_test, coordinates = train_data(file_name)
    
#     #creating the model
#     classifier = Sequential()

#     #adding first hidden layer to ANN
#     classifier.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu', input_dim = 6))
#     # second hidden layer
#     classifier.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu'))
#     #output layer
#     classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

#     #compiling ANN
#     classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

#     #fitting the ANN model
#     classifier.fit(X_train_res, y_train_res, batch_size = 10, epochs = 200)

#     #predicting the test set results (probabilities)
#     y_pred = classifier.predict(X_test)
#     y_pred_tf = (y_pred > 0.5)*1
    
#     ty_pred = classifier.predict(X_train_res)
#     ty_pred_tf = (ty_pred > 0.5)*1
#     #printing the results
# #     print("Learning rate: ", 0.15)
# #     print("Accuracy score (training): {0:.3f}".format(gb.score(X_train_res, y_train_res)))
# #     print("Accuracy score (validation): {0:.3f}".format(gb.score(X_test, y_test)))
#     print("F1 score (training): {0:.3f}".format(f1_score(y_train_res, ty_pred_tf)))
#     print("F1 score (validation): {0:.3f}".format(f1_score(y_test, y_pred_tf)))
    
#     #saving the model and coordintes
# #     filename = 'model_output/finalized_model.sav'
# #     pickle.dump(gb, open(filename, 'wb'))
# #     coordinates.to_csv(r'output\coordinates.csv')