In [14]:
# Connect Pandas to SQL.
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import join
from sqlalchemy import text
from sqlalchemy.sql import select

# Build the connection URL from the environment so that no credential is stored
# in the notebook. Non-secret parts fall back to the local development values;
# the password is prompted for when WILDFIRE_DB_PASSWORD is not set.
db_user = os.environ.get("WILDFIRE_DB_USER", "postgres")
db_password = os.environ.get("WILDFIRE_DB_PASSWORD") or getpass("PostgreSQL password: ")
db_host = os.environ.get("WILDFIRE_DB_HOST", "127.0.0.1")
db_port = os.environ.get("WILDFIRE_DB_PORT", "5432")
db_name = os.environ.get("WILDFIRE_DB_NAME", "wildfire_db")

# quote_plus keeps a password containing '@', ':' or '/' from corrupting the URL.
db_string = f"postgresql://{db_user}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
engine = create_engine(db_string)

In [25]:
# Perform an inner join of fire_incidents, fire_impacts and fire_locations,
# matching rows on fire_name. The SELECT order is the column order that the
# DataFrame-building cell relies on.
query1 = """
    SELECT fire_incidents.major_incident,
           fire_impacts.total_acres_burned,
           fire_impacts.injuries,
           fire_impacts.structure_impacted,
           fire_locations.minimum_temp,
           fire_locations.maximum_temp,
           fire_locations.average_temp,
           fire_locations.wind_speed,
           fire_locations.county_name
    FROM fire_incidents
    JOIN fire_impacts ON fire_incidents.fire_name = fire_impacts.fire_name
    JOIN fire_locations ON fire_incidents.fire_name = fire_locations.fire_name
"""

# Engine.execute() was removed in SQLAlchemy 2.0; an explicit connection plus a
# text() construct works on both 1.x and 2.x. Rows are copied into plain tuples
# before the connection closes so that later cells can still read `result`.
with engine.connect() as connection:
    result = [tuple(row) for row in connection.execute(text(query1))]


In [26]:
# Load the query rows into a DataFrame, labelling the columns in SELECT order.
impact_columns = [
    'major_incident',
    'total_acres_burned',
    'injuries',
    'structure_impacted',
    'minimum_temp',
    'maximum_temp',
    'average_temp',
    'wind_speed',
    'county_name',
]
impact_df = pd.DataFrame.from_records(result, columns=impact_columns)

In [27]:
# Preview the first five rows of the joined data.
impact_df.head(5)

Unnamed: 0,major_incident,total_acres_burned,injuries,structure_impacted,minimum_temp,maximum_temp,average_temp,wind_speed,county_name
0,False,257314,0,0,17,36,26,6,Tuolumne
1,False,30274,0,0,17,32,24,37,Los Angeles
2,False,27531,0,0,28,43,36,14,Riverside
3,False,27440,0,0,-273,-273,-273,-1,Placer
4,True,24251,10,16,-273,-273,-273,-1,Ventura


In [20]:
# Attach measurement units to the weather column names.
unit_labels = {
    'minimum_temp': 'minimum_temp(°C)',
    'maximum_temp': 'maximum_temp(°C)',
    'average_temp': 'average_temp(°C)',
    'wind_speed': 'wind_speed(km/hr)',
}
impact_df = impact_df.rename(columns=unit_labels)

impact_df.head()

Unnamed: 0,major_incident,total_acres_burned,injuries,structure_impacted,minimum_temp(°C),maximum_temp(°C),average_temp(°C),wind_speed(km/hr),county_name
0,False,257314,0,0,17,36,26,6,Tuolumne
1,False,30274,0,0,17,32,24,37,Los Angeles
2,False,27531,0,0,28,43,36,14,Riverside
3,False,27440,0,0,-273,-273,-273,-1,Placer
4,True,24251,10,16,-273,-273,-273,-1,Ventura


# Train a logistic regression model to predict `major_incident`

In [19]:
# scikit-learn tools used for encoding, scaling, splitting, fitting and scoring.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


In [20]:
# Encode each county name as an integer code, stored in a new `counties`
# column on a working copy so that impact_df itself is left untouched.
# NOTE(review): integer codes give counties an arbitrary numeric order, which a
# linear model will treat as meaningful - consider one-hot encoding instead.
le = LabelEncoder()
county_codes = le.fit_transform(impact_df['county_name'])

impact_df_Copy = impact_df.copy()
impact_df_Copy['counties'] = county_codes

In [21]:
# Standardise structure_impacted and keep the scaled values in their own column.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down - consider fitting it on the training rows only.
scaled_data = StandardScaler()

structure_counts = impact_df_Copy[["structure_impacted"]]
scaled_data.fit(structure_counts)
Structure_Impacted_scaled = scaled_data.transform(structure_counts)

impact_df_Copy["Structure_Impacted_scaled"] = Structure_Impacted_scaled

In [22]:
# Preview the working copy with the encoded and scaled columns appended.
impact_df_Copy.head(5)

Unnamed: 0,major_incident,total_acres_burned,injuries,structure_impacted,minimum_temp,maximum_temp,average_temp,wind_speed(km/hr),county_name,counties,Structure_Impacted_scaled
0,False,257314,0,0,17,36,26,6,Tuolumne,53,-0.100307
1,False,30274,0,0,17,32,24,37,Los Angeles,17,-0.100307
2,False,27531,0,0,28,43,36,14,Riverside,32,-0.100307
3,False,27440,0,0,-273,-273,-273,-1,Placer,30,-0.100307
4,True,24251,10,16,-273,-273,-273,-1,Ventura,54,-0.032889


In [26]:
# Split the data into the target (y) and the feature matrix (X).
#
# The weather columns must be referenced by the unit-suffixed names assigned in
# the rename cell; the unsuffixed names no longer exist after that cell runs,
# so selecting them raises KeyError on a fresh Restart & Run All.
# NOTE(review): the previews show -273 temperatures and -1 wind speeds, which
# look like missing-value placeholders - confirm, and consider handling them
# before training.
feature_columns = [
    "total_acres_burned",
    "injuries",
    "maximum_temp(°C)",
    "minimum_temp(°C)",
    "average_temp(°C)",
    "wind_speed(km/hr)",
    "counties",
    "Structure_Impacted_scaled",
]

y = impact_df_Copy["major_incident"]
X = impact_df_Copy[feature_columns]

In [27]:
# Hold out a test set, stratified on the target and seeded for repeatability.
split_parts = train_test_split(X, y, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts
X_train.shape

(805, 8)

In [28]:
# Fit a logistic regression classifier on the training split.
lr_settings = {
    "solver": 'lbfgs',
    "max_iter": 200,
    "random_state": 1,
}
classifier = LogisticRegression(**lr_settings)

classifier.fit(X_train, y_train)

LogisticRegression(max_iter=200, random_state=1)

In [29]:
# Predict on the held-out features and place the predictions next to the
# actual labels for a side-by-side look.
y_pred = classifier.predict(X_test)

comparison = {"Prediction": y_pred, "Actual": y_test}
results = pd.DataFrame(comparison)
results = results.reset_index(drop=True)
results.head(20)

Unnamed: 0,Prediction,Actual
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
5,False,False
6,False,False
7,False,False
8,False,False
9,False,False


In [30]:
# Share of test rows whose predicted label matches the actual label.
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.8438661710037175


In [31]:
# Confusion matrix: rows are the actual classes, columns the predicted ones.
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(matrix)

[[207   0]
 [ 42  20]]


In [32]:
# Per-class precision, recall and F1 on the test split.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

       False       0.83      1.00      0.91       207
        True       1.00      0.32      0.49        62

    accuracy                           0.84       269
   macro avg       0.92      0.66      0.70       269
weighted avg       0.87      0.84      0.81       269

