In [None]:
import pandas as pd
import numpy as np
import json
import requests
import pprint
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sqlalchemy import create_engine

In [None]:
# SQLAlchemy engine for the SQLite file `data.sqlite`; every SQL read in this
# notebook goes through it.
engine = create_engine("sqlite:///data.sqlite")

In [None]:
# Open the single connection that the pd.read_sql calls in the following cells share.
conn = engine.connect()

In [None]:
# Pull the two columns once and derive both aggregates from the same grouped
# frame, instead of sending the identical query to the database twice.
fires_by_cause = pd.read_sql(
    "SELECT FIRE_SIZE, NWCG_CAUSE_CLASSIFICATION FROM fires", conn
).groupby('NWCG_CAUSE_CLASSIFICATION')

# Total FIRE_SIZE per cause classification.
fire_size_cause_df = fires_by_cause.sum()

# Number of non-null FIRE_SIZE records per cause classification.
fire_count_cause_df = fires_by_cause.count()

In [None]:
# Side-by-side bar charts: summed fire size (left) and number of fires (right)
# per cause classification.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# Short tick labels for the cause groups.
# NOTE(review): assumes the grouped index holds exactly these three classes in
# this (sorted) order - confirm against the data.
cause_labels = ["Human", "Missing Data", "Natural"]

# Titles and axis labels are set through each Axes object. The pyplot helpers
# (plt.title, plt.xlabel, plt.ylabel) act on pyplot's "current" axes, which is
# not necessarily the panel that pandas has just drawn on.
ax1 = fire_size_cause_df.plot.bar(color="mediumaquamarine", alpha=0.90, align="center", rot=0, ax=axes[0])
ax1.set_xticklabels(cause_labels)
ax1.set_title("Fire Size By Cause")
ax1.set_xlabel("Fire Cause")
ax1.set_ylabel("Sum of Fire Size")

ax2 = fire_count_cause_df.plot.bar(color="orange", alpha=0.90, align="center", rot=0, ax=axes[1])
ax2.set_xticklabels(cause_labels)
ax2.set_title("Fire Count By Cause")
ax2.set_xlabel("Fire Cause")
# This panel shows record counts, not summed sizes.
ax2.set_ylabel("Count of Fires")

fig.tight_layout()

plt.show()

In [None]:
# Number of fire records per year.
# COUNT(*) counts the rows in each year group; SUM(FIRE_YEAR) would add up the
# year numbers themselves (year x count), which is not a fire count.
# ORDER BY makes the row order explicit instead of relying on GROUP BY output order.
fire_count_year_query = pd.read_sql(
    "SELECT FIRE_YEAR, COUNT(*) AS FIRE_COUNT FROM fires GROUP BY FIRE_YEAR ORDER BY FIRE_YEAR",
    conn,
)

# Take the year labels from the query result rather than a hand-typed list, so
# each count stays attached to its own year even if the table's year range changes.
new_index_values = fire_count_year_query["FIRE_YEAR"].tolist()

# Single-column frame (column label 0) of counts, indexed by year.
fire_count_year_df = pd.DataFrame(fire_count_year_query[["FIRE_COUNT"]].values, index=new_index_values)

In [None]:
# Line chart of the per-year values held in fire_count_year_df.
fire_count_year_df.plot(legend=False)

# One x tick every five years, 1995 through 2020.
x_tick_num = list(range(1995, 2021, 5))
plt.xticks(x_tick_num)

plt.xlabel("Year")
plt.ylabel("Count of Fires")
plt.title("Count of Fires Over Time")

plt.show()

In [None]:
# # Time checker
# start_time = time.time()
# print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Query the NOAA NCEI search endpoint and pretty-print the JSON it returns.
# Shape of the URL being assembled (example):
# https://www.ncei.noaa.gov/access/services/search/
# v1/data?dataset=global-hourly&startDate=2016-01-01T00:00:00&endDate=2017-12-31T23:59:59&
# dataTypes=TMP&limit=10&offset=90
#
# NOTE(review): the result variables are prefixed `rain_`, but the request asks
# for dataTypes=TMP (presumably temperature) - confirm which variable is wanted.

url = "https://www.ncei.noaa.gov/access/services/search/v1/data?"

# Query-string fragments; each one except the last ends with "&".
dataset = "dataset=global-hourly&"

startdate = "startDate=1992-01-01T00:00:00&"

enddate = "endDate=2020-12-31T23:59:59&"

dtypes = "dataTypes=TMP&limit=10&offset=90"


query_url = url + dataset + startdate + enddate + dtypes

# requests waits indefinitely unless a timeout is given; bound the wait so an
# unresponsive server cannot hang the notebook.
rain_response = requests.get(query_url, timeout=30)
# Fail with the HTTP status (4xx/5xx) here rather than with a confusing
# JSON-decoding error on an error page.
rain_response.raise_for_status()
rain_json = rain_response.json()

rain_formatted_str = json.dumps(rain_json, indent=2)

print(rain_formatted_str)

In [None]:
# begin testing 01_noScale_logisticRegression
# Load the three modelling columns for FIRE_YEAR = 2020 only. The unfiltered
# query and an integer rescaling of FIRE_SIZE are kept below, disabled.
# full_fire_df = pd.read_sql("SELECT FIRE_SIZE, NWCG_GENERAL_CAUSE, DISCOVERY_DOY FROM fires", conn)
# full_fire_df['FIRE_SIZE'] = full_fire_df['FIRE_SIZE'].multiply(100)
# full_fire_df = full_fire_df.astype({'FIRE_SIZE':'int'})
full_fire_df = pd.read_sql(
    "SELECT FIRE_SIZE, NWCG_GENERAL_CAUSE, DISCOVERY_DOY "
    "FROM fires WHERE FIRE_YEAR = 2020",
    conn,
)
full_fire_df

In [None]:
# NWCG_GENERAL_CAUSE: event or circumstance that started a fire or set the
# stage for its occurrence. Each category string is replaced by an integer
# code equal to its position in the list below (0 through 12).
mapping = {
    cause: code
    for code, cause in enumerate([
        'Arson/incendiarism',
        'Debris and open burning',
        'Equipment and vehicle use',
        'Firearms and explosives use',
        'Fireworks',
        'Misuse of fire by a minor',
        'Natural',
        'Power generation/transmission/distribution',
        'Railroad operations and maintenance',
        'Recreation and ceremony',
        'Smoking',
        'Other causes',
        'Missing data/not specified/undetermined',
    ])
}
full_fire_df['NWCG_GENERAL_CAUSE'] = full_fire_df['NWCG_GENERAL_CAUSE'].replace(mapping)
full_fire_df

In [None]:
# List the column labels currently present in the modelling frame.
print(full_fire_df.columns.tolist())

In [None]:
# Turn FIRE_SIZE into a binary label: 1 where the value is at least 10, else 0.
# The column is overwritten in place, so running this cell a second time
# would compare the 0/1 labels against 10.
full_fire_df['FIRE_SIZE'] = np.where(full_fire_df['FIRE_SIZE'].ge(10), 1, 0)

In [None]:
# Preview the first five rows after the label conversion.
full_fire_df.head(5)

In [None]:
#=========================NO SCALE=======================#

# Features: every column except the label. Target: the FIRE_SIZE column.
X = full_fire_df.drop(columns=['FIRE_SIZE'])
y = full_fire_df['FIRE_SIZE']

In [None]:
from sklearn.model_selection import train_test_split


# Hold-out split with a fixed seed; stratify=y keeps the class proportions of
# y in both the training and the testing part.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, stratify=y, random_state=1)

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the unscaled features: lbfgs solver, balanced class
# weights, seed fixed at 1. (An explicit max_iter=100000 was tried and is
# currently disabled.)
logistic_regression_model = LogisticRegression(
    solver='lbfgs',
    class_weight='balanced',
    random_state=1,
)

# Train on the training split.
logistic_regression_model.fit(X_train, y_train)

In [None]:
# Print the model's .score() on the training and testing splits.
# For a scikit-learn classifier, .score() is the plain mean accuracy, not the
# balanced accuracy.
print(f"Training Data Score: {logistic_regression_model.score(X_train, y_train)}")
print(f"Testing Data Score: {logistic_regression_model.score(X_test, y_test)}")

In [None]:
# Predict labels for the held-out rows and show them beside the true labels
# (rows keep y_test's index).
testing_prediction = logistic_regression_model.predict(X_test)
pd.DataFrame(dict(Prediction=testing_prediction, Actual=y_test))

In [None]:
# Same comparison as a reusable frame with a fresh 0..n-1 index; show the
# first ten rows.
predictions = logistic_regression_model.predict(X_test)
results = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
results = results.reset_index(drop=True)
results.head(n=10)

In [None]:
from sklearn.metrics import accuracy_score

# Text report of per-class metrics for the test-split predictions.
wildfire_test_report = classification_report(y_true=y_test, y_pred=predictions)
print(wildfire_test_report)

In [None]:
#=========================SCALING=======================#
from sklearn.preprocessing import StandardScaler

# Standardise the predictor columns only. FIRE_SIZE holds the 0/1 class label
# used as the target in the cells below; running it through StandardScaler
# would turn the labels into continuous floats that a classifier cannot treat
# as classes.
feature_columns = ["NWCG_GENERAL_CAUSE", "DISCOVERY_DOY"]
full_fire_scaled = StandardScaler().fit_transform(full_fire_df[feature_columns])

# Rebuild a DataFrame from the scaled array. The column names are passed as a
# flat list (a nested list would create MultiIndex columns), and the original
# row index is kept so the label column lines up row for row.
full_fire_scaled_df = pd.DataFrame(full_fire_scaled, columns=feature_columns, index=full_fire_df.index)
full_fire_scaled_df.insert(0, "FIRE_SIZE", full_fire_df["FIRE_SIZE"])

# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows influence the scaling statistics - consider fitting on the
# training split only.

# Display sample data
full_fire_scaled_df.head()

In [None]:
# Features: everything except FIRE_SIZE. Target: FIRE_SIZE.
X = full_fire_scaled_df.drop(columns=['FIRE_SIZE'])
y = full_fire_scaled_df['FIRE_SIZE']

In [None]:
from sklearn.model_selection import train_test_split


# Split the scaled data with the same settings as the unscaled split earlier
# in the notebook (fixed seed, stratified on the target), so both experiments
# use comparably composed train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [None]:
# Tally how often each distinct target value occurs in the training split.
y_train.value_counts()

In [None]:
from sklearn.linear_model import LogisticRegression

# Fresh logistic-regression model for the scaled data, seed fixed at 1.
# This rebinds logistic_regression_model, replacing the model fitted on the
# unscaled data.
# NOTE(review): unlike the earlier model, no class_weight is passed here -
# confirm the difference is intended.
logistic_regression_model = LogisticRegression(random_state=1)

# fit() hands back the estimator, so the name stays bound to the trained model.
logistic_regression_model = logistic_regression_model.fit(X=X_train, y=y_train)

In [None]:
#Train
# classifier.fit(X_train, y_train)
# classifier.fit(X_train, y_train)

In [None]:
# Release the database resources opened at the top of the notebook. No
# `session` object is created anywhere in this notebook; the handles that
# were opened are the connection `conn` and its `engine`.
conn.close()
engine.dispose()