In [1]:
import pandas as pd
import matplotlib as plot
import json
from datetime import datetime

from pymongo import MongoClient
from pprint import pprint
from pathlib import Path
from pandas_geojson import to_geojson, write_geojson

In [2]:
# Location of the raw wildfire CSV, relative to this notebook.
wildfires_load = Path('../H-Wildfire/Resources/Wildfires 2.csv')

# Parse the CSV, then expose the result under the name the rest of the
# notebook works with.
wildfires_data = pd.read_csv(filepath_or_buffer=wildfires_load)
wildfires_df = pd.DataFrame(data=wildfires_data)

In [3]:
# Summary statistics for the numeric columns. `describe` has to be *called*
# (a bare `wildfires_df.describe` only references the method), and because it
# is not the last expression in the cell it must be displayed explicitly.
# `display` is provided by the IPython/Jupyter kernel.
display(wildfires_df.describe())

# Column dtypes; as the cell's last expression this is rendered automatically.
wildfires_df.dtypes

index                 int64
FIRE_YEAR             int64
Fire_Date            object
STAT_CAUSE_DESCR     object
CONT_TIME           float64
FIRE_SIZE           float64
FIRE_SIZE_CLASS      object
LATITUDE            float64
LONGITUDE           float64
STATE                object
dtype: object

In [4]:
# Open a client against port 27017, leaving the host at the driver default.
mongo = MongoClient(port=27017)

# Print the databases visible through this connection.
database_names = mongo.list_database_names()
print(database_names)

['Class_db', 'admin', 'autosaurus', 'classDB', 'config', 'epa', 'fruits_db', 'lightning_fires_db', 'local', 'met', 'nasa_fires', 'petsitly_marketing', 'travel_db', 'uk_food', 'wildfires_db']


In [5]:
# Preview the raw frame. Only the last expression of a cell is rendered
# automatically, so this intermediate preview is displayed explicitly
# (`display` is provided by the IPython/Jupyter kernel).
display(wildfires_df.head(20))

# Drop the fire-cause column; `drop` returns a new frame, so `wildfires_df`
# itself is left unchanged.
column_drop = ['STAT_CAUSE_DESCR']
wildfires_df2 = wildfires_df.drop(columns=column_drop)

# Show the end of the trimmed frame as the cell's output.
wildfires_df2.tail(20)



Unnamed: 0,index,FIRE_YEAR,Fire_Date,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE
278448,1879968,2008,2008-06-22,,3000.0,F,40.956983,-121.321236,CA
278449,1879993,2009,2009-06-03,1247.0,0.1,A,40.31,-122.849,CA
278450,1880019,2009,2009-05-30,,0.1,A,41.50615,-122.3623,CA
278451,1880036,2010,2010-01-19,,0.01,A,38.628181,-122.903581,CA
278452,1880042,2009,2009-08-05,,1.0,B,41.178567,-121.846617,CA
278453,1880067,2009,2009-08-03,,1.0,B,40.915883,-121.568233,CA
278454,1880118,2009,2009-08-02,2247.0,1.0,B,40.322017,-120.76545,CA
278455,1880121,2009,2009-08-05,,0.1,A,40.986882,-121.132924,CA
278456,1880128,2010,2010-07-15,,0.25,A,34.450561,-117.033446,CA
278457,1880191,2010,2010-08-25,,38.0,C,33.563736,-117.031103,CA


In [6]:
# Year ranges used to bucket the fires; both ends of each tuple are inclusive.
year_ranges = [(1992,1999), (2000,2007), (2008,2015)]

# Derive the bin edges and labels from `year_ranges` so they cannot drift apart.
# pd.cut builds right-closed intervals by default, so the edges must be
# [first_start - 1, end_1, end_2, end_3] -> (1991, 1999], (1999, 2007],
# (2007, 2015]. Using 2008/2016 as edges would put FIRE_YEAR 2008 into the
# '2000-2007' bucket.
year_bin_edges = [year_ranges[0][0] - 1] + [end for _, end in year_ranges]
year_bin_labels = [f'{start}-{end}' for start, end in year_ranges]

# Create a new column with the corresponding range
wildfires_df2['Year Range'] = pd.cut(wildfires_df2['FIRE_YEAR'], bins=year_bin_edges,
                                     labels=year_bin_labels)

# Two sorted views of the labelled frame: one by range, one by exact year.
wildfires_sorted_df = wildfires_df2.sort_values(by='Year Range')
wildfires_df3 = wildfires_df2.sort_values('FIRE_YEAR', ascending=True)
wildfires_df3.head(20)

Unnamed: 0,index,FIRE_YEAR,Fire_Date,CONT_TIME,FIRE_SIZE,FIRE_SIZE_CLASS,LATITUDE,LONGITUDE,STATE,Year Range
24295,45206,1992,1992-06-03,1430.0,0.2,A,33.966667,-111.05,AZ,1992-1999
24775,45912,1992,1992-06-25,1938.0,0.1,A,39.018333,-111.725,UT,1992-1999
24774,45911,1992,1992-06-23,1250.0,1.0,B,38.561667,-112.063333,UT,1992-1999
24773,45910,1992,1992-06-23,2119.0,1.0,B,38.563333,-112.063333,UT,1992-1999
24772,45909,1992,1992-06-06,1928.0,0.3,B,38.868333,-111.718333,UT,1992-1999
24771,45905,1992,1992-08-21,1730.0,0.1,A,38.288333,-112.551667,UT,1992-1999
24770,45904,1992,1992-08-18,1730.0,3.0,B,38.553333,-112.458333,UT,1992-1999
24769,45903,1992,1992-08-13,2000.0,0.1,A,38.575,-112.391667,UT,1992-1999
117450,239927,1992,1992-08-04,2200.0,0.1,A,34.89382,-108.21252,NM,1992-1999
117449,239926,1992,1992-08-01,2000.0,0.1,A,34.91765,-108.11288,NM,1992-1999


In [9]:
# FIRE_SIZE_CLASS values kept for the per-range subsets.
large_fire_classes = ['G', 'F', 'E']


def select_large_fires(fires, year_range):
    """Return the rows of `fires` in `year_range` whose size class is E, F or G.

    Parameters
    ----------
    fires : pandas.DataFrame
        Frame with 'Year Range', 'FIRE_SIZE_CLASS' and 'FIRE_YEAR' columns.
    year_range : str
        Label to match in the 'Year Range' column, e.g. '1992-1999'.

    Returns
    -------
    pandas.DataFrame
        The matching rows, ordered by ascending FIRE_YEAR.
    """
    in_range = fires['Year Range'] == year_range
    is_large = fires['FIRE_SIZE_CLASS'].isin(large_fire_classes)
    return fires.loc[in_range & is_large].sort_values('FIRE_YEAR', ascending=True)


# Filter from `wildfires_sorted_df`, which this cell never reassigns. Reading
# from `wildfires_df3` (overwritten below) would make a second run of this cell
# filter the already-filtered 2008-2015 subset and return empty frames for the
# first two ranges.
wild_year_1 = select_large_fires(wildfires_sorted_df, '1992-1999')
wild_year_2 = select_large_fires(wildfires_sorted_df, '2000-2007')
wild_year_3 = select_large_fires(wildfires_sorted_df, '2008-2015')

wildfires_df1 = pd.DataFrame(wild_year_1)
wildfires_df2 = pd.DataFrame(wild_year_2)
wildfires_df3 = pd.DataFrame(wild_year_3)

In [10]:
# Destination files, one per year range.
geojson_path1 = Path('../H-Wildfire/Resources/wildfires_1992_1999.geojson')
geojson_path2 = Path('../H-Wildfire/Resources/wildfires_2000_2007.geojson')
geojson_path3 = Path('../H-Wildfire/Resources/wildfires_2008_2015.geojson')

# Columns copied into each feature's properties.
geojson_properties = ['FIRE_YEAR', 'Fire_Date','CONT_TIME', 'FIRE_SIZE', 'FIRE_SIZE_CLASS','STATE']


def fires_to_geojson(fires):
    """Convert one wildfire frame to GeoJSON using its LATITUDE/LONGITUDE columns."""
    # A fresh list per call mirrors passing a new list literal each time.
    return to_geojson(fires, lat='LATITUDE', lon='LONGITUDE',
                      properties=list(geojson_properties))


# NOTE(review): CONT_TIME is missing for some rows in the outputs shown earlier;
# confirm the written files are acceptable to downstream JSON consumers.
wildfires_geojson1 = fires_to_geojson(wildfires_df1)
wildfires_geojson2 = fires_to_geojson(wildfires_df2)
wildfires_geojson3 = fires_to_geojson(wildfires_df3)

# Write each collection to its matching path, in range order.
for feature_collection, destination in (
    (wildfires_geojson1, geojson_path1),
    (wildfires_geojson2, geojson_path2),
    (wildfires_geojson3, geojson_path3),
):
    write_geojson(feature_collection, destination)