In [6]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
#data from 1973 - 2019
# Read the file geodatabase into a GeoDataFrame. The former
# `delim_whitespace = True` argument was dropped: it is a pandas.read_csv
# option and has no meaning when reading a geodatabase.
data = gpd.read_file('./ds2783.gdb/')
data.info() #see what our data looks like

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 443421 entries, 0 to 443420
Data columns (total 40 columns):
 #   Column          Non-Null Count   Dtype   
---  ------          --------------   -----   
 0   RPT_YR          443421 non-null  int64   
 1   SURVEY_ID1      376258 non-null  object  
 2   SURVEY_ID2      375963 non-null  object  
 3   SURVEY_ID3      370896 non-null  object  
 4   DMG_TYPE1       443322 non-null  object  
 5   DMG_TYPE2       375142 non-null  object  
 6   DMG_TYPE3       374119 non-null  object  
 7   SEVERITY1       401858 non-null  object  
 8   SEVERITY2       375072 non-null  object  
 9   SEVERITY3       374012 non-null  object  
 10  PATTERN1        61754 non-null   object  
 11  PATTERN2        300 non-null     object  
 12  PATTERN3        31 non-null      object  
 13  TPA1            419113 non-null  float64 
 14  TPA2            376731 non-null  float64 
 15  TPA3            375771 non-null  float64 
 16  TPA_Total       440324 non-nul

Reproject the geometry from EPSG:3310 (California Albers) to EPSG:4326 (longitude/latitude)

In [7]:
# Label the geometries as EPSG:3310 only when no CRS is recorded yet.
# set_crs raises a ValueError when a different CRS is already set, so an
# unguarded call would fail if this cell were re-run after the reprojection.
if data.crs is None:
    data = data.set_crs(epsg = 3310)
data = data.to_crs(epsg = 4326) #reproject to EPSG:4326
data.geometry.bounds #per-row bounding box (minx, miny, maxx, maxy)

Unnamed: 0,minx,miny,maxx,maxy
0,-117.633502,34.349113,-117.622626,34.355126
1,-117.707462,34.249876,-117.625757,34.327159
2,-117.969443,34.362519,-117.968780,34.363069
3,-117.748593,34.363877,-117.747930,34.364427
4,-118.210098,34.359959,-118.209435,34.360507
...,...,...,...,...
443416,-121.300868,39.448478,-121.300661,39.448639
443417,-122.343937,41.964263,-122.343722,41.964425
443418,-119.842470,38.568672,-119.842264,38.568833
443419,-120.724594,41.894543,-120.724380,41.894704


Only keep columns we are interested in (mostly location and date)

In [8]:
# Columns removed from the frame, grouped by the attribute they describe.
columns_to_drop = [
    'SURVEY_ID1', 'SURVEY_ID2', 'SURVEY_ID3',
    'DMG_TYPE1', 'DMG_TYPE2', 'DMG_TYPE3',
    'SEVERITY2', 'SEVERITY3',
    'PATTERN1', 'PATTERN2', 'PATTERN3',
    'TPA1', 'TPA2', 'TPA3',
    'NO_TREES1', 'NO_TREES2', 'NO_TREES3',
    'DCA1', 'DCA2', 'DCA3',
    'HOST1', 'HOST2', 'HOST3',
    'FOR_TYPE1', 'FOR_TYPE2', 'FOR_TYPE3',
    'NOTES', 'no_years', 'ADS_ID',
    'Shape_Length0', 'Shape_Area0', 'Shape_Length', 'Shape_Area',
    'NO_TREES_TOTAL',
]

# Remove the listed columns from `data` in place.
data.drop(columns = columns_to_drop , inplace = True)
data.info() #display info

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 443421 entries, 0 to 443420
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype   
---  ------     --------------   -----   
 0   RPT_YR     443421 non-null  int64   
 1   SEVERITY1  401858 non-null  object  
 2   TPA_Total  440324 non-null  float64 
 3   ACRES      443421 non-null  float64 
 4   Date       81386 non-null   object  
 5   geometry   443421 non-null  geometry
dtypes: float64(2), geometry(1), int64(1), object(2)
memory usage: 20.3+ MB


Remove records of 1000 acres or less and records with no severity information

In [9]:
MIN_ACRES = 1000 #size threshold in acres; rows at or below it are removed

print('entries:' , len(data)) #row count before filtering
data = data.loc[data['ACRES'] > MIN_ACRES] #keep only rows with ACRES above the threshold
print('entries:' , len(data)) #row count after the size filter
# dropna returns a new frame, avoiding an in-place drop on the slice produced
# by the .loc filter above while removing exactly the same rows.
data = data.dropna(subset = ['SEVERITY1']) #remove rows with no SEVERITY1 value
print('entries:' , len(data)) #row count after the severity filter

entries: 443421
entries: 6351
entries: 6335


Export cleaned data to a .geojson

In [10]:
# Summarise the final frame, then write it to disk as GeoJSON.
data.info()
data.to_file(
    'Clean_tree.geojson',
    driver = 'GeoJSON',
    encoding = 'utf-8',
)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 6335 entries, 1 to 427602
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   RPT_YR     6335 non-null   int64   
 1   SEVERITY1  6335 non-null   object  
 2   TPA_Total  6171 non-null   float64 
 3   ACRES      6335 non-null   float64 
 4   Date       2286 non-null   object  
 5   geometry   6335 non-null   geometry
dtypes: float64(2), geometry(1), int64(1), object(2)
memory usage: 346.4+ KB
