# California 2017 Fire recovery 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
from shapely.geometry import Point, LineString, MultiPolygon, asMultiPolygon, Polygon
from shapely import wkb, wkt
import shapely
import geopandas as gpd
from shapely.ops import unary_union
import requests
from bs4 import BeautifulSoup
import re
import os
import zipfile
import wget
from datetime import datetime
from multiprocessing.dummy import Pool as ThreadPool 
%matplotlib inline

import sys
# sys.path.insert(0, '/Users/jianglongli/Desktop/workbook/Freddie_project/PostGIS/gisfeaturecode_v7/')
from mapping_utility_v2 import map_geopandas, map_AllHouses
from mapping_utility_fire import map_geopandas_fire
from python_postgis_talk_utility import transform_pd_to_gpd_general, transform_pd_to_gpd
from cali_fire_utility import geomatch, readin_shapefile, timer, fire_postprocessing, create_fire_union
from cali_fire_utility import download_and_create_shp, download_read_curent_fire

pd.options.display.max_columns = 100

In [2]:
import warnings
warnings.simplefilter("ignore")

### read in hve turned off file and try matching

In [3]:
hve = pd.read_csv('/Users/jianglongli/Desktop/workbook/data/cali_turnedoff/cali_turned_off.csv', dtype={'zip': str})
hve.loc[:, 'long'] = hve['long'].apply(lambda x: -x if x>0 else x)
hve = hve[hve.lat.notnull()]
hve.shape

(324931, 6)

### download shp files and create total geopandas dataframe

In [4]:
url_cali = "https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/current_year_fire_data/California/"
url_master = "https://rmgsc.cr.usgs.gov"
folder_root = '/Users/jianglongli/Desktop/workbook/data/disaster_recovery'
folder_sub = 'cali_fire'

In [5]:
%%time
gdf = download_and_create_shp(url_cali, url_master, folder_root, folder_sub, 
                              download_type='zip', unzip=True, verb=True)


########## scraping start: 10:18:16 ##########
totally 1558 zip files url parsed! 10:19:48

########## check unzipped files: 10:19:48 ##########
CPU times: user 42.2 s, sys: 519 ms, total: 42.7 s
Wall time: 2min 10s


In [6]:
gdf = fire_postprocessing(gdf)

Ring Self-intersection at or near point -122.29322609958997 38.368854461891438
Ring Self-intersection at or near point -122.29322609958997 38.368854461891438
Ring Self-intersection at or near point -122.29322609958997 38.368854461891438
Ring Self-intersection at or near point -122.29322609959007 38.368854461891388
Ring Self-intersection at or near point -122.29322609958997 38.368854461891438
Ring Self-intersection at or near point -120.99716398229684 41.335605047406816
Ring Self-intersection at or near point -120.03658782922585 37.62942306856656
Ring Self-intersection at or near point -120.0365878176455 37.62942307763192
Ring Self-intersection at or near point -120.0365878288182 37.62942306858718
Ring Self-intersection at or near point -120.0365878176455 37.62942307763192
Ring Self-intersection at or near point -120.0365878288182 37.62942306858718
Ring Self-intersection at or near point -120.0365878176455 37.62942307763192
Ring Self-intersection at or near point -123.62637115611462 41.

In [8]:
%%time
gdf_union = create_fire_union(gdf, 6) 

CPU times: user 21min 20s, sys: 10.6 s, total: 21min 30s
Wall time: 13min 8s


In [232]:
%%time
print(timer())
result = geomatch(hve, gdf_union, 'firename')

20:33:50
CPU times: user 17min 5s, sys: 3.79 s, total: 17min 8s
Wall time: 17min 6s


**download current fire shapefile**

In [261]:
gdf_current = download_read_curent_fire(folder_root)
gdf_current_ca = gdf_current[gdf_current.state == 'CA']
gdf_current_ca = fire_postprocessing(gdf_current_ca)
result_current = geomatch(hve, gdf_current_ca, 'firename', ['firename', 'geometry', 'perdattime'])

### fire impact reporting

+ **fire this year**

In [287]:
counts = result.firename.value_counts()
summary = pd.concat([counts.rename('count'), gdf_union.set_index('firename')], axis=1, join='inner')
summary = summary.reset_index().rename(columns={'index': 'firename'})
summary = transform_pd_to_gpd(summary, geometry='geometry')

In [284]:
summary.to_csv(folder_root + '/' + 'fire_summary.csv', index=False)

In [321]:
map_geopandas_fire(summary, ckeep=['firename', 'geometry', 'count'], clabel='firename', 
              cpop=['firename', 'count'], saveTo='./', saveName='2017fire', saveOnly=True)

In [275]:
hve_onfire = result[result.firename.notnull()]
hve_onfire.drop('geometry', axis=1).to_csv('/Users/jianglongli/Desktop/workbook/data/hve_onfire.csv', index=False)

+ **current fire**

In [325]:
hve_onfire_current = result_current[result_current.firename.notnull()]
summary_crrt = hve_onfire_current.firename.value_counts().rename('count')
summary_crrt = pd.concat([summary_crrt, gdf_current_ca[['firename', 'gisacres', 'geometry']].set_index('firename')], 
         axis=1, join='inner').reset_index().rename(columns={'index': 'firename'})
summary_crrt = transform_pd_to_gpd(summary_crrt, geometry='geometry')
summary_crrt.to_csv('/Users/jianglongli/Desktop/workbook/data/summary_crrt.csv', index=False)

In [328]:
map_geopandas_fire(summary_crrt, ckeep=['firename', 'geometry', 'count'], clabel='firename', 
              cpop=['firename', 'count'], saveTo='./', saveName='current_fire', saveOnly=True)

map_geopandas_fire(gdf[gdf.firename == 'ABNEY'], ckeep=['firename', 'geometry'], clabel='firename', 
              cpop=['firename'], saveTo='./', saveName='before_union', saveOnly=True)

map_geopandas_fire(gdf_union[gdf_union.firename == 'ABNEY'], ckeep=['firename', 'geometry'], clabel='firename', 
              cpop=['firename'], saveTo='./', saveName='after_union', saveOnly=True)

In [272]:
hve_onfire_current.drop('geometry', axis=1).to_csv('/Users/jianglongli/Desktop/workbook/data/hve_onfire_current.csv', 
                                           index=False)

**Download current fire KML file and transform to geopandas dataframe**

In [8]:
# df.to_csv('/Users/jianglongli/Downloads/cali_fire.csv', index=False)

### Matching with zipcode shapfiel

In [340]:
zipfile = wget.download('ftp://ftp2.census.gov/geo/tiger/TIGER2017/ZCTA5/tl_2017_us_zcta510.zip', 
              out='/Users/jianglongli/Desktop/workbook/data/')
# zfile = '/Users/jianglongli/Desktop/workbook/data//tl_2017_us_zcta510.zip'
with zipfile.ZipFile(zfile, "r") as zip_ref: 
    zip_ref.extractall(path='/Users/jianglongli/Desktop/workbook/data/') 

'/Users/jianglongli/Desktop/workbook/data//tl_2017_us_zcta510.zip'

In [9]:
gdfzip = readin_shapefile(folder_root + '/' + 'tl_2017_us_zcta510.shp')

In [10]:
%%time
result_zip = gpd.sjoin(gdf_union, gdfzip, how='left')

CPU times: user 7.78 s, sys: 41.8 ms, total: 7.82 s
Wall time: 7.83 s


In [16]:
result_zip

Unnamed: 0,firename,geometry,max_gisacres,start_date,end_date,index_right,aland10,awater10,classfp10,funcstat10,geoid10,intptlat10,intptlon10,mtfcc10,zcta5ce10
0,54 DUTCHMAN,POLYGON ((-123.3522620192062 40.18257224492159...,468.057746,2017-08-08 19:00:00,2017-08-08 19:00:00,20038,6.340445e+08,72396.0,B5,S,95595,+40.1231769,-123.4462791,G6350,95595
1,ABNEY,(POLYGON ((-123.1654099512362 41.9268791544161...,32889.335649,2017-10-03 23:04:00,2017-10-26 19:20:00,1243,7.410477e+08,1994206.0,B5,S,96050,+41.8988798,-122.8885521,G6350,96050
1,ABNEY,(POLYGON ((-123.1654099512362 41.9268791544161...,32889.335649,2017-10-03 23:04:00,2017-10-26 19:20:00,6796,6.652211e+08,1491104.0,B5,S,96086,+41.9242120,-123.2608091,G6350,96086
1,ABNEY,(POLYGON ((-123.1654099512362 41.9268791544161...,32889.335649,2017-10-03 23:04:00,2017-10-26 19:20:00,2063,4.212278e+08,89273.0,B5,S,97544,+42.1613770,-123.3024180,G6350,97544
1,ABNEY,(POLYGON ((-123.1654099512362 41.9268791544161...,32889.335649,2017-10-03 23:04:00,2017-10-26 19:20:00,2051,9.380070e+08,4250896.0,B5,S,97530,+42.1521475,-123.0574989,G6350,97530
2,ADOBE,POLYGON ((-122.5701099670042 38.38694398988907...,8200.897210,2017-10-10 14:47:00,2017-10-12 05:56:00,26081,1.418394e+07,0.0,B5,S,95405,+38.4371135,-122.6659930,G6350,95405
2,ADOBE,POLYGON ((-122.5701099670042 38.38694398988907...,8200.897210,2017-10-10 14:47:00,2017-10-12 05:56:00,26080,2.071345e+08,60660.0,B5,S,95404,+38.5303937,-122.5922622,G6350,95404
2,ADOBE,POLYGON ((-122.5701099670042 38.38694398988907...,8200.897210,2017-10-10 14:47:00,2017-10-12 05:56:00,26083,9.940114e+07,661638.0,B5,S,95409,+38.4688295,-122.5807239,G6350,95409
2,ADOBE,POLYGON ((-122.5701099670042 38.38694398988907...,8200.897210,2017-10-10 14:47:00,2017-10-12 05:56:00,31023,3.721777e+07,0.0,B5,S,95452,+38.4255231,-122.5352103,G6350,95452
2,ADOBE,POLYGON ((-122.5701099670042 38.38694398988907...,8200.897210,2017-10-10 14:47:00,2017-10-12 05:56:00,23754,6.322618e+07,146136.0,B5,S,95442,+38.3715388,-122.4873064,G6350,95442


In [11]:
result_zip.shape

(267, 15)

In [12]:
fire_zips = result_zip.geoid10.unique()

In [13]:
hve_zips = hve.zip.unique()

In [14]:
'94503' in fire_zips

False

In [21]:
zipall = pd.read_excel('/Users/jianglongli/Downloads/Copy of Wildfire_zip_code_1017_2017.xls')

In [25]:
zipall.shape

(152, 4)

In [24]:
zipall = zipall[zipall.COUNTY_NAME != 'ORANGE']

In [28]:
zipall.loc[:, 'zipcode'] = zipall.zipcode.astype(str)

In [31]:
len(set(zipall.zipcode) - set(fire_zips))

121

In [34]:
zipfile

<module 'zipfile' from '/Users/jianglongli/anaconda3/lib/python3.6/zipfile.py'>

In [20]:
fire_zips

array(['95595', '96050', '96086', '97544', '97530', '95405', '95404',
       '95409', '95452', '95442', '93454', '95568', nan, '94574', '94558',
       '94559', '94534', '93549', '95033', '95006', '96011', '95514',
       '96025', '92399', '95552', '96032', '96039', '93238', '92880',
       '92882', '92530', '92782', '92705', '92602', '92869', '92867',
       '92808', '92807', '92676', '95901', '95918', '95966', '95914',
       '96134', '95965', '96031', '96006', '96054', '93634', '95306',
       '95345', '95311', '95338', '95223', '95314', '95364', '95389',
       '96111', '96027', '96048', '96091', '95720', '96010', '93306',
       '95949', '93451', '93210', '91935', '93513', '93529', '93541',
       '96093', '83539', '93453', '93432', '92356', '92314', '93420',
       '95726', '93527', '92883', '91207', '91208', '91504', '91501',
       '91352', '91214', '91040', '91042', '91384', '95683', '95682',
       '93265', '95959', '95975', '96109', '89508', '89510', '92086',
       '92555',

In [17]:
fire_zips.size

151

In [35]:
len(set(hve_zips) - set(fire_zips))

92

In [352]:
gdf_union.shape

(168, 5)

In [348]:
gdfzip.shape

(33144, 10)

In [349]:
gdfzip.head()

Unnamed: 0,aland10,awater10,classfp10,funcstat10,geoid10,intptlat10,intptlon10,mtfcc10,zcta5ce10,geometry
0,63411475,157689,B5,S,43451,41.318301,-83.6174935,G6350,43451,"POLYGON ((-83.708733 41.327326, -83.708147 41...."
1,121783674,13437380,B5,S,43452,41.5157923,-82.9809454,G6350,43452,"POLYGON ((-83.086978 41.537796, -83.0825629999..."
2,9389362,999166,B5,S,43456,41.6468445,-82.8226641,G6350,43456,"(POLYGON ((-82.835577 41.710823, -82.83515 41...."
3,48035540,0,B5,S,43457,41.2673266,-83.4274645,G6350,43457,"POLYGON ((-83.49650299999999 41.253708, -83.48..."
4,2573816,39915,B5,S,43458,41.5304461,-83.2133648,G6350,43458,"POLYGON ((-83.222292 41.531025, -83.2222819999..."


In [350]:
gdfzip[gdfzip.geoid10 == '22102']

Unnamed: 0,aland10,awater10,classfp10,funcstat10,geoid10,intptlat10,intptlon10,mtfcc10,zcta5ce10,geometry
22657,27139748,169423,B5,S,22102,38.950951,-77.2295525,G6350,22102,"POLYGON ((-77.264211 38.951591, -77.263722 38...."


In [351]:
map_geopandas(gdfzip[gdfzip.geoid10 == '22102'], ckeep=['geoid10', 'geometry'], clabel='geoid10')

### Mapping

In [55]:
map_geopandas(gdf_active[gdf_active.state == 'CA'], ckeep=['firename', 'geometry'], clabel='firename')

# Reference

+ **GeoMAC data, part of USGS, this data provides file polygons**: https://www.geomac.gov/index.shtml 
    - shapefile: https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/
    - data attribute definition: 
        * https://www.nwcg.gov/sites/default/files/stds/WildlandFirePerimeters_definition.pdf
        * https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/historic_fire_data/perimeters_dd83_METADATA.htm
    - curent file perimeter methodology: https://www.geomac.gov/viewer/help/perimeters_active.html    
    - GeoMAC map viewer help and documentation:https://www.geomac.gov/viewer/help/Help.html
    - a 2008 publication about GeoMAC: https://pubs.usgs.gov/ds/612/pdf/ds612.pdf
    - a 2008 GeoMAC user guide: https://webarchive.library.unt.edu/eot2008/20080916004656/http://geomac.gov/pdf/UsersGuide/GeoMAC_UG.pdf


+ **USGS**: https://www.usgs.gov/centers/gecsc


+ **Data Basin view of GeoMAC**: https://databasin.org/datasets/6ed18e2a72e74b0d81e14c93d5b46f07


+ **NASA Fire Information for Resource Management System (FIRMS), mostly point data, near real time**: https://earthdata.nasa.gov/earth-observation-data/near-real-time/firms


+ **CA fire org, has google map fire range, but not sure how to get the shapefile**: http://www.calfire.ca.gov/general/firemaps
    - FRAP program from Cal Fire also has fire perimeter data: http://frap.fire.ca.gov/data/frapgisdata-sw-fireperimeters_download
    
    
+ **KML file tutorial**: https://developers.google.com/kml/documentation/kml_tut

# Note

**GeoMAC fire perimeters:**
his layer contains fire perimeters that are submitted to GeoMAC by field offices. The fire perimeters are updated every one or two days, as the data is made available. If we have received no new data, the "expired" layer is not replaced. The layer is replaced as soon as we receive an updated file. Perimeters are usually collected on a daily basis for large fires that are growing. However, there may be gaps in daily coverage.

The GeoMAC team attributes the perimeters using the IRWIN (Integrated Reporting of Wildland-Fire Information) system.

Perimeters are collected in the field by a variety of means, including infrared flights, and by using a GPS unit to map the perimeter. Please NOTE: GeoMAC only displays perimeter data as they are submitted by field offices. Since data are not received for all fires, you may not be able to view perimeters for every fire.

Perimeter data displayed in and delivered by the Geomac application is not the final or official perimeter for any incident and is provided for informational purposes only. The final official perimeter should be obtained from the host unit which can be determined by looking at the Unit Id for any specific fire. The host unit is responsible for producing official and final perimeters for all incidents in their jurisdiction.


**Cal Fire**: 
As part of the California Fire Plan, the Fire and Resource Assessment Program (FRAP) compiles fire perimeters and has established an on-going fire perimeter data capture process in order to update vegetative fuel rank maps. CAL FIRE/FRAP, the USDA Forest Service Region 5 Remote Sensing Lab, the Bureau of Land Management, and the National Park Service jointly develop the comprehensive fire perimeter GIS layer for public and private lands throughout California.

The fire perimeter database represents the most complete digital record of fire perimeters in California. However it is still incomplete in many respects. Fire perimeter database users must exercise caution to avoid inaccurate or erroneous conclusions. For more information on potential errors and their source please review the methodology section of these pages.

# Web Scraping reflection
+ try scrapy (scrapy vs beautifulsoup): https://blog.michaelyin.info/2017/08/10/scrapy-tutorial-1-scrapy-vs-beautiful-soup/

+ scrapy is a framework: https://hexfox.com/p/scrapy-vs-beautifulsoup/