# California 2017 fire recovery: geocoding

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
from shapely.geometry import Point, LineString, MultiPolygon, asMultiPolygon, Polygon
from shapely import wkb, wkt
import shapely
import geopandas as gpd
from shapely.ops import unary_union
import requests
from bs4 import BeautifulSoup
import re
import os
import zipfile
import wget
from datetime import datetime
from multiprocessing.dummy import Pool as ThreadPool 
import geocoder
import geopy
%matplotlib inline

import sys
# sys.path.insert(0, '/Users/jianglongli/Desktop/workbook/Freddie_project/PostGIS/gisfeaturecode_v7/')
from mapping_utility_v2 import map_geopandas, map_AllHouses
from mapping_utility_fire import map_geopandas_fire
from python_postgis_talk_utility import transform_pd_to_gpd_general, transform_pd_to_gpd
from cali_fire_utility import geomatch, readin_shapefile, timer, fire_postprocessing, create_fire_union
from cali_fire_utility import download_and_create_shp, download_read_curent_fire
from cali_fire_utility import map_fires, geocode, multigeocoding, multigeocoding_and_repair

# Raise the column limit pandas uses when rendering a DataFrame to 100.
pd.set_option('display.max_columns', 100)

In [2]:
import warnings

# Install a global "ignore" filter for every warning category. This also hides
# library diagnostics (e.g. pandas' SettingWithCopyWarning) in all later cells.
warnings.simplefilter(action="ignore", category=Warning)

In [3]:
# Remote GeoMAC locations: the current-year California fire data listing and the
# site root.
# NOTE(review): url_master is presumably used to resolve relative links found
# under url_cali — confirm against cali_fire_utility.
url_cali = "https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/current_year_fire_data/California/"
url_master = "https://rmgsc.cr.usgs.gov"

# Local data directory. The default is the path this notebook was originally
# written against; set the DISASTER_RECOVERY_ROOT environment variable to run
# the notebook on another machine without editing this cell.
folder_root = os.environ.get(
    'DISASTER_RECOVERY_ROOT',
    '/Users/jianglongli/Desktop/workbook/data/disaster_recovery',
)
# Name of the California fire sub-folder (presumably relative to folder_root).
folder_sub = 'cali_fire'

### read in hve turned off file and try matching

In [4]:
# Load the turned-off HVE records. 'zip' is read as text because it is
# concatenated into the address string further down.
hve_raw = pd.read_csv('%s/cali_turnedoff/cali_turned_off.csv' % folder_root, dtype={'zip': str})
print('raw hve count: %s' % hve_raw.shape[0])

# Force every longitude to be non-positive: positive values have their sign
# flipped, all others (including NaN) are left as they are. Presumably the
# positive values are sign errors, since the data set covers California.
hve_raw.loc[:, 'long'] = -hve_raw['long'].abs()

# Rows that already carry a latitude do not need geocoding.
hve = hve_raw[hve_raw.lat.notnull()]
print('valid lat/lng count: %s' % hve.shape[0])

# Rows without a latitude are geocoded from their address. Take an explicit copy
# so the column assignment below writes to an independent frame rather than to a
# slice of hve_raw (the pattern pandas reports as SettingWithCopyWarning).
hve_null = hve_raw[hve_raw.lat.isnull()].copy()
# Build "<address>, CA <zip>" column-wise; a missing address or zip yields NaN
# for that row instead of raising.
hve_null.loc[:, 'address'] = hve_null['address'] + ',' + ' CA ' + hve_null['zip']
# Ad-hoc 100-row sample; it is unseeded, so it differs between runs.
sample = hve_null.sample(100)
print('null lat/lng hve: %s' % hve_null.shape[0])

raw hve count: 359266
valid lat/lng count: 324931
null lat/lng hve: 34335


In [15]:
%%time
hve_geocode = multigeocoding_and_repair(hve_null.sample(500), 20, 6, verb=False)

Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)
Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)


20 addresses has failed geocoding! 15:21:28
20 addresses has been repaired! 15:21:40
CPU times: user 9.97 s, sys: 747 ms, total: 10.7 s
Wall time: 2min 46s


In [16]:
# Every row must have a geocoding result before the coordinates are unpacked.
assert not hve_geocode.geocd.hasnans

# Each geocd entry is indexed positionally: element 0 fills 'lat', element 1 'long'.
geocoded_pairs = hve_geocode.geocd
hve_geocode.loc[:, 'lat'] = geocoded_pairs.apply(lambda pair: pair[0])
hve_geocode.loc[:, 'long'] = geocoded_pairs.apply(lambda pair: pair[1])

# The helper column is dropped once unpacked, then the geocoded rows are stacked
# under the rows that already had coordinates.
hve_geocode = hve_geocode.drop(labels='geocd', axis='columns')
hve_processed = pd.concat(objs=[hve, hve_geocode])

In [5]:
# Disabled export step: when uncommented, writes hve_processed (without the index)
# to 'cali_turned_off_geocoded.csv' under folder_root.
# hve_processed.to_csv(folder_root + '/' + 'cali_turned_off_geocoded.csv', index=False)

# Reference

+ **GeoMAC data, part of USGS; this data provides fire polygons**: https://www.geomac.gov/index.shtml
    - shapefile: https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/
    - data attribute definition: 
        * https://www.nwcg.gov/sites/default/files/stds/WildlandFirePerimeters_definition.pdf
        * https://rmgsc.cr.usgs.gov/outgoing/GeoMAC/historic_fire_data/perimeters_dd83_METADATA.htm
    - current fire perimeter methodology: https://www.geomac.gov/viewer/help/perimeters_active.html
    - GeoMAC map viewer help and documentation: https://www.geomac.gov/viewer/help/Help.html
    - a 2008 publication about GeoMAC: https://pubs.usgs.gov/ds/612/pdf/ds612.pdf
    - a 2008 GeoMAC user guide: https://webarchive.library.unt.edu/eot2008/20080916004656/http://geomac.gov/pdf/UsersGuide/GeoMAC_UG.pdf


+ **USGS**: https://www.usgs.gov/centers/gecsc


+ **Data Basin view of GeoMAC**: https://databasin.org/datasets/6ed18e2a72e74b0d81e14c93d5b46f07


+ **NASA Fire Information for Resource Management System (FIRMS), mostly point data, near real time**: https://earthdata.nasa.gov/earth-observation-data/near-real-time/firms


+ **Cal Fire: has a Google Maps view of fire extents, but it is unclear how to obtain the shapefile**: http://www.calfire.ca.gov/general/firemaps
    - FRAP program from Cal Fire also has fire perimeter data: http://frap.fire.ca.gov/data/frapgisdata-sw-fireperimeters_download
    
    
+ **KML file tutorial**: https://developers.google.com/kml/documentation/kml_tut

# Note

**GeoMAC fire perimeters:**
This layer contains fire perimeters that are submitted to GeoMAC by field offices. The fire perimeters are updated every one or two days, as the data is made available. If we have received no new data, the "expired" layer is not replaced. The layer is replaced as soon as we receive an updated file. Perimeters are usually collected on a daily basis for large fires that are growing. However, there may be gaps in daily coverage.

The GeoMAC team attributes the perimeters using the IRWIN (Integrated Reporting of Wildland-Fire Information) system.

Perimeters are collected in the field by a variety of means, including infrared flights, and by using a GPS unit to map the perimeter. Please NOTE: GeoMAC only displays perimeter data as they are submitted by field offices. Since data are not received for all fires, you may not be able to view perimeters for every fire.

Perimeter data displayed in and delivered by the Geomac application is not the final or official perimeter for any incident and is provided for informational purposes only. The final official perimeter should be obtained from the host unit which can be determined by looking at the Unit Id for any specific fire. The host unit is responsible for producing official and final perimeters for all incidents in their jurisdiction.


**Cal Fire**: 
As part of the California Fire Plan, the Fire and Resource Assessment Program (FRAP) compiles fire perimeters and has established an on-going fire perimeter data capture process in order to update vegetative fuel rank maps. CAL FIRE/FRAP, the USDA Forest Service Region 5 Remote Sensing Lab, the Bureau of Land Management, and the National Park Service jointly develop the comprehensive fire perimeter GIS layer for public and private lands throughout California.

The fire perimeter database represents the most complete digital record of fire perimeters in California. However it is still incomplete in many respects. Fire perimeter database users must exercise caution to avoid inaccurate or erroneous conclusions. For more information on potential errors and their source please review the methodology section of these pages.

# Web Scraping reflection
+ try scrapy (scrapy vs beautifulsoup): https://blog.michaelyin.info/2017/08/10/scrapy-tutorial-1-scrapy-vs-beautiful-soup/

+ scrapy is a framework: https://hexfox.com/p/scrapy-vs-beautifulsoup/