In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import polygon
from shapely.validation import make_valid
from shapely.validation import explain_validity
import fiona
import rtree
import pyproj


## COME BACK TO THESE WHEN THINGS GO WRONG

In [26]:
# Locations of the source datasets, relative to this notebook.

## US Forest Service
z_usfs_th = '../data/source/usfs/S_USA.Activity_TimberHarvest.gdb.zip'  # timber harvest
z_usfs_hft = '../data/source/usfs/S_USA.Activity_HazFuelTrt_PL.gdb.zip'  # hazardous fuel treatment

## CalFire
calfire_th = '../data/source/calfire/ds816/ds816.gdb'  # timber harvest
calfire_ntmp = '../data/source/calfire/CAL_FIRE_Nonindustrial_Timber_Management_Plans_and_Notices_TA83.geojson'  # non-industrial timber management plans
z_calfire_rx = '../data/source/calfire/California_Wildland_Fire_Perimeters_(All)/California_Wildland_Fire_Perimeters_(All).shp'  # fire perimeters, read as the prescribed-burn set
calfire_calmapper = '../data/source/calfire/FuelTreatments_CALFIRE22_1_public.gdb.zip'  # CALMAPPER fuel treatments

## Census boundaries
z_census_states = '../data/source/census/tl_2020_us_state.zip'
z_census_counties = '../data/source/census/tl_2021_us_county.zip'

In [32]:
# Load each source layer into its own GeoDataFrame.

## Raw frames -- US Forest Service
# Timber harvest
usfs_th_raw_df = gpd.read_file(z_usfs_th)
# Hazardous fuel treatment
usfs_hft_raw_df = gpd.read_file(z_usfs_hft)

## Raw frames -- CalFire
# Timber harvest
calfire_th_raw_df = gpd.read_file(calfire_th)
# The OBJECTID column did not show up after reading. It is a sequential count,
# so the row index plus one is used as a stand-in ID (hacky, but should work).
calfire_th_raw_df['ID'] = calfire_th_raw_df.index + 1
# Non-industrial timber management plans
calfire_ntmp_raw_df = gpd.read_file(calfire_ntmp)
# Prescribed burns
calfire_rx_raw_df = gpd.read_file(z_calfire_rx)
# CALMAPPER (California Management Activity Project Planning and Event Reporter)
calfire_calmapper_raw_df = gpd.read_file(calfire_calmapper)

## Raw frames -- Census boundaries
states_raw_df = gpd.read_file(z_census_states)
counties_raw_df = gpd.read_file(z_census_counties)

# TO DO
## [DONE] GOTTA VERIFY RX BECAUSE PREVIOUS URL RETURNED 404
## [DONE] GET calmapper set
## Sort out which columns map to which in Clarke's
## Map intensity
## Spot check .head() results against QGIS?


In [82]:
## Working copies, so the *_raw_df frames stay as they were loaded
usfs_th_df, usfs_hft_df = usfs_th_raw_df.copy(), usfs_hft_raw_df.copy()
calfire_th_df, calfire_ntmp_df = calfire_th_raw_df.copy(), calfire_ntmp_raw_df.copy()
calfire_rx_df, calfire_calmapper_df = calfire_rx_raw_df.copy(), calfire_calmapper_raw_df.copy()
states_df, counties_df = states_raw_df.copy(), counties_raw_df.copy()

## USFS sets: keep only the rows whose STATE_ABBR is 'CA'
usfs_th_ca_df = usfs_th_df[usfs_th_df['STATE_ABBR'] == 'CA'].copy()
usfs_hft_ca_df = usfs_hft_df[usfs_hft_df['STATE_ABBR'] == 'CA'].copy()

## Census shapes: the state with STUSPS 'CA' and the counties with STATEFP '06'
ca_df = states_df.loc[states_df['STUSPS'] == 'CA'].copy()
ca_counties_df = counties_df.loc[counties_df['STATEFP'] == '06'].copy()


## SOME CLEANINGS, ROW COUNTS AND BASIC SUMS

In [83]:
# RENAME/MUNGE COLUMNS TO MATCH ACROSS DATASETS
#
# Each working frame gets the shared column names ID, SILV1, SILV2, FY,
# COMPLETED, OWNER and ACRES, plus a STATUS tag naming where the row came from.
# FY and COMPLETED are parsed from a four-digit year into datetimes; values
# that cannot be parsed become NaT (errors='coerce').
#
# The renames are reassigned instead of using inplace=True, and the CalFire
# timber-harvest COMPLETED year is read from calfire_th_raw_df rather than from
# the column this cell overwrites. Both keep the cell safe to re-run: the old
# form failed on a second run because `.str` was applied to a column that had
# already been converted to datetimes.

# USFS timber harvest
usfs_th_ca_df = usfs_th_ca_df.rename(columns={
    'SUID': 'ID',
    'ACTIVITY_NAME': 'SILV1',
    'TREATMENT_TYPE': 'SILV2',
    'FY_PLANNED': 'FY',
    'FY_COMPLETED': 'COMPLETED',
    'OWNERSHIP_DESC': 'OWNER',
    'NBR_UNITS_ACCOMPLISHED': 'ACRES',
})
usfs_th_ca_df['FY'] = pd.to_datetime(usfs_th_ca_df['FY'], format='%Y', errors='coerce')
usfs_th_ca_df['COMPLETED'] = pd.to_datetime(usfs_th_ca_df['COMPLETED'], format='%Y', errors='coerce')
usfs_th_ca_df['STATUS'] = 'usfs_th'

# USFS hazardous fuel treatment
usfs_hft_ca_df = usfs_hft_ca_df.rename(columns={
    'SUID': 'ID',
    'ACTIVITY': 'SILV1',
    'TREATMENT_TYPE': 'SILV2',
    'FISCAL_YEAR_PLANNED': 'FY',
    'FISCAL_YEAR_COMPLETED': 'COMPLETED',
    'FS_UNIT_NAME': 'OWNER',
    'NBR_UNITS_ACCOMPLISHED': 'ACRES',
})
usfs_hft_ca_df['FY'] = pd.to_datetime(usfs_hft_ca_df['FY'], format='%Y', errors='coerce')
usfs_hft_ca_df['COMPLETED'] = pd.to_datetime(usfs_hft_ca_df['COMPLETED'], format='%Y', errors='coerce')
usfs_hft_ca_df['STATUS'] = 'usfs_hft'

# CalFire timber harvest (ID was already added to the raw frame at load time)
calfire_th_df = calfire_th_df.rename(columns={
    'SILVI_1': 'SILV1',
    'SILVI_CAT': 'SILV2',
    'LANDOWNER': 'OWNER',
    'GIS_ACRES': 'ACRES',
})
# Only the first four characters (the year) of the date strings are used.
calfire_th_df['FY'] = pd.to_datetime(calfire_th_df['APPROVED'].str[:4], format='%Y', errors='coerce')
# Parsed from the raw frame: calfire_th_df is a copy of it with the same index,
# and the raw column still holds the original strings on every run.
calfire_th_df['COMPLETED'] = pd.to_datetime(calfire_th_raw_df['COMPLETED'].str[:4], format='%Y', errors='coerce')
calfire_th_df['STATUS'] = 'private'

# CalFire non-industrial timber management plans
calfire_ntmp_df = calfire_ntmp_df.rename(columns={
    'OBJECTID': 'ID',
    'SILVI_1': 'SILV1',
    'SILVI_CAT': 'SILV2',
    'LANDOWNER': 'OWNER',
    'GIS_ACRES': 'ACRES',
})
calfire_ntmp_df['FY'] = pd.to_datetime(calfire_ntmp_df['APPROVED'].str[:4], format='%Y', errors='coerce')
# Clarke uses CANCELLED as the completion date here. We should probably just
# say that completion is not tracked in this set, which seems to be the case.
# Either way it is probably not a big deal, because this is a small subset of
# the overall total, I think.
calfire_ntmp_df['COMPLETED'] = pd.to_datetime(calfire_ntmp_df['CANCELLED'].str[:4], format='%Y', errors='coerce')
calfire_ntmp_df['STATUS'] = 'private'


In [84]:
# Reduce each harmonised frame to the seven shared columns.
usfs_th_ca_clean_df = usfs_th_ca_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
usfs_hft_ca_clean_df = usfs_hft_ca_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
calfire_th_clean_df = calfire_th_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
calfire_ntmp_clean_df = calfire_ntmp_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]

# Row counts, in the same order as the subsets above.
print(len(usfs_th_ca_clean_df))
print(len(usfs_hft_ca_clean_df))
print(len(calfire_th_clean_df))
print(len(calfire_ntmp_clean_df))

80446
82435
74193
5905


In [85]:
# FILTER CALFIRE FOR ONLY COMPLETED WORK
# Keep the CalFire timber-harvest rows whose PLAN_STAT is 'Completed'.
# The mask is built from calfire_th_df itself (the frame being filtered), not
# from calfire_th_raw_df, so the filter no longer depends on the two frames
# happening to share the same index.
calfire_th_onlycompleted_df = calfire_th_df.loc[calfire_th_df['PLAN_STAT'] == 'Completed'].copy()
calfire_th_onlycompleted_clean_df = calfire_th_onlycompleted_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
# Row counts before and after the filter.
print(len(calfire_th_df.index))
print(len(calfire_th_onlycompleted_clean_df.index))

74193
35563


In [119]:
# Deduplicate the California USFS timber harvest rows: rows sharing the same
# (ID, GIS_ACRES) pair collapse to their first occurrence.
usfs_th_ca_dedupe_df = usfs_th_ca_df.drop_duplicates(subset=['ID', 'GIS_ACRES']).copy()
usfs_th_ca_dedupe_clean_df = usfs_th_ca_dedupe_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
# NOTE(review): the first count is for usfs_th_df (all states), not the
# California subset that was deduplicated -- confirm this is the intended baseline.
print(len(usfs_th_df))
print(len(usfs_th_ca_dedupe_clean_df))

787170
68514


In [120]:
# Deduplicate the California USFS hazardous fuel treatment rows: rows sharing
# the same (ID, GIS_ACRES) pair collapse to their first occurrence.
usfs_hft_ca_dedupe_df = usfs_hft_ca_df.drop_duplicates(subset=['ID', 'GIS_ACRES']).copy()
usfs_hft_ca_dedupe_clean_df = usfs_hft_ca_dedupe_df.loc[:, ['ID', 'SILV1', 'SILV2', 'FY', 'COMPLETED', 'OWNER', 'ACRES']]
# NOTE(review): the first count is for usfs_hft_df (all states), not the
# California subset that was deduplicated -- confirm this is the intended baseline.
print(len(usfs_hft_df))
print(len(usfs_hft_ca_dedupe_clean_df))

480353
40416


In [121]:
# Total ACRES per frame, rounded to a whole number.

# Activity acres: multiple treatments on the same terrain are all counted.
print('SUM of usfs_th_ca_clean_df["ACRES"] = ', round(usfs_th_ca_clean_df['ACRES'].sum()), sep='')
print('SUM of usfs_hft_ca_clean_df["ACRES"] = ', round(usfs_hft_ca_clean_df['ACRES'].sum()), sep='')

# Same USFS sets after dropping duplicate (ID, GIS_ACRES) rows.
print('SUM of usfs_th_ca_dedupe_clean_df["ACRES"] = ', round(usfs_th_ca_dedupe_clean_df['ACRES'].sum()), sep='')
print('SUM of usfs_hft_ca_dedupe_clean_df["ACRES"] = ', round(usfs_hft_ca_dedupe_clean_df['ACRES'].sum()), sep='')

# CalFire sets.
print('SUM of calfire_th_df["ACRES"] = ', round(calfire_th_df['ACRES'].sum()), sep='')
print('SUM of calfire_th_onlycompleted_df["ACRES"] = ', round(calfire_th_onlycompleted_df['ACRES'].sum()), sep='')
print('SUM of calfire_ntmp_clean_df["ACRES"] = ', round(calfire_ntmp_clean_df['ACRES'].sum()), sep='')

SUM of usfs_th_ca_clean_df["ACRES"] = 2680494
SUM of usfs_hft_ca_clean_df["ACRES"] = 4481875
SUM of usfs_th_ca_dedupe_clean_df["ACRES"] = 2229560
SUM of usfs_hft_ca_dedupe_clean_df["ACRES"] = 3215359
SUM of calfire_th_df["ACRES"] = 1592893
SUM of calfire_th_onlycompleted_df["ACRES"] = 756736
SUM of calfire_ntmp_clean_df["ACRES"] = 378542


## ACTIVITY ACRES (PRESERVING DUPLICATION) and PLANNED ACRES

In [101]:
# Stack the two USFS clean subsets (hazardous fuel rows first, then timber
# harvest) and report the combined row count and acre total.
usfs_th_hft_ca_clean_df = pd.concat(
    [usfs_hft_ca_clean_df, usfs_th_ca_clean_df],
)
print(len(usfs_th_hft_ca_clean_df))
print('SUM of usfs_th_hft_ca_clean_df["ACRES"] = ', round(usfs_th_hft_ca_clean_df['ACRES'].sum()), sep='')

162881
SUM of usfs_th_hft_ca_clean_df["ACRES"] = 7162369


In [102]:
# Stack the two CalFire clean subsets (timber harvest first, then NTMP) and
# report the combined row count and acre total.
calfire_th_ntmp_clean_df = pd.concat(
    [calfire_th_clean_df, calfire_ntmp_clean_df],
)
print(len(calfire_th_ntmp_clean_df))
print('SUM of calfire_th_ntmp_clean_df["ACRES"] = ', round(calfire_th_ntmp_clean_df['ACRES'].sum()), sep='')

80098
SUM of calfire_th_ntmp_clean_df["ACRES"] = 1971436


In [103]:
# Keep only rows whose FY is 1984-01-01 or later, for both combined sets.
usfs_th_hft_ca_clean_84_to_pres_df = usfs_th_hft_ca_clean_df.loc[usfs_th_hft_ca_clean_df['FY'] >= '1984-01-01']
calfire_th_ntmp_clean_84_to_pres_df = calfire_th_ntmp_clean_df.loc[calfire_th_ntmp_clean_df['FY'] >= '1984-01-01']

# USFS row count and acre total, then the same for CalFire.
print(len(usfs_th_hft_ca_clean_84_to_pres_df))
print('SUM of usfs_th_hft_ca_clean_84_to_pres_df["ACRES"] = ', round(usfs_th_hft_ca_clean_84_to_pres_df['ACRES'].sum()), sep='')
print(len(calfire_th_ntmp_clean_84_to_pres_df))
print('SUM of calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = ', round(calfire_th_ntmp_clean_84_to_pres_df['ACRES'].sum()), sep='')

145088
SUM of usfs_th_hft_ca_clean_84_to_pres_df["ACRES"] = 6763547
79907
SUM of calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = 1962800


In [113]:
# CalFire rows with a COMPLETED date of 1984-01-01 or later. Rows with no
# COMPLETED date fail the comparison and drop out.
calfire_th_ntmp_clean_84_to_pres_completed_df = calfire_th_ntmp_clean_84_to_pres_df.loc[
    calfire_th_ntmp_clean_84_to_pres_df['COMPLETED'] >= '1984-01-01'
]
print(len(calfire_th_ntmp_clean_84_to_pres_completed_df))
print('SUM of calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = ', round(calfire_th_ntmp_clean_84_to_pres_completed_df['ACRES'].sum()), sep='')

38263
SUM of calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = 819384


In [105]:
# Stack the 1984-to-present USFS and CalFire activity rows and report the totals.
usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df = pd.concat(
    [usfs_th_hft_ca_clean_84_to_pres_df, calfire_th_ntmp_clean_84_to_pres_df],
)
print(len(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df))
print('SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = ', round(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df['ACRES'].sum()), sep='')

# Of those, keep the rows completed on or after 1984-01-01.
usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df = usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df.loc[
    usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df['COMPLETED'] >= '1984-01-01'
]
print(len(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df))
print('SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = ', round(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df['ACRES'].sum()), sep='')

224995
SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = 8726346
143283
SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = 6770983


In [106]:
# "Planned" activity acres: rows that have no COMPLETED date.
usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_planned_df = usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df.loc[
    usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_df['COMPLETED'].isna()
]
print(len(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_planned_df))
print('SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_planned_df["ACRES"] = ', round(usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_planned_df['ACRES'].sum()), sep='')

81712
SUM of usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_planned_df["ACRES"] = 1955363


## GETTING TO FOOTPRINT ACRES

In [122]:
# Drop rows duplicated across the USFS timber harvest and hazardous fuel sets:
# rows sharing the same (ID, ACRES) pair collapse to their first occurrence.
usfs_th_hft_ca_clean_deduped_df = usfs_th_hft_ca_clean_df.drop_duplicates(subset=['ID', 'ACRES']).copy()

# Before deduplication ...
print(len(usfs_th_hft_ca_clean_df))
print('SUM of usfs_th_hft_ca_clean_df["ACRES"] = ', round(usfs_th_hft_ca_clean_df['ACRES'].sum()), sep='')
# ... and after.
print(len(usfs_th_hft_ca_clean_deduped_df))
print('SUM of usfs_th_hft_ca_clean_deduped_df["ACRES"] = ', round(usfs_th_hft_ca_clean_deduped_df['ACRES'].sum()), sep='')

162881
SUM of usfs_th_hft_ca_clean_df["ACRES"] = 7162369
106315
SUM of usfs_th_hft_ca_clean_deduped_df["ACRES"] = 5327766


In [123]:
# Deduplicated USFS rows whose FY is 1984-01-01 or later.
usfs_th_hft_ca_clean_deduped_84_to_pres_df = usfs_th_hft_ca_clean_deduped_df.loc[
    usfs_th_hft_ca_clean_deduped_df['FY'] >= '1984-01-01'
]
print(len(usfs_th_hft_ca_clean_deduped_84_to_pres_df))
print('SUM of usfs_th_hft_ca_clean_deduped_84_to_pres_df["ACRES"] = ', round(usfs_th_hft_ca_clean_deduped_84_to_pres_df['ACRES'].sum()), sep='')

90129
SUM of usfs_th_hft_ca_clean_deduped_84_to_pres_df["ACRES"] = 4962535


In [124]:
# The CalFire sets are not deduplicated.
## Clarke builds these frames at this point; they were built earlier in this
## notebook, alongside the other filters of the same kind, so this cell only
## re-prints their row counts and acre totals.
print(len(calfire_th_ntmp_clean_84_to_pres_df))
print('SUM of calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = ', round(calfire_th_ntmp_clean_84_to_pres_df['ACRES'].sum()), sep='')
print(len(calfire_th_ntmp_clean_84_to_pres_completed_df))
print('SUM of calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = ', round(calfire_th_ntmp_clean_84_to_pres_completed_df['ACRES'].sum()), sep='')

# TODO: these numbers are very different from Clarke's -- check. It might have
# something to do with how the date strings are converted to datetimes.

79907
SUM of calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = 1962800
38263
SUM of calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = 819384


In [117]:
# Stack the deduplicated USFS rows with the CalFire rows (both 1984 to present).
usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df = pd.concat(
    [usfs_th_hft_ca_clean_deduped_84_to_pres_df, calfire_th_ntmp_clean_84_to_pres_df],
)
print(len(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df))
print('SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = ', round(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df['ACRES'].sum()), sep='')

170036
SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df["ACRES"] = 6925335


In [126]:
# Completed footprint rows: COMPLETED on or after 1984-01-01.
usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df = usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df.loc[
    usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df['COMPLETED'] >= '1984-01-01'
]
print(len(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df))
print('SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = ', round(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df['ACRES'].sum()), sep='')

107318
SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df["ACRES"] = 5422981


In [128]:
# Planned footprint rows: those that have no COMPLETED date.
usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_planned_df = usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df.loc[
    usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_df['COMPLETED'].isna()
]
print(len(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_planned_df))
print('SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_planned_df["ACRES"] = ', round(usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_planned_df['ACRES'].sum()), sep='')

62718
SUM of usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_planned_df["ACRES"] = 1502354


In [136]:
# This brings us up to line 273 in Clarke's R file: CECS_DataCleaning_7June2021.R

In [137]:
# Completed footprint acres summed per COMPLETED year and rounded to whole acres.
# calfire_calmapper_df and calfire_rx_df are not included yet.
(
    usfs_th_hft_ca_deduped_calfire_th_ntmp_clean_84_to_pres_completed_df
    .groupby(['COMPLETED'])['ACRES']
    .sum()
    .round(0)
)

COMPLETED
1984-01-01     26315.0
1985-01-01     33871.0
1986-01-01     51006.0
1987-01-01     50607.0
1988-01-01     73154.0
1989-01-01     69834.0
1990-01-01    130631.0
1991-01-01     97457.0
1992-01-01    129840.0
1993-01-01     76738.0
1994-01-01     77057.0
1995-01-01     55732.0
1996-01-01     75010.0
1997-01-01     64001.0
1998-01-01     98266.0
1999-01-01     46408.0
2000-01-01     46573.0
2001-01-01     35154.0
2002-01-01     62202.0
2003-01-01    114937.0
2004-01-01    113165.0
2005-01-01     95582.0
2006-01-01    105221.0
2007-01-01    145863.0
2008-01-01    128640.0
2009-01-01    175482.0
2010-01-01    130927.0
2011-01-01    158174.0
2012-01-01    210080.0
2013-01-01    203708.0
2014-01-01    259295.0
2015-01-01    189991.0
2016-01-01    495497.0
2017-01-01    414649.0
2018-01-01    362563.0
2019-01-01    175963.0
2020-01-01    249647.0
2021-01-01    385822.0
2022-01-01      7922.0
Name: ACRES, dtype: float64

In [138]:
# Completed activity acres summed per COMPLETED year and rounded to whole acres.
# calfire_calmapper_df and calfire_rx_df are not included yet.
(
    usfs_th_hft_ca_calfire_th_ntmp_clean_84_to_pres_completed_df
    .groupby(['COMPLETED'])['ACRES']
    .sum()
    .round(0)
)

COMPLETED
1984-01-01     29151.0
1985-01-01     37316.0
1986-01-01     55677.0
1987-01-01     56031.0
1988-01-01     79717.0
1989-01-01     79807.0
1990-01-01    151181.0
1991-01-01    102576.0
1992-01-01    139117.0
1993-01-01     81699.0
1994-01-01     79783.0
1995-01-01     64800.0
1996-01-01     85462.0
1997-01-01     74368.0
1998-01-01    115439.0
1999-01-01     55166.0
2000-01-01     59359.0
2001-01-01     39599.0
2002-01-01     69130.0
2003-01-01    129892.0
2004-01-01    135748.0
2005-01-01    118272.0
2006-01-01    131114.0
2007-01-01    190386.0
2008-01-01    187517.0
2009-01-01    234064.0
2010-01-01    198994.0
2011-01-01    224349.0
2012-01-01    279996.0
2013-01-01    270387.0
2014-01-01    331440.0
2015-01-01    273294.0
2016-01-01    582887.0
2017-01-01    505088.0
2018-01-01    472299.0
2019-01-01    259297.0
2020-01-01    336204.0
2021-01-01    439265.0
2022-01-01     15112.0
Name: ACRES, dtype: float64

In [229]:
# # Todo
# ## Fix geometries and add a column that notes if they've been fixed, remove NoneTypes, then try this again.

# usfs_th_df_val = usfs_th_df 
# usfs_th_df_val['VALIDATED'] = ''

# for idx, row in usfs_th_df_val.iterrows():
#     g = row['geometry']
#     if isinstance(g, type(None)):
#         usfs_th_df_val.drop(index=idx)
#     elif g.is_valid == False:
#         usfs_th_df_val.iat[idx, 68] = make_valid(g)
#         usfs_th_df_val.iat[idx, 69] = 1
#     else:
#         usfs_th_df_val.iat[idx, 69] = 0
    

# # res_union = ca_df.overlay(usfs_th_df, how='union')
# # res_union

# # Done
# ## Select USFS projects in CA only
# ## Filter USFS sets to Calif. only

In [27]:
# s = usfs_th_df.is_valid

# with open("is_valid.txt", "a") as o:
#     o.write(str(s))

# Append one line per row of usfs_th_df to is_valid.txt describing its geometry:
#   "<index>, NoneType"       -- the row has no geometry
#   "<index>, <repaired>"     -- the geometry was invalid; <repaired> is str()
#                                of what make_valid returns for it
#   "<index>, already valid"  -- the geometry needed no repair
# Nothing is written back to usfs_th_df; this only produces the log.
#
# The file is opened once for the whole loop instead of once per row, the
# repair is computed a single time per invalid geometry (the old loop called
# make_valid twice and threw the first result away), and only the geometry
# column is iterated rather than building a full row for every record.
with open('is_valid.txt', 'a') as o:
    for idx, g in usfs_th_df['geometry'].items():
        if g is None:
            o.write(str(idx) + ', NoneType\n')
        elif not g.is_valid:
            o.write(str(idx) + ', ' + str(make_valid(g)) + '\n')
        else:
            o.write(str(idx) + ', already valid\n')

    # print(row['geometry'])
    # with open("is_valid.txt", "a") as o:
    #     if isinstance(row['geometry'], type(None)):
    #         o.write("NoneType")
    #     else:
    #         o.write(str(idx) + ", " + str(row['geometry'].is_valid))