In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline

# Input: CSV of LAP parcels inner-joined with land-use/land-cover (LULC) rows.
LULC_LAP_FILEPATH = '../data/LULC_LAP_WOH.csv'
# Input: GeoJSON of the original LAP parcel layer (covers both WOH and EOH).
LAP_GEOJSON_FILEPATH = '../data/LAP.geojson'
# Output: CSV written by the export cells of this notebook and re-read afterwards.
LU_LAP_AGG_FILEPATH = '../data/LAP_landuse_acres.csv'
# Input: CSV describing the land-use codes (read for its LU_Code column).
LU_CODE_METADATA_FILEPATH = '../data/LandUseCodes_DEP.csv'

# LAP EDA

In [None]:
# Load the original LAP parcel layer (both WOH and EOH) and keep only the
# parcels whose acquisition type is 'LAP'.
# NOTE(review): parse_dates is a pandas.read_csv option; confirm that
# geopandas.read_file actually honours it for CLOSING_DT.
lap = (
    gpd.read_file(LAP_GEOJSON_FILEPATH, parse_dates=['CLOSING_DT'])
    .loc[lambda parcels: parcels['Acquisitio'] == 'LAP']
)
lap.head(2)

In [None]:
# Total GIS acreage of the LAP parcels in each county.
lap.groupby('County')['GIS_Acres'].sum()

In [None]:
def _print_parcel_summary(heading, parcels):
    """Print unique SWIS_SBL count, unique PROP_ID count, shape and total GIS acres."""
    print(heading)
    print(
        parcels['SWIS_SBL'].nunique(),
        parcels['PROP_ID'].nunique(),
        parcels.shape,
        parcels['GIS_Acres'].sum(),
    )


# West-of-Hudson subset; reused by later cells.
woh = lap[lap['SOH'] == 'West']

_print_parcel_summary('EOH + WOH:', lap)
print()
_print_parcel_summary('WOH only:', woh)

In [None]:
# Distribution of total acreage per property (PROP_ID), EOH and WOH together.
# `lap` was already restricted to Acquisitio == 'LAP' when it was loaded, so
# non-LAP parcels are not part of this summary.
lap.groupby('PROP_ID')['GIS_Acres'].sum().describe()
# Figures noted by the original author (may predate the LAP-only filter — TODO confirm):
#   average size of plot is 130 acres
#   median size of plot is 47 acres
#   largest plot is 83k acres

In [None]:
# Per-property acreage statistics for LAP acquisitions, overall and split by
# side of the Hudson.
acquired_under_lap = lap['Acquisitio'] == 'LAP'
region_masks = (
    ('EOH + WOH:', acquired_under_lap),
    ('WOH only:', acquired_under_lap & (lap['SOH'] == 'West')),
    ('EOH only:', acquired_under_lap & (lap['SOH'] == 'East')),
)

print('LAP acreage statistics')
for position, (heading, mask) in enumerate(region_masks):
    if position > 0:
        print()  # blank line between regions
    print(heading)
    print(lap[mask].groupby('PROP_ID')['GIS_Acres'].sum().describe())

# EDA of LAP inner-joined with LULC

In [None]:
# LAP parcels inner-joined with LULC; per the original note the join kept
# most (not all) of the WOH properties.
lulc_lap = pd.read_csv(LULC_LAP_FILEPATH, parse_dates=['closing_date'])

# Use only parcels acquired under LAP. The explicit .copy() makes the result an
# independent frame, so the columns this notebook assigns onto it further down
# (`landuse`, `year`) do not trigger pandas' SettingWithCopyWarning.
lulc_lap = lulc_lap[lulc_lap.acquisition_method == 'LAP'].copy()
lulc_lap.head()

In [None]:
# Same summary as for `lap`, on the joined table: unique SWIS_SBL, unique
# PROP_ID, shape and summed property acres.
# NOTE(review): property_acres is presumably repeated on every land-use row of
# a parcel (the next cell de-duplicates on FID_LAP before summing), so this
# total may over-count — confirm before quoting it.
print('LULC-inner joined-LAP:')
print(
    lulc_lap['SWIS_SBL'].nunique(),
    lulc_lap['PROP_ID'].nunique(),
    lulc_lap.shape,
    lulc_lap['property_acres'].sum(),
)

In [None]:
# Fraction (0-1, not a percentage) of parcel acreage that is covered by
# land-use rows in LULC_LAP.
lulc_lap_acres = lulc_lap['landuse_acres'].sum()

# Count each parcel's acreage once: one row per (FID_LAP, property_acres) pair.
unique_parcels = lulc_lap.drop_duplicates(subset=['FID_LAP', 'property_acres'])
lulc_lap_prop_acres = unique_parcels['property_acres'].sum()

lulc_lap_acres / lulc_lap_prop_acres

In [None]:
# Fraction (0-1) of WOH PROP_IDs acquired under LAP that also appear in LULC_LAP.
lulc_lap_ids = set(lulc_lap['PROP_ID'])
woh_lap_parcels = woh[woh['Acquisitio'] == 'LAP']
woh_ids = set(woh_lap_parcels['PROP_ID'])

len(lulc_lap_ids & woh_ids) / len(woh_ids)

# Aggregate landuse acres per annum

In [None]:
# Acres acquired per closing year (rows) and land-use code (columns);
# year/code combinations with no acquisitions are filled with 0.
lu_agg = lulc_lap.pivot_table(
    index=lulc_lap.closing_date.dt.year,
    columns='landuse_code',
    values='landuse_acres',
    aggfunc='sum',
    fill_value=0,
)

lu_agg.index = lu_agg.index.astype('int')  # convert year from float to int
# Drop 2020 since it is an incomplete year. errors='ignore' keeps the cell
# working when the data contains no 2020 closings (plain drop raises KeyError).
lu_agg = lu_agg.drop(index=2020, errors='ignore')
lu_agg

In [None]:
# Horizontal bar chart of total acres per land-use code, sorted ascending.
total_acres_by_code = lu_agg.sum().sort_values(ascending=True)
ax = total_acres_by_code.plot.barh(
    figsize=(5, 8),
    title='Total Acres of each Landuse Code',
)
ax.set_xlabel('Acres')
ax.set_ylabel('Landuse code')

In [None]:
# Write the year x land-use-code acreage table to CSV.
# NOTE(review): the "Export as csv" section later writes the row-level
# `lulc_lap` frame to this same LU_LAP_AGG_FILEPATH, so after a full run this
# aggregate no longer exists on disk — confirm whether it needs its own path.
lu_agg.to_csv(LU_LAP_AGG_FILEPATH)

# Merge minor landuse codes into 'Others'

In [None]:
# Rank land-use codes by total acres (largest first); every code after the
# eight largest is treated as minor and will be merged.
acres_by_code = lu_agg.sum().sort_values(ascending=False)
landuse_codes_to_merge = acres_by_code.iloc[8:].index
landuse_codes_to_merge

In [None]:
# Collapse the minor codes into a single 'Others' column appended after the
# retained codes.
minor_acres = lu_agg[landuse_codes_to_merge].sum(axis=1)
lu_agg = lu_agg.drop(columns=landuse_codes_to_merge).assign(Others=minor_acres)
lu_agg.head()

In [None]:
# Look up the metadata rows for the land-use codes still present as columns.
lu_desc = pd.read_csv(LU_CODE_METADATA_FILEPATH, index_col=0)
kept_code_rows = lu_desc['LU_Code'].isin(lu_agg.columns)
lu_desc.loc[kept_code_rows]

In [None]:
# Land-use code -> broad category, built from one (category, codes) entry per
# category so each category's members are listed together.
LANDUSE_MAPPING = {
    code: category
    for category, codes in (
        ('Urban/Built-up Land', (1110, 1190)),
        ('Agricultural Land', (2100, 2400)),
        ('Forest Land', (3000, 4100, 4200, 4300)),
        ('Others', (5000, 1120, 1790,
                    1730, 2220, 1402,
                    7500, 2410, 1430,
                    1210, 1713, 1130,
                    1401, 1220, 1714,
                    1230, 1400, 1250,
                    1240, 2210, 1712,
                    1711, 1140)),
    )
    for code in codes
}

In [None]:
# Attach the broad land-use category to every row. assign() returns a new frame
# instead of writing a column onto what may be a filtered slice, which avoids
# pandas' SettingWithCopyWarning.
# NOTE(review): replace() leaves any code missing from LANDUSE_MAPPING
# untouched, so such codes would show up as their own numeric columns below —
# confirm the mapping covers every code in the data.
lulc_lap = lulc_lap.assign(landuse=lulc_lap['landuse_code'].replace(LANDUSE_MAPPING))

# Acres acquired per closing year (rows) and land-use category (columns).
lu_agg = lulc_lap.pivot_table(
    index=lulc_lap.closing_date.dt.year,
    columns='landuse',
    values='landuse_acres',
    aggfunc='sum',
    fill_value=0,
)
lu_agg.index = lu_agg.index.astype('int')  # year arrives as float
# 2020 is an incomplete year; errors='ignore' avoids a KeyError when the data
# contains no 2020 closings.
lu_agg = lu_agg.drop(index=2020, errors='ignore')
lu_agg

In [None]:
# Stacked bar chart of acres acquired per year, one segment per land-use category.
# - No explicit xticks: bars are drawn at positions 0..n-1 and pandas labels
#   them with the index (the years) by default; passing the year values as tick
#   positions can place the ticks far away from the bars.
# - The legend title is set on the legend pandas already created. Calling
#   plt.legend(...) instead would build a fresh legend and discard the
#   reversed entry order requested with legend='reverse'.
ax = lu_agg.plot.bar(
    figsize=(12, 9),
    stacked=True,
    legend='reverse',
    title='Acres Acquired (1997-2019)',
)
ax.get_legend().set_title('Landuse Codes')
ax.set_xlabel('Year')
ax.set_ylabel('Acres')

# Export as csv

In [None]:
# Order rows by property and then closing date, with a fresh 0..n-1 index.
lulc_lap = (
    lulc_lap
    .sort_values(['PROP_ID', 'closing_date'])
    .reset_index(drop=True)
)
lulc_lap.head()

In [None]:
# Discard rows without a closing date, then derive an integer `year` column
# from the closing date.
lulc_lap = (
    lulc_lap
    .dropna(subset=['closing_date'])
    .assign(year=lambda rows: rows['closing_date'].dt.year.astype('int'))
)
lulc_lap.head(20)

In [None]:
# Renumber rows 0..n-1; the dropna in the previous cell can leave gaps in the index.
lulc_lap = lulc_lap.reset_index(drop=True)

In [None]:
# Sanity check: (rows, columns) of the frame about to be exported.
lulc_lap.shape

In [None]:
# Write the row-level LAP/land-use table (not an aggregate, despite the constant's
# name) to CSV. The index is written as the first column; the cells below read
# it back with index_col=0.
# NOTE(review): this replaces the `lu_agg` table exported to the same path
# earlier in the notebook.
lulc_lap.to_csv(LU_LAP_AGG_FILEPATH)

In [None]:
# EDA for the report: reload the CSV exported by the previous section.
# The path reuses LU_LAP_AGG_FILEPATH (the constant the export was written to)
# instead of repeating the literal, so reader and writer cannot drift apart.
LAP_ACRES_FILEPATH = LU_LAP_AGG_FILEPATH
df = pd.read_csv(LAP_ACRES_FILEPATH, index_col=0)
df.head()

In [None]:
# Share of total acreage per land-use category, smallest first.
landuse_dist = df.groupby('landuse')['landuse_acres'].sum().sort_values()
landuse_dist.div(landuse_dist.sum())

# Export as json

In [None]:
# pandas is imported again here — presumably so the JSON export section can be
# run on its own from a fresh kernel (TODO confirm).
import pandas as pd
# Same CSV that the "Export as csv" section writes.
LAP_ACRES_FILEPATH = '../data/LAP_landuse_acres.csv'
# Counties exported individually below.
WOH_COUNTIES = ['Delaware', 'Greene', 'Schoharie',
                'Sullivan', 'Ulster']

# The first CSV column is the saved row index.
df = pd.read_csv(LAP_ACRES_FILEPATH, index_col=0)
df.head()

In [None]:
# Per-county acreage tables: data[county][landuse][year] -> acres.
# (The unused `county = 'Delaware'` / `landuse_type = 'Forest Land'` scratch
# assignments were removed; the loop is the only place `county` is set.)
data = {}
for county in WOH_COUNTIES:
    county_rows = df[df['county'] == county]
    data[county] = county_rows.pivot_table(
        columns='landuse',
        index='year',
        values='landuse_acres',
        aggfunc='sum',
        fill_value=0,  # years with no acquisitions for a land use count as 0 acres
    ).to_dict()


In [None]:
import json
# Serialise the per-county tables built in the previous cell. JSON object keys
# are always strings, so the year keys are written as strings.
with open('../output/lap_woh_with_landuse.json', 'w') as f:
    json.dump(data, f)

In [None]:
# aggregate by landuse first
with open('../output/lap_landuse_acres_sum.json', 'w') as f:
    data = df.pivot_table(index='year', 
                   columns='landuse',
                   values='landuse_acres', 
                   aggfunc='sum',fill_value=0).to_dict()
    json.dump(data, f, indent=2)