# Load Data and Packages

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas
%matplotlib inline

# County names grouped as West of Hudson (WOH) and East of Hudson (EOH)
WOH_COUNTIES = [
    'Delaware',
    'Greene',
    'Schoharie',
    'Sullivan',
    'Ulster',
]
EOH_COUNTIES = [
    'Dutchess',
    'Putnam',
    'Westchester',
]

# Input files (GeoJSON layers and the county-area CSV)
LULC_FILEPATH = '../data/LULC_LAP_WOH_intersection.geojson'
COUNTY_AREA_FILEPATH = '../data/county_area.csv'
LAP_FILEPATH = '../data/LAP.geojson'

# Output files (CSV tables written by the cells below)
LAND_ACRES_NORM_FILEPATH = '../data/LAP_annual_acres_normalized.csv'
LAND_ACRES_FILEPATH = '../data/LAP_annual_acres.csv'
LAND_COUNTS_FILEPATH = '../data/LAP_annual_counts.csv'

In [None]:
# Read the GeoJSON layer at LAP_FILEPATH into a GeoDataFrame and preview it.
df = geopandas.read_file(
    LAP_FILEPATH,
)
df.head(n=2)

In [None]:
# Parse the closing date column as datetimes and derive the closing year
# from it; both columns are stored back on df.
closing_dates = pd.to_datetime(df['CLOSING_DT'])
df['CLOSING_DT'] = closing_dates
df['year'] = closing_dates.dt.year
df.head(n=2)

In [None]:
# County areas, indexed by the first CSV column.
# Every column is cast to integer in one vectorised call rather than row by
# row; the result is the same, without a Python-level loop over the rows.
areas = pd.read_csv(COUNTY_AREA_FILEPATH, index_col=0)
areas = areas.astype('int')
areas

# Summary Statistics

In [None]:
# Draw the geometries in df, coloured by the 'County' column.
legend_options = {
    'title': 'County',
    'loc': 3,  # matplotlib location code 3 == 'lower left'
}
ax = df.plot(
    column='County',
    legend=True,
    legend_kwds=legend_options,
    figsize=(10, 8),
)
ax.set_title('NYC DEP owned lands')

In [None]:
# Total LAP-acquired acres per county divided by that county's acres,
# listed from the largest share to the smallest.
lap_rows = df.loc[df['Acquisitio'] == 'LAP']
lap_acres_by_county = lap_rows.groupby('County')['GIS_Acres'].sum()
(lap_acres_by_county / areas['acres']).sort_values(ascending=False)

In [None]:
# How many rows share each closing date, drawn with the default Series plot.
closings_per_date = df['CLOSING_DT'].value_counts(sort=False)
closings_per_date.plot(
    title='Closing Date of Acquired Land',
    figsize=(10, 5),
)

In [None]:
# Stacked bars of the number of distinct SWIS_SBL values among LAP-acquired
# rows, per closing year and county: one panel per county group.
titles = ['West of Hudson', 'East of Hudson']
county_groups = [WOH_COUNTIES, EOH_COUNTIES]

# keep acquired (LAP) lands only
lap = df[df.Acquisitio == 'LAP']

# A bar plot places its bars at positions 0..n-1 and labels them afterwards.
# With sharex=True both panels share ticks, tick labels and x-limits, so the
# panel drawn last would relabel (and possibly clip) the other one whenever
# the two groups do not cover exactly the same years. Drawing both panels over
# one common list of years keeps the shared axis truthful.
in_plotted_counties = lap.County.isin(WOH_COUNTIES + EOH_COUNTIES)
all_years = sorted(
    lap.loc[in_plotted_counties, 'year'].dropna().astype('int').unique()
)

_, axes = plt.subplots(1, 2, sharex=True, figsize=(12, 5))
for ax, title, county_grp in zip(axes, titles, county_groups):
    temp = lap[lap.County.isin(county_grp)]\
        .pivot_table(index='year',
                     columns='County',
                     values='SWIS_SBL',
                     aggfunc='nunique',
                     fill_value=0)
    temp.index = temp.index.astype('int')
    # years with no acquisition in this group become explicit zero bars
    temp = temp.reindex(all_years, fill_value=0)
    temp.plot.bar(stacked=True,
                  ax=ax,
                  title=title)
    ax.set_xlabel('Year')
    ax.set_ylabel('Count')

plt.suptitle('Number of LAP-acquired Lands, per Year')

In [None]:
# Count distinct SWIS_SBL values per closing year and county for LAP-acquired
# rows (all counties present in df) and write the table to CSV.
lap_only = df.loc[df['Acquisitio'] == 'LAP']
acquired = lap_only.pivot_table(
    values='SWIS_SBL',
    index='year',
    columns='County',
    aggfunc='nunique',
    fill_value=0,
)
acquired.to_csv(LAND_COUNTS_FILEPATH)

In [None]:
# Stacked bars of the number of rows closed per year, split by the value of
# the 'Acquisitio' column (method of acquisition).
# The table is built from df: no frame called nyc_df is defined in this
# notebook, so the previous reference raised a NameError. Counting the
# non-null CLOSING_DT entries directly yields the same table that was
# previously selected from an all-columns count.
temp = df.pivot_table(index='year',
                      columns='Acquisitio',
                      values='CLOSING_DT',
                      aggfunc='count',
                      fill_value=0)
# 'year' may be stored as float; show whole years on the axis
temp.index = temp.index.astype('int')

ax = temp.plot.bar(stacked=True,
                   figsize=(7,5),
                   title='Method of Acquisition, per Year')
# anchor the legend at the top-right corner of the axes
ax.legend(bbox_to_anchor=(1,1))
ax.set_xlabel('Year')
ax.set_ylabel('Count')

In [None]:
# Stacked bars of the GIS_Acres total of LAP-acquired rows, per closing year
# and county: one panel per county group.
titles = ['West of Hudson', 'East of Hudson']
county_groups = [WOH_COUNTIES, EOH_COUNTIES]

# keep acquired (LAP) lands only
lap = df[df.Acquisitio == 'LAP']

# A bar plot places its bars at positions 0..n-1 and labels them afterwards.
# With sharex=True both panels share ticks, tick labels and x-limits, so the
# panel drawn last would relabel (and possibly clip) the other one whenever
# the two groups do not cover exactly the same years. Drawing both panels over
# one common list of years keeps the shared axis truthful.
in_plotted_counties = lap.County.isin(WOH_COUNTIES + EOH_COUNTIES)
all_years = sorted(
    lap.loc[in_plotted_counties, 'year'].dropna().astype('int').unique()
)

_, axes = plt.subplots(1, 2, sharex=True, figsize=(12, 5))
for ax, title, county_grp in zip(axes, titles, county_groups):
    temp = lap[lap.County.isin(county_grp)]\
        .pivot_table(index='year',
                     columns='County',
                     values='GIS_Acres',
                     aggfunc='sum',
                     fill_value=0)
    temp.index = temp.index.astype('int')
    # years with no acquisition in this group become explicit zero bars
    temp = temp.reindex(all_years, fill_value=0)
    temp.plot.bar(stacked=True,
                  ax=ax,
                  title=title)

    ax.set_xlabel('Year')
    ax.set_ylabel('Acres')

plt.suptitle('Acres of LAP-acquired Lands, per Year')


In [None]:
# Sum GIS_Acres per closing year and county for LAP-acquired rows and write
# the resulting table to CSV.
lap_only = df.loc[df['Acquisitio'] == 'LAP']
acquired_acres = lap_only.pivot_table(
    values='GIS_Acres',
    index='year',
    columns='County',
    aggfunc='sum',
    fill_value=0,
)
acquired_acres.to_csv(LAND_ACRES_FILEPATH)

In [None]:
# Divide each county column of the yearly acquired acres by that county's
# acres (matched on county name), then save the normalised table.
county_acres = areas['acres']
acquired_acres_normed = acquired_acres.div(county_acres, axis='columns')
acquired_acres_normed.to_csv(LAND_ACRES_NORM_FILEPATH)

# Export data as JSON for visualization
### Annual acres

In [None]:
# Reload the annual-acres table written earlier, indexed by integer year.
# NOTE: this rebinds df, which until now held the GeoDataFrame read from
# LAP_FILEPATH; re-run the loading cells before re-running earlier cells.
df = pd.read_csv(
    LAND_ACRES_FILEPATH,
    index_col='year',
    dtype={'year': 'int'},
)
df.head(n=2)

In [None]:
import json

# Parse the existing land-use JSON document into `data`.
with open('../output/lap_woh_with_landuse.json', 'r') as landuse_file:
    data = json.loads(landuse_file.read())

In [None]:
# For every column of df, store its {year: acres} mapping under the
# 'All Landuse Types' key of that column's entry in `data`, creating the
# entry first when `data` does not have one yet.
acres_by_county = df.to_dict()
for county in acres_by_county:
    data.setdefault(county, {})['All Landuse Types'] = acres_by_county[county]

In [None]:
# Serialise the merged dictionary and write it out as a single JSON document.
serialised = json.dumps(data)
with open('../output/lap.json', 'w') as out_file:
    out_file.write(serialised)

### Annual cumulative acres

In [None]:
# Running total down the rows for every column, plus an 'All Counties'
# column holding the row-wise sum of those running totals.
sum_df = df.cumsum(axis=0)
sum_df['All Counties'] = sum_df.sum(axis='columns')
sum_df.head(n=5)

In [None]:
# Write the cumulative table as pretty-printed JSON ({column: {year: value}}).
cumulative_json = json.dumps(sum_df.to_dict(), indent=2)
with open('../output/lap_cumulative_acres.json', 'w') as out_file:
    out_file.write(cumulative_json)