In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before each
# cell executes, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import pickle
import shapely
from sklearn import metrics
from tensorflow import keras
from tqdm.notebook import tqdm

from scripts import dl_utils

# Compute Trends in Landfill Area

In [None]:
# Base name (no extension) of the site-contour GeoJSON to analyse.
input_file = 'indonesia_v0_mask_window_8_upsampled_4_contours_model_v0.0.11_ensemble-8-25-21'

# Read the contour file into a GeoDataFrame; later cells use its
# 'name', 'date', 'geometry' and 'area (km^2)' columns.
contours_path = f'../../data/model_outputs/site_contours/v2.1/{input_file}.geojson'
contours = gpd.read_file(contours_path)

In [None]:
# Histogram (100 bins) of the 'area (km^2)' column across all contours.
contours['area (km^2)'].hist(bins=100)

In [None]:
# Distinct values of the 'name' column (one entry per site).
site_names = contours['name'].unique()

# Blank out (rather than drop) areas below 0.005 km^2: the rows stay in the frame,
# but their area becomes missing.
below_min_area = contours['area (km^2)'] < 0.005
contours.loc[below_min_area, 'area (km^2)'] = None
# An upper cut-off of 0.05 km^2 is kept here for reference but is disabled:
#contours.loc[contours['area (km^2)'] > 0.05, 'area (km^2)'] = None

In [None]:
# Number of non-null entries in every column, per value of 'date'.
contours.groupby('date').count()

In [None]:
# Plot the number of non-null contour geometries at each date.
site_counts = contours.groupby('date').count()['geometry']
# Tick labels: first 10 characters of each date key. Derived in this cell so it does
# not depend on `dates` from a later cell (which raised NameError on a fresh kernel).
dates = [elem[:10] for elem in site_counts.index]

plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
site_counts.plot()
# Label every second date to keep the axis readable.
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Number of Sites with Contours Detected Through Time - Indonesia')
plt.ylabel('Number of Sites')
plt.grid()
plt.savefig('../../figures/Number of Sites with Contours Detected Through Time - Indonesia', bbox_inches='tight')
plt.show()

## Aggregate mean/median area across all sites

In [None]:
# Median and mean contour area per date, aggregated over all sites.
# The area column is selected *before* aggregating so that non-numeric columns
# (e.g. 'name', 'geometry') never reach median()/mean(); pandas >= 2.0 raises a
# TypeError on those instead of silently dropping them.
area_by_date = contours.groupby('date')['area (km^2)']

# --- Median footprint, converted from km^2 to m^2 ---
areas = area_by_date.median() * 1000000
dates = [elem[:10] for elem in areas.index]  # first 10 characters of each date key
plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
plt.plot(dates, areas)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Median Value of Contour Areas in Indonesia')
plt.ylabel('Site Footprint ($m^2$)')
plt.savefig('../../figures/median_contour_area_indonesia.png', bbox_inches='tight')
plt.show()

# --- Mean footprint, left in km^2 (note the different unit from the plot above) ---
areas = area_by_date.mean()
dates = [elem[:10] for elem in areas.index]
plt.figure(figsize=(8,5), dpi=100)
plt.plot(dates, areas)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Mean Value of Contour Areas in Indonesia')
plt.ylabel('Site Footprint ($km^2$)')
plt.show()

In [None]:
# Mean contour area per date with +/- one standard deviation as error bars.
# Aggregating only the area column avoids applying mean()/std() to non-numeric
# columns, which pandas >= 2.0 rejects with a TypeError.
area_by_date = contours.groupby('date')['area (km^2)']
mean_area = area_by_date.mean()
stdev = area_by_date.std().values
dates = [elem[:10] for elem in mean_area.index]  # first 10 characters of each date key

plt.figure(figsize=(8,5), dpi=150)
plt.errorbar(dates, mean_area.values, yerr=stdev)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Mean and STDev of Contour Areas in Indonesia')
plt.ylabel('Site Footprint ($km^2$)')
plt.show()

## Aggregate total landfill areas in Indonesia
Not really a valid metric because fewer contours are present in later years

In [None]:
# Total contour area and number of contours with a (non-missing) area, per date.
# Only the area column is aggregated, so sum() is never applied to the string and
# geometry columns.
area_by_date = contours.groupby('date')['area (km^2)']

# --- Sum of areas ---
areas = area_by_date.sum()
dates = [elem[:10] for elem in areas.index]  # first 10 characters of each date key
plt.figure(figsize=(8,5), dpi=100)
plt.plot(dates, areas)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Sum of Contour Areas in Indonesia')
plt.ylabel('Total Landfill Area (km^2)')
plt.show()

# --- Count of non-missing areas ---
areas = area_by_date.count()
dates = [elem[:10] for elem in areas.index]
plt.figure(figsize=(8,5), dpi=100)
plt.plot(dates, areas)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Count of Contour Areas in Indonesia')
# This axis is a count, not an area (the label was previously copied from the sum plot).
plt.ylabel('Number of Contours')
plt.show()

## Average relative change in site area

In [None]:
# Relative deviation of each contour's area from the median area of its own site:
#     change = area / median(area over that site's rows) - 1
# transform('median') broadcasts every site's median back onto that site's rows,
# replacing a Python loop that re-filtered the whole frame once per site.
site_median_area = contours.groupby('name')['area (km^2)'].transform('median')
contours['change'] = contours['area (km^2)'] / site_median_area - 1
contours.head()

In [None]:
# Median, over all sites, of the per-site relative area change at each date.
# Selecting 'change' before median() keeps non-numeric columns out of the aggregation.
median_change = contours.groupby('date')['change'].median()
dates = [elem[:10] for elem in median_change.index]  # first 10 characters of each date key

plt.figure(figsize=(8,5), dpi=100)
plt.plot(dates, median_change)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
# Title and axis label now describe the plotted quantity (relative change, not total area).
plt.title('Median Relative Change of Contour Areas in Indonesia')
plt.ylabel('Change Relative to Site Median Area')
plt.show()

In [None]:
# Median relative change per date. The column is selected first so median() is not
# applied to the non-numeric columns (TypeError on pandas >= 2.0).
contours.groupby('date')['change'].median()


In [None]:
# Non-null counts per date for every column currently in `contours`.
contours.groupby('date').count()

In [None]:
# Mean relative change per date with +/- one standard deviation as error bars.
# Both the centre line and the error bars come from the 'change' column; previously
# the mean of 'area (km^2)' was drawn with the spread of 'change', mixing two quantities.
change_by_date = contours.groupby('date')['change']
mean_change = change_by_date.mean()
stdev = change_by_date.std().values
dates = [elem[:10] for elem in mean_change.index]  # first 10 characters of each date key

plt.figure(figsize=(8,5), dpi=150)
plt.errorbar(dates, mean_change.values, yerr=stdev)
plt.xticks(range(0,len(dates), 2), dates[::2], rotation=45, ha='right')
plt.title('Mean and STDev of Relative Change in Contour Areas in Indonesia')
plt.ylabel('Change Relative to Site Median Area')
plt.show()

In [None]:
# Per-date standard deviation of every numeric column. Non-numeric columns are
# filtered out up front because std() cannot be computed on them (pandas >= 2.0 raises
# instead of dropping them); grouping by the 'date' Series keeps the same group keys.
contours.select_dtypes(include='number').groupby(contours['date']).std()

# Compute Distance to Waterway

In [None]:
# Site metadata table. Later cells read its 'lon', 'lat' and
# 'distance to waterway(m)' columns.
metadata = pd.read_csv('../../data/site_metadata/SE_ASIA_METADATA.csv')

In [None]:
# Turn every metadata row into a point geometry built from its (lon, lat) pair.
site_points = [
    shapely.geometry.Point([lon, lat])
    for lon, lat in zip(metadata['lon'], metadata['lat'])
]
metadata_gdf = gpd.GeoDataFrame(geometry=site_points)

# Carry over every metadata column from the fifth one onward
# (the first four are skipped -- presumably identifier/coordinate columns; TODO confirm).
for column_name in metadata.columns[4:]:
    metadata_gdf[column_name] = metadata[column_name]
metadata_gdf


In [None]:
# Load the Indonesia sampling-locations GeoJSON; its 'geometry' column is iterated
# below when matching sites to the metadata points.
indonesia = gpd.read_file('../../data/sampling_locations/indonesia_v0_condensed.geojson')
indonesia

In [None]:
# Buffer every metadata point by 0.02 (in the geometry's coordinate units).
# NOTE(review): `buffered_geom` is not referenced by any other cell visible here --
# confirm whether it is still needed.
buffered_geom = [point.buffer(0.02) for point in metadata_gdf['geometry']]

In [None]:
# For each Indonesia site, average the 'distance to waterway(m)' of all metadata
# points whose 0.001-unit buffer contains that site.
#
# The buffers do not depend on the site being tested, so they are built once instead
# of once per site. The mean is taken on the single numeric column rather than via
# np.mean() on the whole GeoDataFrame, which breaks on pandas >= 2.0 (geometry column,
# axis=None reduction).
site_buffers = [site.buffer(0.001) for site in metadata_gdf['geometry']]
waterway_distance = metadata_gdf['distance to waterway(m)']

distance = []
for data_site in tqdm(indonesia['geometry']):
    matches = [buffer.contains(data_site) for buffer in site_buffers]
    # NaN when no metadata point lies within the buffer distance of this site.
    distance.append(waterway_distance[matches].mean())

In [None]:
# Histogram of distance to waterway. Sites with no matching metadata point carry a
# NaN distance, which plt.hist cannot bin, so only finite values are plotted.
known_distance = [d for d in distance if np.isfinite(d)]
plt.hist(known_distance, bins=25)
plt.xlabel('Distance to Waterway (m)')
plt.ylabel('Number of Sites')
plt.show()

In [None]:
# Empirical cumulative distribution of distance to waterway.
# NaN distances (sites with no matching metadata point) are excluded: they cannot be
# binned and would make the axis limit below unreliable. The fractions are therefore
# relative to sites with a known distance.
known_distance = [d for d in distance if np.isfinite(d)]

plt.figure(figsize=(8,5), dpi=100, facecolor=(1,1,1))
plt.hist(known_distance, cumulative=True, histtype='step', bins=1000, density=True)
plt.grid()
# Stop the x axis just short of the largest distance -- presumably to hide the
# vertical drop the step histogram draws at its right edge (TODO confirm).
plt.xlim(0, max(known_distance) - 1)
plt.ylim(0,1)
plt.title('Cumulative Fraction of Waste Sites by Distance to Nearest Waterway')
plt.xlabel('Distance to Waterway (m)')
plt.ylabel('Fraction of Detected Sites in Indonesia')
plt.savefig('../../figures/Indonesia Distance to Waterway.png')
plt.show()