In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import cv2
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import requests
import seaborn as sns

In [None]:
# Download the site records from the API as GeoJSON and load them into a GeoDataFrame.
# NOTE(review): the query caps the response at 10000 features; if more sites exist the
# result would presumably be truncated silently — confirm against the API.
endpoint = 'http://api.plastic.watch.earthrise.media/sites?limit=10000'
# A timeout keeps the notebook from hanging forever if the server stops responding.
response = requests.get(endpoint, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error body as GeoJSON.
response.raise_for_status()
sites_geojson = response.json()
data = gpd.GeoDataFrame.from_features(sites_geojson['features'])

# Best-effort numeric coercion: every column that can be cast to float is cast; columns
# that cannot (text, geometry, ...) are left unchanged. Only conversion errors are
# suppressed, so KeyboardInterrupt and unrelated failures still surface.
for c in data.columns:
    try:
        data[c] = data[c].astype(float)
    except (TypeError, ValueError):
        pass

# 1 km^2 = 100 ha.
data['area (ha)'] = data['area (km^2)'] * 100

In [None]:
# Rows whose waterway distance is -1 are overwritten with 10000. That value lies beyond the
# 5000 m x-limit of the distance plots and keeps these rows out of the "< 250 m" / "< 100 m"
# counts computed further down.
# NOTE(review): presumably -1 means "no waterway found" — confirm against the API.
waterway_missing = data['Distance to Waterway (m)'] == -1
data.loc[waterway_missing, 'Distance to Waterway (m)'] = 10000

# Statistics for All Sites

In [None]:
# Two-panel overview for every detected site: cumulative distribution of the distance to
# the nearest waterway (left) and number of sites per nearest-water type (right).

# Apply the seaborn theme BEFORE any axes are created. Matplotlib reads the style settings
# when an axes is constructed, so theming after the first subplot left the left-hand panel
# in whatever style the kernel happened to have (unstyled after Restart & Run All) while
# the right-hand panel was themed.
sns.set_theme()
sns.set_style("darkgrid", {"axes.facecolor": ".9"})

plt.figure(figsize=(10, 4), dpi=150)

# Left panel: cumulative, density-normalised step histogram of the distances.
plt.subplot(1, 2, 1)
sns.histplot(
    data,
    x='Distance to Waterway (m)',
    cumulative=True,
    stat='density',
    fill=False,
    element='step',
    bins=1000,
)
plt.ylim([0, 1])
plt.xticks([100, 500, 1000, 2000, 3000, 4000, 5000], ha='right', rotation=45)
plt.yticks([0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
plt.xlim([0, 5000])
title = 'Cumulative Distribution of Waste Site\nDistances to Nearest Waterway'
plt.title(title)

# Right panel: site count per nearest-water type, most frequent type first.
plt.subplot(1, 2, 2)
water_types = (
    data.groupby('Nearest Water Type')
    .count()
    .sort_values('Distance to Waterway (m)', ascending=False)
    .index
)
sns.countplot(x='Nearest Water Type', data=data, order=water_types, color='C0', edgecolor="0.1", linewidth=0.75)
# str() guards against non-string category values: a later cell replaces a -1 sentinel in
# this column for its subset, so -1 may be present here and has no .capitalize().
x_labels = [str(name).capitalize() for name in water_types]
# The generic 'Water' category is displayed as 'Unspecified'.
x_labels = ['Unspecified' if label == 'Water' else label for label in x_labels]
plt.xticks(range(0, len(x_labels)), x_labels, rotation=45, ha='right')
plt.tick_params(axis='x', pad=-4)
bar_title = 'Count of Nearest Water Type for All Detected Sites'
plt.title(bar_title)
plt.ylabel('Count')

plt.subplots_adjust(wspace=0.3)
# NOTE(review): `title` contains a newline, so the saved file name does too, and a later
# cell saves its figure to this exact path, overwriting this one. Path kept unchanged here
# so existing references to the file keep working — consider a dedicated file name.
plt.savefig(f'../../figures/{title}.png', bbox_inches='tight')
plt.show()

In [None]:
# Headline statistics over every detected site.
n_sites = len(data)
n_countries = len(data['country'].unique())
print(f"We have detected and confirmed {n_sites:,} sites in {n_countries} countries")

# Number and share of sites closer than each distance threshold to a waterway.
waterway_dist = data['Distance to Waterway (m)']
n_within_250m = int((waterway_dist < 250).sum())
print(f"There are {n_within_250m} sites within 250m of a waterway ({n_within_250m / n_sites:.1%})")
n_within_100m = int((waterway_dist < 100).sum())
print(f"There are {n_within_100m} sites within 100m of a waterway ({n_within_100m / n_sites:.1%})")

# Mean of the per-site population columns.
mean_pop_1km = data['Population - 1 km'].mean()
mean_pop_5km = data['Population - 5 km'].mean()
print(f"On average, {mean_pop_1km:,.0f} people live within 1 km of a site and {mean_pop_5km:,.0f} people live within 5 km")

In [None]:
# Bar chart of the number of detected sites per country, largest count first.
plt.figure(figsize=(10, 4), dpi=150)
sns.set_theme()
sns.set_style("darkgrid", {"axes.facecolor": ".9"})

# Order the bars by the per-country count of non-null waterway distances, descending.
per_country_counts = data.groupby('country').count()
countries = per_country_counts.sort_values('Distance to Waterway (m)', ascending=False).index
ax = sns.countplot(x='country', data=data, order=countries, color='C0', edgecolor="0.1", linewidth=0.75)

# NOTE(review): str.capitalize() upper-cases the first letter and lower-cases the rest, so a
# multi-word country name would render like "Sri lanka" — confirm this is intended.
x_labels = [name.capitalize() for name in countries]
ax.set_xticks(range(0, len(x_labels)))
ax.set_xticklabels(x_labels, rotation=45, ha='right')
ax.tick_params(axis='x', pad=-4)

bar_title = 'Count of Waste Sites per Country'
ax.set_title(bar_title)
ax.set_ylabel('Count')
ax.set_xlabel('')

plt.subplots_adjust(wspace=0.3)
# Explicit white facecolor for the saved image.
plt.savefig(f'../../figures/{bar_title}.png', bbox_inches='tight', facecolor=(1,1,1))
plt.show()

In [None]:
# Cumulative distribution of site area (ha) for every detected site, with four named
# reference sites marked as labelled vertical lines.
plt.figure(figsize=(4, 4), dpi=150)
sns.set_theme()
sns.set_style("darkgrid", {"axes.facecolor": ".9"})

# Reference site name -> x position (ha) of its marker.
# NOTE(review): values are hard-coded; presumably copied from the polygon areas printed by
# the tpa_polygons cell — confirm they are still current.
text_labels = {
    'Peh': 0.51,
    'Jungut Batu': 1.24,
    'Temesi': 3.85,
    'Suwung': 4.47,
}
label_box = {'facecolor': '#E6E6E6', 'alpha': 1, 'pad': 1.75, 'edgecolor': 'none'}

ax = plt.gca()
for site_name, area_ha in text_labels.items():
    ax.axvline(x=area_ha, color='gray', alpha=0.5, linestyle='--', linewidth=1)
    # Rotated label centred on the line at y = 0.5; the filled box hides the line behind it.
    ax.text(area_ha, 0.5, site_name, rotation=90, fontsize=10, ha='center', va='center', bbox=dict(label_box))

sns.histplot(
    data,
    x='area (ha)',
    cumulative=True,
    stat='density',
    fill=False,
    element='step',
    bins=10000,
    ax=ax,
)
ax.set_ylim([0, 1])
ax.set_xlim([0, 5])
ax.set_xticks([0.1, 1, 2, 3, 4])
ax.set_xticklabels(['0.1', '1', '2', '3', '4'], ha='center', rotation=0)
ax.set_xlabel('Area (ha)')
title = 'Cumulative Distribution of Waste Site Areas for All Detected Sites'
ax.set_title(title)
plt.savefig(f'../../figures/{title}.png', bbox_inches='tight')
plt.show()

In [None]:
# Subset of the sites with every row whose country is 'Nigeria' removed.
# NOTE(review): the name suggests the remaining sites are all in Asia — the filter itself
# only excludes Nigeria; confirm.
# .copy() makes the subset an independent frame: the .loc assignments below would otherwise
# write into a slice of `data`, which raises pandas' SettingWithCopyWarning and leaves it
# ambiguous whether `data` itself is modified.
asia_data = data[data['country'] != 'Nigeria'].copy()
print(len(asia_data))
# set all elements with a Distance to Waterway (m) value of -1 to 10000
asia_data.loc[asia_data['Distance to Waterway (m)'] == -1, 'Distance to Waterway (m)'] = 10000
# replace values of -1 in the nearest water type column with None
asia_data.loc[asia_data['Nearest Water Type'] == -1, 'Nearest Water Type'] = None

In [None]:
# Same two-panel waterway overview as for all sites, restricted to `asia_data`:
# cumulative distance distribution (left) and site count per nearest-water type (right).

# Theme first, figure second: matplotlib applies style settings when an axes is created, so
# theming after the first subplot makes the left panel depend on whichever cells ran before.
sns.set_theme()
sns.set_style("darkgrid", {"axes.facecolor": ".9"})

plt.figure(figsize=(10, 4), dpi=150)

# Left panel: cumulative, density-normalised step histogram of the distances.
plt.subplot(1, 2, 1)
sns.histplot(
    asia_data,
    x='Distance to Waterway (m)',
    cumulative=True,
    stat='density',
    fill=False,
    element='step',
    bins=1000,
)
plt.ylim([0, 1])
plt.xticks([100, 500, 1000, 2000, 3000, 4000, 5000], ha='right', rotation=45)
plt.yticks([0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0])
plt.xlim([0, 5000])
title = 'Cumulative Distribution of Waste Site\nDistances to Nearest Waterway'
plt.title(title)

# Right panel: site count per nearest-water type, most frequent type first.
plt.subplot(1, 2, 2)
water_types = (
    asia_data.groupby('Nearest Water Type')
    .count()
    .sort_values('Distance to Waterway (m)', ascending=False)
    .index
)
sns.countplot(x='Nearest Water Type', data=asia_data, order=water_types, color='C0', edgecolor="0.1", linewidth=0.75)
x_labels = [name.capitalize() for name in water_types]
# The generic 'Water' category is displayed as 'Unspecified'.
x_labels = ['Unspecified' if label == 'Water' else label for label in x_labels]
plt.xticks(range(0, len(x_labels)), x_labels, rotation=45, ha='right')
plt.tick_params(axis='x', pad=-4)
bar_title = 'Count of Nearest Water Type for Detected Sites'
plt.title(bar_title)
plt.ylabel('Count')

plt.subplots_adjust(wspace=0.3)
# NOTE(review): `title` (newline included) is identical to the one used by the all-sites
# waterway figure, so this call writes to the same path and overwrites that file. Path kept
# unchanged here — confirm whether the two figures should have distinct file names.
plt.savefig(f'../../figures/{title}.png', bbox_inches='tight')
plt.show()

In [None]:
# Load the reference polygons, reproject them to EPSG:3857 and print each polygon's area.
# NOTE(review): EPSG:3857 (Web Mercator) is not an equal-area projection, so areas measured
# in it are inflated away from the equator — consider an equal-area or local UTM CRS.
tpa_path = '../../data/sampling_locations/tpa_polygons.geojson'
tpa = gpd.read_file(tpa_path).to_crs("epsg:3857")

# Polygon area scaled by 0.0001 (m^2 -> ha, given that EPSG:3857 coordinates are in metres).
tpa['area'] = [polygon.area * 0.0001 for polygon in tpa['geometry']]

for site, hectares in tpa[['Name', 'area']].itertuples(index=False):
    print(f"{site}: {hectares:.2f} ha")

In [None]:
# Cumulative distribution of site area (ha) for `asia_data`, with four named reference
# sites marked as labelled vertical lines. The figure is shown but not written to disk.
plt.figure(figsize=(4, 4), dpi=150)
sns.set_theme()
sns.set_style("darkgrid", {"axes.facecolor": ".9"})

# Reference site name -> x position (ha) of its marker.
# NOTE(review): hard-coded; presumably the rounded areas printed by the tpa_polygons cell.
text_labels = {
    'Peh': 0.51,
    'Jungut Batu': 1.24,
    'Temesi': 3.85,
    'Suwung': 4.47,
}
label_box = {'facecolor': '#E6E6E6', 'alpha': 1, 'pad': 1.75, 'edgecolor': 'none'}

ax = plt.gca()
for site_name, area_ha in text_labels.items():
    ax.axvline(x=area_ha, color='gray', alpha=0.5, linestyle='--', linewidth=1)
    # Rotated label centred on the line at y = 0.5; the filled box hides the line behind it.
    ax.text(area_ha, 0.5, site_name, rotation=90, fontsize=10, ha='center', va='center', bbox=dict(label_box))

sns.histplot(
    asia_data,
    x='area (ha)',
    cumulative=True,
    stat='density',
    fill=False,
    element='step',
    bins=10000,
    ax=ax,
)
ax.set_ylim([0, 1])
ax.set_xlim([0, 5])
ax.set_xticks([0.1, 1, 2, 3, 4])
ax.set_xticklabels(['0.1', '1', '2', '3', '4'], ha='center', rotation=0)
ax.set_xlabel('Area (ha)')
title = 'Cumulative Distribution of Waste Site Areas - 2'
ax.set_title(title)
# Saving is intentionally left disabled in the original notebook.
#plt.savefig(f'../../figures/{title}.png', bbox_inches='tight')
plt.show()

In [None]:
# 2x2 grid of photos, one per reference site, saved as a single figure.
# `cv2` is imported in the notebook's import cell.

# Directory holding one "<site name>.jpg" per site.
# NOTE(review): machine-specific absolute path — point this at a location inside the
# repository (or a configurable data directory) so the cell runs on other machines.
IMAGE_DIR = '/Users/ckruse/Downloads'
site_names = ['peh', 'jungut batu', 'temesi', 'suwung']

plt.figure(figsize=(4,4), dpi=150)
for index, name in enumerate(site_names, start=1):
    image_path = f'{IMAGE_DIR}/{name}.jpg'
    img = cv2.imread(image_path)
    # cv2.imread does not raise on a missing or unreadable file; it returns None, which
    # would otherwise surface as an opaque error inside cvtColor.
    if img is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    plt.subplot(2, 2, index)
    # OpenCV loads images in BGR channel order; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(name.title())
plt.tight_layout()
# Negative wspace pulls the two columns closer together.
plt.subplots_adjust(wspace = -0.2, hspace = .2)
plt.savefig('../../figures/area_site_images.png', bbox_inches='tight')
plt.show()