To run a cell, press `Shift` + `Return`. Run the cell below to get started.

In [None]:
from workshop_utils import * 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import datetime
import geopandas as gpd
import shapely
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
from tqdm import tqdm_notebook as tqdm
tqdm().pandas();

# Puerto Rico Example


The following query gets every edit ever made to objects in Puerto Rico, joined to the changesets that contain them. The result is large, so it may take some time to download.

```sql
SELECT
  central_america.id, 
  geom,
  central_america.tags, changeset, updated, valid_until, version, minor_version,
  changesets.id AS c_id, 
  changesets.tags as c_tags,
  changesets.uid,
  changesets.user
  
FROM central_america

JOIN changesets on central_america.changeset = changesets.id

WHERE ST_WITHIN(
  geom , 
  ST_POLYGON('POLYGON((-67.3066 18.5729, -65.5553 18.5729, -65.5553 17.8729, -67.3066 17.8729, -67.3066 18.5729))'))
ORDER BY updated
```

In [None]:
# Alternative Athena result sets, currently disabled:
#   https://us-east-2.console.aws.amazon.com/athena/query/results/116fe037-6c95-4517-b52b-de3ecc3ba763/csv
#   https://us-east-2.console.aws.amazon.com/athena/query/results/5e4ea35a-0c61-4a79-8af0-77e18612ed13/csv
# Active result set: one row per edit, loaded through the workshop helper.
edits_csv_url = 'https://us-east-2.console.aws.amazon.com/athena/query/results/45b14013-55ee-49a3-98e2-6fb47e45302d/csv'
edits = load_dataframe_from_s3(edits_csv_url)


In [None]:
# Human-readable name of the study area; interpolated into plot titles.
place = 'NW Puerto Rico'

In [None]:
# Peek at the first two rows to check which columns were loaded.
edits.head(n=2)

In [None]:
# Parse the `updated` column into pandas timestamps in one vectorized call
# (much faster than building a pd.Timestamp per row, and it does not depend
# on tqdm's pandas integration having been registered first).
edits['updated'] = pd.to_datetime(edits['updated'])
# Calendar day of each edit (datetime.date objects); used as the grouping
# key for the per-day aggregates.
edits['date'] = edits['updated'].dt.date

In [None]:
# Per-day summary of the edit history:
#   c_id -> number of rows with a changeset id (i.e. edits that day)
#   id   -> number of distinct object ids touched that day
#   uid  -> number of distinct user ids active that day
daily_aggregations = {'c_id': 'count', 'id': 'nunique', 'uid': 'nunique'}
gb_date = edits.groupby('date').agg(daily_aggregations)
gb_date.head(2)

In [None]:
# Line chart of the number of edits per day for the study area.
sns.set_style('whitegrid')
ax = gb_date['c_id'].plot(figsize=(14, 4))
ax.set_xlabel("Date")
ax.set_ylabel("")
edits_title = "Edits in {}".format(place)
ax.set_title(edits_title, fontsize=16);

### Question: What's the spike in 2007? 

In [None]:
# Scatter (one dot per day) of how many distinct users edited that day.
sns.set_style('whitegrid')
ax = gb_date['uid'].plot(style='.', figsize=(14,4))
ax.set_xlabel("Date"); ax.set_ylabel("");
# Use the shared `place` label so the title matches the data being plotted
# (the title previously hard-coded a different city).
ax.set_title("Unique editors active per day in {}".format(place),fontsize=16);

### Note: there is no corresponding spike in active users in 2007...

In [None]:
# Keep only edits made strictly after 2010-01-01, which drops the 2007 spike
# seen above (NOTE(review): the variable name suggests that spike was the
# TIGER import — confirm).
tiger_cutoff = datetime.datetime(2010, 1, 1)
after_tiger = edits[edits['updated'] > tiger_cutoff]

In [None]:
from shapely import wkt

In [None]:
# Work with a random sample of at most 100k edits to keep the map responsive.
# - n is capped at the number of available rows, because DataFrame.sample
#   raises ValueError when asked for more rows than exist (without replacement).
# - random_state is fixed so that Restart & Run All reproduces the same sample.
sample2 = after_tiger.sample(n=min(100000, len(after_tiger)), random_state=42)

In [None]:
# Convert each `geom` value into a shapely geometry with a progress bar
# (presumably the column holds WKT text — verify against the query output).
# Not idempotent: re-running this cell would pass already-parsed geometries
# to wkt.loads.
sample2['geom'] = sample2['geom'].progress_apply(wkt.loads)

In [None]:
# Overwrite `date` with the ISO-8601 string form of the full `updated`
# timestamp for every sampled row.
sample2['date'] = sample2['updated'].map(lambda ts: ts.isoformat())

In [None]:
# Wrap the sample in a GeoDataFrame, using the `geom` column as its geometry.
objects = gpd.GeoDataFrame(
    sample2,
    geometry='geom',
)

In [None]:
from keplergl import KeplerGl

# Create an empty Kepler.gl map widget (height=600); data is attached in the
# next cell.
map_1 = KeplerGl(height=600)
# Leaving the widget as the last expression renders it in the notebook.
map_1

In [None]:
# Send only the columns the map needs (geometry plus the attributes used for
# filtering/tooltips) to Kepler.gl as a dataset named 'objects'.
kepler_columns = ['geom', 'changeset', 'date', 'user']
map_1.add_data(data=objects[kepler_columns], name='objects')

## Make it interactive

From the sidebar on the left, add a filter on the 'date' field of the 'objects' layer.

Also, set layer blending to "normal" to highlight where the most edits are happening.