# winter 2022 week03

## goals:

reload the data
run the convert_all() function in util/load_data.py, which loads everything and converts it to parquet

## todo
- ~add PIPA to list of interesting ones (57 -> 58)~
- ~figure out how to get the thing to run faster (try to just use geopoints_sampled in analyze)~
- ~final count of reduced fishing~
- run the analyze thing over those 7/8
- make those charts of in/out fishing effort of individual ships for those 7/8
- make pictures of before/after fishing effort of individual ships for those 7/8 (could use consistent colors)

THEN:
- ~make scatter-plot (instead of a table) of (low, low), (low, high), (high, low), (high, high), etc. of the 171~
- ~make table of (?, low), (?, high) of all the other ones~
- try to categorize the mpas (sum up area and #, etc.)

### of the 171:
- about 14 showed notable decrease in both # hours and %
- significant number (>100) of the 171 had no effort in before or after
- very small number (3) showed a modest increase in % internal effort
(should I quantify this by marine area?)
- any sort of before/after pictures helpful? (at least make the before/after effort plots)
- decide on criteria: % decrease by at least something
- for all the matching ones, do a before/after



AND THEN:
- compare to the paper about 5 MPAs
- email Dan to show him everything
- title: what can GFW data tell us about the true protection of MPAs?
- two effective scenarios: (high, low) and (low, low) ~= (?, low)


## notes:



In [None]:
%load_ext autoreload

In [None]:
%autoreload 2

In [None]:
cd /Users/brendan/Documents/projects/mpa_project/gfw_research

In [None]:
import numpy as np
import pandas
from datetime import datetime
import matplotlib.pyplot as plt
import os
import pywdpa
import geopandas
import contextily as ctx
from shapely import geometry
from shapely import ops
import pretty_html_table
import cProfile

import util

pandas.set_option('display.max_columns', None)
pandas.set_option('display.max_rows', None)

In [None]:
# Location of the original MPAtlas shapefile. In this part of the notebook the
# two constants are only referenced by the commented-out shapefile load below.
DATA_PATH = "/Users/brendan/Documents/projects/mpa_project/gfw_research/data/"
FILENAME = "mpatlas_20201223_clean/mpatlas_20201223_clean.shp"

# Alternative loaders kept for reference: read the raw shapefile directly, or
# go through util.load_mpatlas_mpas().
#mpas = geopandas.read_file(DATA_PATH + FILENAME)
# util.load_mpatlas_mpas()
# Load the MPA table from its parquet copy. The path is relative, so this
# relies on the working directory set by the earlier `cd` cell.
mpas = geopandas.read_parquet("data/mpas/mpatlas.parquet")

In [None]:
def load_year(year):
    """Read one year's points table from ``data/points/<year>.parquet``.

    A progress message is written on the current console line (carriage
    return, no trailing newline) before the file is read.

    Args:
        year: The year to load; converted with ``str`` to build the file name.

    Returns:
        Whatever ``pandas.read_parquet`` returns for that file.
    """
    label = str(year)
    print(f'\r reading year {label}', end='')
    parquet_path = f"data/points/{label}.parquet"
    return pandas.read_parquet(parquet_path)

In [None]:
# Load every yearly points table for 2012 through 2020 (inclusive) up front,
# keyed by the integer year.
points_by_year = {}
for year in range(2012, 2021):
    points_by_year[year] = load_year(year)

In [None]:
# Down-sample the raw points *before* the geo conversion.
# Plan: use the sampled points to find the interesting mmsi first, and only
# afterwards pull the full set of points for those relevant mmsi.

SAMPLE_RATIO = 100
sample_fraction = 1/SAMPLE_RATIO  # keep one point in every SAMPLE_RATIO

yearly_samples = []
for year, points in points_by_year.items():
    print(f'\rsampling {year}... ', end='')
    sampled = points.sample(frac=sample_fraction)
    yearly_samples.append(sampled)
print('done.')

# Stack the per-year samples into a single frame, then hand it to the
# project's geo conversion helper.
points_sampled = pandas.concat(yearly_samples)
print("converting to geo...  ", end='')
geopoints_sampled = util.convert_to_geo(points_sampled, box=True)
print("done.")


In [None]:
# Build the list of MPAs to analyze as (mpa_id, wdpa_id, name, date) tuples.

# MPAs whose 'no_take' field is 'All' and whose 'implemente' flag is set.
no_take = mpas[(mpas['no_take']=='All') & (mpas['implemente'])]
# Of those, keep the ones with a status year after 2012 or an implementation
# date string after '2012-01-01'.
of_interest_frame = no_take[
    ((no_take['status_yea'] > 2012) | (no_take['implementa'] > '2012-01-01')) & no_take['implemente']]

# Seed the list with the hand-picked Phoenix Islands entry (wdpa_id only).
of_interest = [
    (None, 555512002, 'Phoenix Island Protected Area', '2015-01-01')
]
for i, row in of_interest_frame.iterrows():
    date = row['implementa']
    # Fall back to the status year when the implementation date is missing.
    # pandas.isna also catches NaN/NaT, which an `is None` check lets through.
    if pandas.isna(date):
        date = str(row['status_yea'])
    of_interest.append(
        (row['mpa_id'], row['wdpa_id'], row['name'], date)
    )

# Randomize the processing order (shuffles the list in place).
np.random.shuffle(of_interest)

In [None]:
# Run util.analyze_mpa over every MPA in `of_interest`, write one HTML table of
# individual ships per MPA, and write two HTML summaries of the summed effort.
import util  # already imported above; presumably repeated so this cell runs standalone

# One timestamped output folder per run.
folder_prefix = f'plots/{np.datetime64("now")}'

# Collect the per-MPA frames in lists and concatenate once after the loop.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies the accumulated frame on every pass.
table_parts = []
summed_parts = []
for j, (mpa_id, wdpa_id, name, date) in enumerate(of_interest, start=1):
    print(f'\r{j}/{len(of_interest)} {mpa_id} {wdpa_id} {name} {date}                           ', end='')

    # Prefer the wdpa_id when present; merge all matching rows into one shape.
    if wdpa_id:
        mpa = mpas[mpas['wdpa_id'] == wdpa_id].dissolve(by='wdpa_id')
    elif mpa_id:
        mpa = mpas[mpas['mpa_id'] == mpa_id].dissolve(by='mpa_id')
    else:
        print(f'no id for {name}, skipping')
        continue

    folder = folder_prefix + f'/mpas/{name}/'

    returned = util.analyze_mpa(
        geopoints_sampled, points_by_year, mpa, date, name, verbose=False,
        plot=True, folder=folder)

    if returned is None:
        # No per-ship table for this MPA: keep an identifying row so it still
        # shows up in the summaries (its numeric columns end up as 0 below).
        summed_parts.append(pandas.DataFrame([
            {'name': mpa.iloc[0]['name'], 'wdpa_id': wdpa_id, 'mpa_id': mpa_id, 'date': date}
        ]))
        continue

    table, points_of_mpa_ships = returned
    table.insert(0, 'name', name)
    table.insert(1, 'date', date)
    table.insert(2, 'wdpa_id', wdpa_id)
    table.insert(3, 'mpa_id', mpa_id)

    # Save the per-ship table to an html file. The folder is created here in
    # case analyze_mpa did not already create it; exist_ok makes this a no-op
    # when it did.
    html_table_blue_light = pretty_html_table.build_table(table, 'blue_light')
    os.makedirs(folder, exist_ok=True)
    with open(folder+'/individual_ships.html', 'w', encoding='utf-8') as f:
        f.write(html_table_blue_light)

    table_parts.append(table)

    # Collapse the per-ship rows into a single row for this MPA: number of
    # ships (mmsi count) and the summed in/out, pre/post columns.
    summed = table.groupby('name').aggregate({
        'name': 'first',
        'wdpa_id': 'first',
        'mpa_id': 'first',
        'date': 'first',
        'mmsi': 'count',
        'in_pre': 'sum',
        'out_pre': 'sum',
        'in_post': 'sum',
        'out_post': 'sum'})
    summed_parts.append(summed)

# pandas.concat raises on an empty list, so fall back to an empty frame.
tables = pandas.concat(table_parts) if table_parts else pandas.DataFrame()
# ignore_index drops the groupby's 'name' index; 'name' is already a column.
summed_tables = (
    pandas.concat(summed_parts, ignore_index=True)
    if summed_parts else pandas.DataFrame())

# Populate the summed tables percentages columns:
#   in_pre_p / in_post_p = in / (in + out) for the pre and post columns,
#   decrease             = relative drop from in_pre_p to in_post_p.
summed_tables['in_pre_p'] = summed_tables['in_pre'] / (summed_tables['in_pre'] + summed_tables['out_pre'])
summed_tables['in_post_p'] = summed_tables['in_post'] / (summed_tables['in_post'] + summed_tables['out_post'])
summed_tables['decrease'] = (summed_tables['in_pre_p'] - summed_tables['in_post_p'])/(summed_tables['in_pre_p'])
# 0/0 ratios and rows without a table become 0 here. This also turns missing
# ids (e.g. a None mpa_id) into 0.
# NOTE(review): a row with in_pre_p == 0 but in_post_p > 0 yields -inf in
# 'decrease', which fillna does not replace.
summed_tables = summed_tables.fillna(0)

# Save to html files: one summary sorted by relative decrease, one by number
# of ships.
os.makedirs(folder_prefix, exist_ok=True)

html_table = pretty_html_table.build_table(summed_tables.sort_values('decrease', ascending=False), 'blue_light')
with open(folder_prefix+'/summed_effort_by_decrease.html', 'w', encoding='utf-8') as f:
    f.write(html_table)

html_table = pretty_html_table.build_table(summed_tables.sort_values('mmsi', ascending=False), 'blue_light')
with open(folder_prefix+'/summed_effort_by_mmsi.html', 'w', encoding='utf-8') as f:
    f.write(html_table)