## Mersey data (Keavney et al. 2021)

____
- Press cmd + enter or shift + enter to run a cell.
- Run cells in order.

### Run this cell

In [None]:
import pandas as pd # imports packages
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

%config InlineBackend.figure_formats = ['svg'] # make outputs nicer

fibres = pd.read_excel('KS Euan.xlsx', sheet_name = 'Fibres', header = 1) # loads sheet
frags = pd.read_excel('KS Euan.xlsx', sheet_name = 'Fragments', header = 1) # loads sheet
beads = pd.read_excel('KS Euan.xlsx', sheet_name = 'Beads', header = 1) # loads sheet

### **Edit and run this cell**

In [None]:
# First sample to compare, chosen as microplastic_type['location'],
# e.g. fibres['Liverpool Bay']
data_one = fibres['Liverpool Bay']

# Second sample to compare, chosen the same way, e.g. frags['River Alt']
data_two = fibres['River Alt']

# Microplastic type shown in the x-axis label of the plot
microplastic_type = 'Fibre'

# Name (without extension) of the saved figure file
file_title = 'bay_estuary'

### Run this cell

In [None]:
# Compare the two selected samples with a two-sample Kolmogorov-Smirnov test,
# plot their kernel density estimates on one axis and save the figure as
# '<file_title>.jpg'.

# Drop missing values once so the test, the sample sizes and the plots all
# use exactly the same observations.
sample_one = data_one.dropna()
sample_two = data_two.dropna()

# Run the KS test a single time and reuse the result for printing and the title.
ks_result = stats.kstest(sample_one, sample_two)
print(ks_result) # prints KS test result

p_val = ks_result.pvalue # saves p-value

one_length, two_length = len(sample_one), len(sample_two) # gets length of data

fig, ax = plt.subplots(figsize = (7, 4)) # makes figure

# fill = True replaces the deprecated shade = True argument of kdeplot.
sns.kdeplot(sample_one, label = f'{data_one.name}\nn = {one_length}',
            color = 'tab:red', fill = True, ax = ax) # plot data 1

sns.kdeplot(sample_two, label = f'{data_two.name}\nn = {two_length}',
            color = 'tab:blue', fill = True, ax = ax) # plot data 2

# Use the explicit axes/figure objects rather than the implicit pyplot state.
ax.legend()
ax.set_xlabel(f'{microplastic_type} size (mm)')
ax.set_xlim(left = 0) # start the x-axis at zero
ax.set_title(f'KS test p-value = {round(p_val, 4)}', fontsize = 14)

fig.savefig(f'{file_title}.jpg', dpi = 400) # save fig