### General template


Fetches data from the local source and establishes the following variables:

1. dutch_codes
2. swiss_codes
3. dutch_surveys
4. swiss_surveys
5. swiss_beaches


Establishes directory variables for fetching and putting to all subdirectories:

1. data
2. beaches
3. codes
4. geo
5. output

Provides an optional script to refresh the local data from the remote hammerdirt API.

In [1]:
# sys things
import os
import sys
import json

# networks
import requests

# data
import pandas as pd
import numpy as np
import scipy
import math
import seaborn as sns

import resources.utilities.utility_functions as ut

In [2]:
# Folder paths for each resource sub-directory, as returned by the project's utility module.
(data, beaches, codes, geo, output) = ut.make_local_paths()

# Show the resolved paths.
print(data, beaches, codes, geo, output)

resources/surveydata resources/locationdata resources/mlwcodedefs resources/geodata output


In [42]:
# Code definitions.
# Both tables end up with an 'ospar_id' column and with missing values replaced by 0.
dutch_codes = (
    pd.read_csv(codes+'/dutch_codes.csv')
    .fillna(0)
    .rename(columns={'OSPAR_ID':'ospar_id', 'Description':'description'})
)
swiss_codes = (
    pd.read_csv(codes+'/swiss_codes.csv')
    .rename(columns={'ospar_code':'ospar_id'})
    .fillna(0)
)

# Survey results (loaded as-is, no clean-up applied here).
dutch_surveys = pd.read_csv(data+'/dataset_macrolitter_NL.csv')
swiss_surveys = pd.read_csv(data+'/hammerdirt_data.csv')

# Survey locations.
swiss_beaches = pd.read_csv(beaches+'/hammerdirt_beaches.csv')

In [43]:
# Process the Dutch codes: split every 'ospar_id' into its whole-number part
# ('parent_code') and the fractional remainder ('child_code').
# NOTE(review): assumes a decimal id marks a sub-item of the whole-number code - confirm.
#
# np.floor is used rather than round(): rounding sends a fraction of .5 or more to the
# *next* whole number, which leaves a negative remainder, so such a row would be counted
# neither as a child (> 0) nor as a parent (== 0).
dutch_codes['parent_code'] = np.floor(dutch_codes.ospar_id)
dutch_codes['child_code'] = dutch_codes.ospar_id - dutch_codes.parent_code

# rows whose id has a fractional part, and the distinct parent codes they belong to
child_codes = dutch_codes.loc[dutch_codes.child_code > 0]
ccodes = child_codes.parent_code.unique()

# rows whose id is a whole number (no remainder)
parent_codes = dutch_codes.loc[dutch_codes.child_code == 0]
pcodes = parent_codes.parent_code.unique()

# every distinct parent code present in the Dutch table
dcodesall = dutch_codes.parent_code.unique()

In [57]:
# Process the Swiss codes: collect the distinct raw 'ospar_id' values before they are cleaned.
scodesall = swiss_codes['ospar_id'].unique()

def drop_bad_codes(x):
    """Convert a code value to ``int``, using 0 when it cannot be converted.

    Parameters
    ----------
    x : object
        The raw code value (number, numeric string, missing value, ...).

    Returns
    -------
    int
        ``int(x)`` when the conversion succeeds, otherwise ``0``.

    Notes
    -----
    Only conversion failures are turned into 0. Anything else (for example a
    keyboard interrupt) propagates instead of being silently discarded.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None or another unsupported type
        # ValueError: non-numeric text, or NaN
        # OverflowError: infinity
        return 0

# Clean the distinct raw values and the column itself with the same function.
fixed_swiss_codes = list(map(drop_bad_codes, scodesall))
swiss_codes['ospar_id'] = swiss_codes['ospar_id'].map(drop_bad_codes)

# Check that the cleaned column holds exactly the cleaned distinct values, in the same order.
print(fixed_swiss_codes == list(swiss_codes['ospar_id'].unique()))

True


In [72]:
# Swiss ospar ids that have no matching parent code in the Dutch table.
noncodes = [swiss_id for swiss_id in fixed_swiss_codes if swiss_id not in dcodesall]

print("The ospar codes unaccounted for\n", noncodes, "\n", sep="\n")

# Code definitions for the unmatched ids.
unmatched_rows = swiss_codes['ospar_id'].isin(noncodes)
noncodesdf = swiss_codes[unmatched_rows]

# MLW codes that belong to those unmatched definitions.
print("The MLW codes unaccounted for\n")
print(noncodesdf['code'].unique())

The ospar codes unaccounted for

[0, 181, 111, 56, 94, 96, 118, 12, 23, 114, 45]


The MLW codes unaccounted for

['G212' 'G213' 'G214' 'G136' 'G139' 'G140' 'G142' 'G143' 'G202' 'G203'
 'G204' 'G205' 'G206' 'G207' 'G208' 'G209' 'G210' 'G917' 'G921' 'G174'
 'G179' 'G180' 'G183' 'G184' 'G185' 'G186' 'G187' 'G189' 'G190' 'G191'
 'G192' 'G193' 'G195' 'G196' 'G197' 'G198' 'G199' 'G146' 'G147' 'G148'
 'G149' 'G150' 'G154' 'G156' 'G157' 'G158' 'G101' 'G102' 'G103' 'G104'
 'G105' 'G106' 'G107' 'G108' 'G109' 'G110' 'G111' 'G112' 'G113' 'G114'
 'G115' 'G116' 'G117' 'G118' 'G119' 'G120' 'G121' 'G122' 'G123' 'G124'
 'G13' 'G14' 'G15' 'G16' 'G17' 'G18' 'G19' 'G2' 'G23' 'G36' 'G37' 'G39'
 'G42' 'G43' 'G44' 'G45' 'G46' 'G47' 'G48' 'G49' 'G5' 'G50' 'G51' 'G52'
 'G53' 'G54' 'G55' 'G56' 'G57' 'G58' 'G59' 'G60' 'G61' 'G62' 'G63' 'G64'
 'G65' 'G68' 'G69' 'G70' 'G71' 'G72' 'G73' 'G74' 'G77' 'G80' 'G83' 'G84'
 'G85' 'G86' 'G89' 'G90' 'G909' 'G92' 'G925' 'G926' 'G927' 'G928' 'G930'
 'G931' 'G932' 'G933' 'G93

In [5]:
## !!! refresh the data from the hammerdirt api here:

# Opt-in switch: set to True to download the Swiss data again and overwrite the local
# csv files. It is False by default, so running this cell does nothing - no network
# access and no files written.
REFRESH_REMOTE_DATA = False

if REFRESH_REMOTE_DATA:
    # A timeout keeps a stalled connection from blocking the kernel indefinitely.
    a = requests.get('https://mwshovel.pythonanywhere.com/api/surveys/daily-totals/code-totals/swiss/', timeout=30)
    b = requests.get('https://mwshovel.pythonanywhere.com/api/list-of-beaches/swiss/', timeout=30)
    c = requests.get('https://mwshovel.pythonanywhere.com/api/mlw-codes/list/', timeout=30)

    # Stop on an HTTP error before anything is parsed or written over the local files.
    for response in (a, b, c):
        response.raise_for_status()

    # the surveys need to be unpacked:
    swiss_surveys = ut.unpack_survey_results(a.json())
    swiss_surveys = pd.DataFrame(swiss_surveys)

    # adding location date column
    swiss_surveys['loc_date'] = list(zip(swiss_surveys['location'], swiss_surveys['date']))

    # hold the original
    x = a.json()

    print("survey columns")
    print(swiss_surveys.columns)

    swiss_beaches = pd.DataFrame(b.json())
    print("beach columns")
    print(swiss_beaches.columns)

    print("code columns")
    swiss_codes = pd.DataFrame(c.json())
    print(swiss_codes.columns)

    # NOTE(review): to_csv also writes the index as an unnamed first column, which comes
    # back as 'Unnamed: 0' on read_csv - confirm whether downstream code relies on it.
    swiss_surveys.to_csv(data+'/hammerdirt_data.csv')
    swiss_beaches.to_csv(beaches+'/hammerdirt_beaches.csv')
    swiss_codes.to_csv(codes+'/swiss_codes.csv')

    # After a refresh, run the notebook again from the top so the new files pass through
    # the loading and clean-up cells.
