In [11]:
# Standard library
import datetime
import glob
import io
import logging
import os
import shutil
import zipfile

# Third-party
import fiona
import geopandas as gpd
import pandas as pd
from dateutil.relativedelta import relativedelta
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

import config

# NOTE(review): `requests` and `WebFeatureService` are referenced by the
# functions further down but are not imported in this cell -- confirm they
# are imported elsewhere or add them here.

# Read the variables defined in the local '.env' file.
load_dotenv('.env')
# Set PROJ_LIB for this process to a hard-coded conda location.
# NOTE(review): presumably needed so PROJ can find its data files in this
# environment -- confirm, and consider moving the path to '.env' or config.
os.environ['PROJ_LIB'] = '/opt/conda/share/proj'

## Config

In [2]:
def create_nl_zipfiles(reference_date=None):
    '''
    Build the download urls of the monthly nlog zipfiles.

    Parameters
    ----------
    reference_date : datetime.date, optional
        Date whose year and month select the monthly upload folder and
        file prefix. Defaults to today. Pass a date in the previous month
        if the current month hasn't been uploaded yet.

    Returns
    -------
    dict
        Maps the dataset name ('platforms', 'licences', 'fields',
        'wellbores') to its download url.
    '''
    if reference_date is None:
        reference_date = datetime.date.today()

    year = reference_date.year
    # abbreviated month name as produced by strftime (e.g. 'mar'); note that
    # '%b' follows the process locale
    month_name = reference_date.strftime('%b').lower()
    month_number = reference_date.strftime('%m')

    # dataset name used in this notebook -> name used in the nlog file name
    nlog_names = {'platforms': 'facility',
                  'licences': 'licences',
                  'fields': 'fields',
                  'wellbores': 'boreholes'}

    prefix = f'https://www.nlog.nl/sites/default/files/{year}-{month_number}/{month_name}-{year}-nlog-'

    return {name: f'{prefix}{nlog_name}_utm.zip' for name, nlog_name in nlog_names.items()}

In [3]:
# Download urls of the zipped datasets, keyed by country code.
# 'no' and 'uk' hold a single url; 'nl' holds the dict of
# {dataset name: url} returned by create_nl_zipfiles().
zip_dict = dict(
    no='https://factpages.npd.no/downloads/fgdb/NPD_FactMapsData_v3_0.zip',
    uk='https://datanstauthority.blob.core.windows.net/external/OpenDataZips/UKCS_OFF_WGS84.zip',
    nl=create_nl_zipfiles(),
)

In [4]:
# WFS endpoints read by download_wfs: 'nl' is used for the Dutch pipes
# layer, 'int' for the countries that share one endpoint.
wfs_dict = dict([
    ('nl', 'https://geo.rijkswaterstaat.nl/services/ogc/gdr/kabels_en_leidingen_noordzee/ows?'),
    ('int', 'https://ows.emodnet-humanactivities.eu/wfs?SERVICE%3DWFS&REQUEST%3DGetCapabilities&VERSION=2.0.0'),
])

In [22]:
# EPSG code looked up per country code by set_crs.
# NOTE(review): there is no 'nl' entry, so crs_dict.get('nl') yields None
# -- confirm whether that is intended.
crs_dict = {
    'no': 23032,
    **dict.fromkeys(('uk', 'be', 'de', 'dk', 'int'), 4326),
}

In [6]:
# Country code -> country name; download_wfs compares the name against the
# `country` column of the downloaded layers.
country_dict = dict(
    be='Belgium',
    dk='Denmark',
    uk='United Kingdom',
    no='Norway',
    de='Germany',
    nl='Netherlands',
)

## Checks

In [49]:
# General functions

def check_countries(countries):
    '''
    Validate the requested countries and expand the 'all' shortcut.

    Parameters
    ----------
    countries : list
        List of iso-2 country codes, use 'all' for all countries

    Returns
    -------
    list
        The full list of country codes when 'all' is requested, otherwise
        the list that was passed in.

    Raises
    ------
    TypeError
        If countries is not a list
    '''
    if isinstance(countries, list):
        # 'all' anywhere in the request replaces the whole request
        return ['no', 'nl', 'uk', 'be', 'dk', 'de'] if 'all' in countries else countries

    raise TypeError(f'List of countries is needed, not {type(countries)}')

def set_crs(gdf, country):
    '''
    Assign the crs configured for the country and reproject the
    geodataframe to EPSG:23032. The geodataframe is modified in place.

    Parameters:
    -----------
    gdf : geodataframe
        Geodataframe to update (changed in place)
    country: string
        Country code used to look up the source crs in crs_dict

    Raises:
    -------
    ValueError
        If no crs is configured for the country and the geodataframe
        doesn't carry a crs of its own
    '''
    source_crs = crs_dict.get(country)

    if source_crs is not None:
        # inplace=True so the caller's frame is updated; the previous version
        # only rebound a local name and the result was thrown away
        gdf.set_crs(source_crs, inplace=True)
    elif gdf.crs is None:
        raise ValueError(f'No crs configured for {country} and the data carries none')
    # else: no configured crs, keep the crs that came with the data

    gdf.to_crs(23032, inplace=True)

    logging.info(f'CRS is changed to {gdf.crs}')


def export_to_geopackage(gdf, country, name, path=None):
    '''
    Write a geodataframe as one layer of the country's geopackage
    ('{country}_geopackage.gpkg').

    Parameters
    ----------
    gdf : geodataframe
        Data to write; nothing is written when it is empty
    country : string
        Country code, used for the default folder and the file name
    name : string
        Name of the layer inside the geopackage
    path : string, optional
        Output folder, defaults to '../data/{country}/'. It is created
        when it doesn't exist yet.
    '''
    if path is None:
        path = f'../data/{country}/'

    os.makedirs(path, exist_ok=True)

    if len(gdf) == 0:
        # don't claim an export that never happened
        print(f'nothing to export for {country} -- {name}')
        return

    gdf.to_file(os.path.join(path, f'{country}_geopackage.gpkg'), layer=name, driver='GPKG')
    print(f'exported {country} -- {name} -- to: {path}')

def export_to_postgres(file, layer, country, engine):
    '''
    Read one layer from a file and append it to the database table
    '{country}_{layer}' (layer name lower-cased).

    Parameters
    ----------
    file : string
        Path of the file to read with geopandas
    layer : string
        Name of the layer to read; also used for the table name
    country : string
        Country code; passed to set_crs and used as table prefix
    engine :
        Database connection handed to to_postgis / to_sql
    '''
    gdf = gpd.read_file(file, layer=layer)
    # NOTE(review): the return value of set_crs is not used, so a crs change
    # only reaches this frame if set_crs modifies `gdf` in place -- verify.
    set_crs(gdf, country)
    gdf.columns = gdf.columns.str.lower()

    table = f'{country}_{layer.lower()}'

    try:
        gdf.to_postgis(table, engine, if_exists='append', index=False)
    except ValueError:
        # fall back to writing a plain (non-spatial) table
        df = pd.DataFrame(gdf)
        df.to_sql(table, engine, if_exists='append', index=False)
    except AttributeError:
        # retry with only the rows that have a geometry
        gdf = gdf[gdf['geometry'].notna()]
        gdf.to_postgis(table, engine, if_exists='append', index=False)


## Get WFS

In [8]:
def get_wfs_layers(url):
    '''Print each layer offered by the WFS at url,
    preceded by its position in the listing'''
    service = WebFeatureService(url, version='2.0.0')
    position = 0
    for layer_name in service.contents:
        print(position, layer_name)
        position += 1
        
def select_wfs_layer(url, index):
    '''Return the layer found at position index
    in the listing of the WFS at url'''
    available = WebFeatureService(url, version='2.0.0').contents
    return [*available][index]


def wfs2gdf(layer, url, output_format, wfs_version="2.0.0"):
    '''Request one layer from a WFS with GetFeature and return it
    as a geodataframe.

    Parameters
    ----------
    layer : string
        Name of the layer, sent as typeName
    url : string
        Url of the WFS
    output_format : string
        Sent as outputFormat; must contain 'xml' or 'json', which decides
        how the response is parsed
    wfs_version : string
        WFS version sent with the request

    Raises
    ------
    ValueError
        If output_format contains neither 'xml' nor 'json'
    '''
    as_xml = 'xml' in output_format
    if not as_xml and 'json' not in output_format:
        # fail before any request is made instead of hitting an unbound
        # result variable at the end
        raise ValueError(f"Unsupported output format '{output_format}', expected xml or json")

    params = dict(service='WFS',
                  version=wfs_version,
                  request='GetFeature',
                  typeName=layer,
                  outputFormat=output_format)

    response = requests.get(url, params=params)
    # surface HTTP errors here rather than as a parse error further down
    response.raise_for_status()

    if as_xml:
        with fiona.BytesCollection(response.content) as features:
            return gpd.GeoDataFrame.from_features(features)

    return gpd.read_file(response.text)

## Import functions

In [9]:
# For now, only nl, no and uk have zipfiles to download

def download_zipfiles(countries):
    '''Downloads the zipfiles of the requested countries and extracts
    them to '../data/{country}/'. For nl and uk every extracted shapefile
    is also written as a layer of the country's geopackage.

    Parameters:
    -----------
    countries : list
        List of countries

    Raises:
    -------
    ValueError
        When a requested country has no zipfiles. Countries earlier in
        the list have already been processed at that point.
    '''

    for country in check_countries(countries):
        print(f'working on {country}')
        if country not in ['nl', 'uk', 'no']: # change this if zipfiles for other countries are available
            raise ValueError(f'No zipfiles available for {country}, please use function download_wfs')

        # an entry is either a single url or a dict of {dataset name: url}
        source = zip_dict[country]
        urls = list(source.values()) if isinstance(source, dict) else [source]

        path = f'../data/{country}/'
        os.makedirs(path, exist_ok=True)

        for url in urls:
            r = requests.get(url)
            # a missing file (e.g. a month that isn't uploaded yet) should
            # fail here and not as a confusing BadZipFile
            r.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(r.content)) as z:
                z.extractall(path)

        # Write files to geopackage, this is not necessary per se, but makes life a little bit easier later on
        if country in ['nl', 'uk']:
            for file in glob.glob(os.path.join(path, '*.shp')):
                # the layer name is the file name without a fixed-length
                # prefix and suffix (lengths differ per country)
                if country == 'uk':
                    name = os.path.basename(file)[5:-9].lower()
                elif country == 'nl':
                    name = os.path.basename(file)[14:-8].lower()

                gdf = gpd.read_file(file)
                gdf = gdf[~gdf.geometry.isna()]

                # pass the folder explicitly: export_to_geopackage is
                # declared with four parameters
                export_to_geopackage(gdf, country, name, path)
        
        

In [None]:
def download_wfs(countries):
    '''Downloads the WFS layers of the requested countries and writes
    them to the country's geopackage in '../data/{country}/'.

    Parameters:
    -----------
    countries : list
        List of countries

    Raises:
    -------
    ValueError
        When a requested country ('uk', 'no') has no wfs. Countries
        earlier in the list have already been processed at that point.
    '''

    for country in check_countries(countries):

        # passed explicitly: export_to_geopackage is declared with four parameters
        path = f'../data/{country}/'

        if country in ['uk', 'no']:
            raise ValueError(f'No wfs available for {country}, please use function download_zipfiles')

        elif country in ['be', 'dk', 'de', 'int']:
            _country = 'int'

            names = ['licences', 'platforms', 'pipes', 'wellbores']
            # positions of the layers in the listing of the 'int' WFS
            # NOTE(review): hard-coded positions -- check them with
            # get_wfs_layers if the service listing changes
            emod_layers = [0, 78, 79, 24]

            url = wfs_dict.get(_country)

            for name, layer in zip(names, emod_layers):
                gdf = wfs2gdf(select_wfs_layer(url, layer), url, 'json')
                # the endpoint is shared, keep only the rows of this country
                gdf = gdf[gdf.country == country_dict.get(country)]

                export_to_geopackage(gdf, country, name, path)

        elif country == 'nl':
            pipes_url = wfs_dict.get('nl') # change config
            gdf = wfs2gdf(select_wfs_layer(pipes_url, 2), pipes_url, 'json')
            export_to_geopackage(gdf, country, 'pipes', path)

In [None]:
def download_production(countries):
    '''Placeholder, not implemented yet.

    NOTE(review): the cell only contained the function signature, which
    is a syntax error; presumably this should download production data
    for the given countries -- confirm the intended behaviour.

    Parameters:
    -----------
    countries : list
        List of countries

    Raises:
    -------
    NotImplementedError
        Always
    '''
    raise NotImplementedError('download_production is not implemented yet')

In [None]:
def write_to_pg(countries):
    '''Writes every layer of each requested country's data file to the
    database via export_to_postgres.

    For 'no' the layers are read from the extracted file geodatabase,
    for all other countries from the country's geopackage.

    NOTE(review): `engine` is read from the notebook's global scope and is
    not defined in the cells shown here -- confirm where it is created.

    Parameters:
    -----------
    countries : list
        List of countries
    '''
    for country in check_countries(countries):
        folder = f'../data/{country}/'
        source = (f'{folder}NPD_FactMapsData_v3_0.gdb/' if country == 'no'
                  else f'{folder}{country}_geopackage.gpkg')

        for layer_name in fiona.listlayers(source):
            export_to_postgres(source, layer_name, country, engine)
