In [59]:
import geopandas as gpd
import os 
import pandas as pd
from sqlalchemy import create_engine, text
from pathlib import Path
from dotenv import load_dotenv
import fiona

# Load environment variables from the .env file in the current working directory;
# PATH_RAW and POSTGRES_DB are read from the environment in the next cell.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)
#os.environ['PROJ_LIB'] = '/opt/conda/share/proj'

True

In [60]:
# Settings taken from the environment; a value is None when its variable is not set.
PATH_RAW, POSTGRES = (os.getenv(key) for key in ('PATH_RAW', 'POSTGRES_DB'))

In [61]:
# Open a database connection whose search_path is the `dt` schema

engine = create_engine(
    POSTGRES,
    connect_args={'options': '-csearch_path=dt'},
)
connection = engine.connect()

The goal of this notebook is to create a dataset of oil and gas platforms and their infrastructure. The output should be several GeoJSON files for use in Flourish.

For platforms there are multiple datasets:
1. EMODnet - contains all platforms, including decommissioned ones, but is not fully up to date. 
2. National datasets, which have very different structures but were normalised by me earlier, except for wellbores. These datasets are the most current.

Data should be current up to 2023-03-01.

Some warnings:
- Although the data is from official sources, it's quite messy and the datasets contain different kinds of data. Some normalization was necessary, so I had to make some choices. These choices are made explicit in the code.
- The company data (operators, licence holders) has been normalized as well, using different sources such as the national oil and gas agencies, and by manually linking subsidiaries to parent companies using company registries and news articles. That process might lead to some errors, so if you use this data, you might need to check it, for instance at [Mapstand](https://app.mapstand.com/). Up until now, I haven't found any mismatches with the data from Mapstand (we largely use the same sources), but just be careful. 

## Overviews

We probably need some overviews of infrastructure, like all platforms, pipelines, structures, cables, etc.

In [62]:
# Load the source tables as GeoDataFrames

def _read_table(table_name):
    """Read a complete table over the open connection, using `geometry` as the geometry column."""
    query = text('SELECT * FROM ' + table_name)
    return gpd.GeoDataFrame.from_postgis(query, connection, geom_col='geometry')

infra_emod = _read_table('int_infra_emodnet')
infra = _read_table('int_infra')
pipes = _read_table('int_pipes')

### Clean infrastructure emodnet

In [5]:
# Lookup tables used to harmonise the EMODnet platform data

# EMODnet column name -> column name used in the rest of the notebook
new_cols = dict(
    platformid='feature_id',
    current_status='status',
    category='type',
    country='source_country',
)

# EMODnet status label -> normalised status
status = dict([
    ('Operational', 'ACTIVE'),
    ('Closed down', 'INACTIVE'),
    ('Decommissioned', 'REMOVED'),
    ('Under construction', 'PLANNED'),
    ('Removed', 'REMOVED'),
])

# Country name -> two-letter code (only the countries taken from EMODnet)
country = dict(Denmark='DK', Germany='DE', Belgium='BE')

In [6]:
# Columns that are not needed downstream
to_drop = [
    'index', 'id', 'function', 'location_blocks', 'weight_sub',
    'weight_top', 'water_depth', 'coast_dist', 'name_db',
]

# Normalise status and country, rename to the shared column names,
# keep Belgium/Denmark/Germany only and drop the unused columns
infra_emod = (
    infra_emod
    .assign(
        status_normalised=lambda d: d['current_status'].map(status),
        country=lambda d: d['country'].map(country),
    )
    .rename(columns=new_cols)
    .loc[lambda d: d['source_country'].isin(['BE', 'DK', 'DE'])]
    .drop(columns=to_drop)
)

In [7]:
# Keep only the structure types that we treat as platforms

# FPSO is certainly debatable
to_keep = [
    'SUBSEA STEEL',
    'PLATFORM',
    'FIXED STEEL',
    'FLOATING STEEL',
    'GRAVITY-BASED CONCRETE',
    'FPSO',
    'FLOATING-CONCRETE',
]

infra_emod = (
    infra_emod
    .assign(type=infra_emod['type'].fillna('UNKNOWN'))  # missing type -> 'UNKNOWN' (not in to_keep)
    .loc[lambda d: d['type'].isin(to_keep)]
    .assign(type_normalised='PLATFORM')
)

len(infra_emod)

68

In [8]:
infra_emod.status_normalised.value_counts()

ACTIVE      63
INACTIVE     3
PLANNED      2
Name: status_normalised, dtype: int64

### Clean infrastructure national sources

We have to make some choices on what to include, because there are many more structures in the datasets, like buoys. We don't want those. 

In [9]:
# Remove leftover index columns from the national infrastructure table

infra = infra.drop(columns=['level_0', 'index'])

# Normalised types that count as a platform; everything else is excluded here

keep = [
    'PLATFORM', 'MONOTOWER', 'FPSO', 'FSO',
    'CONDEEP 4 SHAFTS', 'CONDEEP 3 SHAFTS',
    'JACKET 12 LEGS', 'JACKET 8 LEGS', 'JACKET 4 LEGS', 'JACKET 6 LEGS',
    'CONDEEP MONOSHAFT', 'TLP STEEL', 'DORIS', 'TLP CONCRETE',
    'JACKET TRIPOD', 'SEMISUB STEEL', 'SEMISUB CONCRETE',
    'JACK-UP 3 LEGS', 'MOPUSTOR', 'FSU', 'SPAR',
]

infra_platforms = infra.loc[infra['type_normalised'].isin(keep)].copy()

len(infra_platforms)

721

In [10]:
infra_platforms.status_normalised.value_counts()

ACTIVE            553
REMOVED           105
NOT IN USE         41
ABANDONED          14
PARTLY REMOVED      5
INACTIVE            2
PLANNED             1
Name: status_normalised, dtype: int64

### Merge dataset

Add German, Belgian and Danish platforms

In [11]:
# Set to the same CRS
# Reproject the EMODnet platforms to EPSG:25831 before they are concatenated with the national data.
# NOTE(review): assumes `infra_platforms` (from int_infra) is already in EPSG:25831 — confirm.

infra_emod = infra_emod.to_crs(25831)

In [12]:
platforms = pd.concat([infra_emod, infra_platforms])
len(platforms)

789

In [13]:
# How many duplicates? 

len(platforms[platforms.name.duplicated()])

67

In [72]:
# Drop them
# Keep the first record per platform name (EMODnet rows come first in the concat).
# Rows without a name are kept: pandas treats missing values as equal to each other,
# so drop_duplicates(subset='name') would silently collapse all unnamed platforms into one.

platforms = platforms.loc[
    lambda d: ~(d['name'].notna() & d['name'].duplicated(keep='first'))
].copy()
len(platforms)

722

### Get EEZ data

Some data on countries is incomplete, and we have some platforms in the Irish Sea that should be excluded.

In [15]:
# Get layer name

fiona.listlayers('../../data/Spatial/INT/World_EEZ_v11_20191118_gpkg/eez_v11.gpkg')

['eez_v11']

In [16]:
eez = gpd.read_file('../../data/Spatial/INT/World_EEZ_v11_20191118_gpkg/eez_v11.gpkg', layer='eez_v11')

In [17]:
# Keep the EEZ polygons of the relevant countries (matched on MRGID),
# with only the territory code and the geometry

countries = [5668.0, 5696.0, 5686.0, 5674.0, 5669.0, 3293.0]

eez = eez.loc[eez['MRGID'].isin(countries), ['ISO_TER1', 'geometry']].copy()

In [18]:
# Three-letter territory code -> two-letter country code used in this notebook
countries = dict(BEL='BE', NLD='NL', DEU='DE', DNK='DK', GBR='UK', NOR='NO')

eez = (
    eez
    .rename(columns={'ISO_TER1': 'country'})
    .assign(country=lambda d: d['country'].map(countries))
    .reset_index()  # keeps the old index as an `index` column, which later cells drop
)


In [19]:
eez.to_file('../data/visuals/eez.geojson', driver='GeoJSON')

In [20]:
# Bring the EEZ polygons into the CRS of the platforms

eez = eez.to_crs(25831)

# Attach the EEZ country to every platform (left join keeps platforms outside all
# selected zones) and remove the helper columns the join brought along

platforms = (
    gpd.sjoin(platforms, eez, predicate='intersects', how='left')
    .drop(columns=['index', 'index_right', 'source_country'])
)

In [21]:
platforms.status_normalised = platforms.status_normalised.str.replace('NOT IN USE', 'INACTIVE')

In [22]:
# Create radius geometry
# Buffer every platform by 500 and keep the resulting polygons in an extra `radius` column.
# NOTE(review): the distance is in the units of the layer's CRS (EPSG:25831 here, presumably metres) — confirm.

platforms['radius'] = platforms.geometry.buffer(500)

In [23]:
# Prepare the buffer layer: drop the point geometry so only the `radius` polygons remain
radius = platforms.drop('geometry', axis=1)

In [24]:
radius = radius.set_geometry('radius')

In [25]:
# Write to geojson
radius = radius.to_crs(4326)
radius.to_file('../data/visuals/radius.geojson', driver='GeoJSON')

### Get wellbores

In [68]:
# Switch to the `public` schema.
# Release the previous connection and engine first: rebinding the names alone
# would leave the `dt` connection open for the lifetime of the kernel.
connection.close()
engine.dispose()

engine = create_engine(POSTGRES, connect_args={'options': '-csearch_path={}'.format('public')})
connection = engine.connect()

In [69]:
# Import data

wellbores_int = gpd.read_file('../data/shapes/EMODnet_HA_OG_Wells_20230222/EMODnet_HA_OG_Wells_20230222.shp')
len(wellbores_int)

27267

In [70]:
# Keep the EMODnet wellbores of Denmark, Germany and Belgium and lower-case the column names

countries = ['Denmark', 'Germany', 'Belgium']
int_selection = wellbores_int.loc[wellbores_int['COUNTRY'].isin(countries)].copy()
int_selection.columns = [column.lower() for column in int_selection.columns]
len(int_selection)

1012

In [71]:
# Store the selection in the database. The default if_exists='fail' raises as soon as
# the table exists, so replace it to keep the notebook re-runnable.
int_selection.to_postgis('int_wellbores', connection, if_exists='replace')

In [29]:
no = gpd.read_file('../../data/Spatial/NO/NPD_FactMapsData_v3_0.gdb/', layer='WELLBORE')

In [30]:
# NPD column name -> shared column name; the order here is the column order of the selection
cols = dict([
    ('wlbWellboreName', 'name'),
    ('wlbEntryDate', 'start_date'),
    ('wlbWellType', 'type'),
    ('wlbDrillingOperator', 'operator'),
    ('wlbCompletionDate', 'end_date'),
    ('wlbContent', 'content'),
    ('wlbPurpose', 'purpose'),
    ('wlbStatus', 'status'),
    ('wlbPluggedDate', 'plugged_date'),
    ('wlbPluggedAbandonDate', 'plugged_abandoned_date'),
    ('geometry', 'geometry'),
])

no = no.rename(columns=cols)

# Independent copy restricted to the renamed columns
no_selection = no.loc[:, list(cols.values())].copy()

In [31]:
# Reduce every date column of the Norwegian wellbores to a nullable integer year.
# `plugged_abandoned_date` is included so that all four date columns end up in the same
# format (it was the only one left as a raw date).
date_cols = ['start_date', 'end_date', 'plugged_date', 'plugged_abandoned_date']

no_selection = no_selection.assign(
    **{col: pd.to_datetime(no_selection[col]).dt.year.astype('Int64') for col in date_cols}
)

In [32]:
wellbore_uk = gpd.read_file('../../data/Spatial/UK/Well_Bottom_Holes_ED50.shp')

In [33]:
# UK column name -> shared column name; the order here is the column order of the selection
cols = {'SUBOPGRP': 'operator',
        'COMPLEDATE': 'end_date',
        'CURRWELLIN': 'purpose',
        'WELLOPSTAT': 'status',
        'FLUIDTYPES': 'content',
        'COMPLESTAT': 'content_details',
        'NAME': 'name',
        'SPUDDATE': 'start_date',
        'geometry': 'geometry'}

# Explicit copy (as for the Norwegian selection) so that later column assignments
# modify an independent frame instead of a slice of the renamed source
uk_selection = wellbore_uk.rename(columns=cols)[list(cols.values())].copy()

In [34]:
def _leading_year(values):
    """Return the first four characters of each value as a nullable integer year.

    Missing or non-numeric values become <NA> instead of raising, which the
    element-wise `x[0:4]` slice did for missing dates.
    """
    return pd.to_numeric(values.str[:4], errors='coerce').astype('Int64')


# Spud and completion dates are text that starts with the year; keep only the year
uk_selection = uk_selection.assign(
    start_date=_leading_year(uk_selection['start_date']),
    end_date=_leading_year(uk_selection['end_date']),
)

In [35]:
nl = gpd.GeoDataFrame.from_postgis(text('SELECT * FROM nl_wellbores'), connection, geom_col='geometry')

In [36]:
# Dutch column name -> shared column name; the order here is the column order of the selection.
# ('well_result' was listed twice with the same value; the duplicate key is removed.)
cols = {'well_type': 'purpose',
        'well_result': 'content',
        'operator': 'operator',
        'start_date_drilling': 'start_date',
        'end_date_drilling': 'end_date',
        'geometry': 'geometry',
        'status': 'status',
        'identification': 'name'}

# Explicit copy so that later column assignments modify an independent frame
nl_selection = nl.rename(columns=cols)[list(cols.values())].copy()

In [37]:
# Exclude wells whose result is salt ('Zout') or coal ('Steenkool'); rows without a result are kept
nl_selection = nl_selection.loc[~nl_selection['content'].isin(['Zout', 'Steenkool'])].copy()

In [38]:
# Reduce the drilling dates to a nullable integer year
for date_col in ('start_date', 'end_date'):
    nl_selection[date_col] = pd.to_datetime(nl_selection[date_col]).dt.year.astype('Int64')

In [39]:
# Reproject all four wellbore layers to EPSG:25831 before concatenating them

no_selection, uk_selection, nl_selection, int_selection = (
    layer.to_crs(25831)
    for layer in (no_selection, uk_selection, nl_selection, int_selection)
)

In [40]:
wellbores = pd.concat([no_selection, uk_selection, nl_selection, int_selection])
wellbores = wellbores.set_crs(25831)

In [41]:
len(wellbores)

26972

In [42]:
wellbores = wellbores.drop(['code', 'year', 'drilling_c', 'coast_dist', 'water_dept'], axis=1)

In [43]:
wellbores['type'].value_counts()

DEVELOPMENT    5713
EXPLORATION    2054
OTHER          1240
Name: type, dtype: int64

In [44]:
# Source label -> normalised well content (alphabetically ordered, comma separated).
# NOTE(review): the 'nan' key only matches the literal string 'nan'; real missing
# values are not matched by .map() and stay missing.
content = {'Dry': 'dry',
           'Condensate, , Oil': 'condensate,oil',
           'Condensate, Gas': 'condensate,gas', 
           'Condensate, Gas, Oil': 'condensate,gas,oil', 
           'Technisch mislukt': 'failed', 
           'Bronwater': 'water', 
           'OIL SHOWS': 'oil', 
           'Oil, Water': 'oil,water', 
           'CUTTINGS': 'cuttings', 
           'Olie shows': 'oil', 
           'GAS SHOWS': 'gas', 
           'Olie met gas shows': 'gas,oil', 
           'Gas en olie shows': 'gas,oil', 
           'Other': 'unknown', 
           'Gas shows': 'gas', 
           'GAS/CONDENSATE': 'condensate,gas', 
           'CO2': 'co2', 
           'OIL': 'oil', 
           'OIL/GAS SHOWS': 'gas,oil', 
           'Gas met olie shows': 'gas,oil', 
           'OIL/GAS': 'gas,oil', 
           'nan': 'unknown', 
           'DRY': 'dry', 
           'Condensate': 'condensate', 
           'Olie': 'oil', 
           'GAS': 'gas', 
           'SHOWS': 'unknown', 
           'Gas': 'gas', 
           'NOT AVAILABLE': 'unknown', 
           'WATER/GAS': 'gas,water', 
           'Condensate, Gas, , Water': 'condensate,gas,water', 
           'Onbekend': 'unknown', 
           'Natural Gas': 'gas', 
           'Water': 'water', 
           'Droog': 'dry', 
           'Olie en gas': 'gas,oil', 
           'Gas, Oil': 'gas,oil', 
           'OIL/GAS/CONDENSATE': 'condensate,gas,oil', 
           'NOT APPLICABLE': 'unknown', 
           'WATER': 'water', 
           'Oil': 'oil', 
           'Crude Oil': 'oil'}

# Source well type -> normalised well type.
# The key was spelled 'DEVElOPMENT' (lowercase L), so every 'DEVELOPMENT' row mapped to NaN.
well_type = {'OTHER': 'unknown',
             'DEVELOPMENT': 'development',
             'EXPLORATION': 'exploration'}

# Source status -> normalised status.
# Fixed: garbled 'JUNKED' label, misspelled 'decommissioned' value, and 'P&A'
# (plugged and abandoned), which was mapped to 'active'.
# NOTE(review): 'BLOWOUT' maps to 'suspended', a label no other entry uses — confirm
# whether it should be 'suspended or abandoned'.
status = {'BLOWOUT': 'suspended', 
          'RE-CLASS TO DEV': 'unknown', 
          'PREDRILLED': 'active', 
          'Decomissioned': 'decommissioned', 
          'N/A': 'unknown', 
          'PLUGGED': 'suspended or abandoned', 
          'Closed-in': 'suspended or abandoned', 
          'CLOSED': 'suspended or abandoned', 
          'Suspended': 'suspended or abandoned', 
          'DRILLING': 'active', 
          'INJECTING': 'active', 
          'Plugged back and sidetracked': 'suspended or abandoned', 
          'Plugged': 'suspended or abandoned', 
          'Active': 'active', 
          'Completed to well': 'active',
          'WILL NEVER BE DRILLED': 'suspended or abandoned', 
          'Constructed': 'active', 
          'Abandoned': 'suspended or abandoned',
          'Constructing': 'planned', 
          'JUNKED': 'suspended or abandoned', 
          'SUSPENDED': 'suspended or abandoned', 
          'PRODUCING': 'active', 
          'Producing/Injecting': 'active', 
          'RE-CLASS TO TEST': 'unknown', 
          'Observing': 'unknown', 
          'P&A': 'suspended or abandoned'}



In [45]:
# Map the source labels onto the normalised vocabularies; labels missing from a lookup become NaN
wellbores['status_normalised'] = wellbores.status.map(status)
wellbores['type_normalised'] = wellbores['type'].map(well_type)
wellbores['content_normalised'] = wellbores.content.map(content)

# Repair the garbled label used for 'JUNKED' wells. It is folded into the existing
# 'suspended or abandoned' category instead of creating a second, differently worded one.
wellbores.status_normalised = wellbores.status_normalised.str.replace(
    'abandonsuspended or abandoneded', 'suspended or abandoned', regex=False
)

In [46]:
# Join with eez for countries

wellbores = gpd.sjoin(wellbores, eez, predicate='intersects', how='left')

In [47]:
wellbores = wellbores.drop(['index_right', 'index', 'country_left'], axis=1)
wellbores = wellbores.rename(columns={'country_right': 'country'})

In [48]:
# Write all wellbores to geojson

wellbores = wellbores.to_crs(4326)
wellbores.to_file('../data/visuals/wellbores_all.geojson', driver='GeoJSON')

In [49]:
# And change crs back

wellbores = wellbores.to_crs(25831)

### Clip and write to file

In [52]:
# Clip infrastructure, pipelines and wellbores to the buffers around the platforms
# NOTE(review): `pipes` and `infra` are never reprojected in this notebook; this assumes
# they are already in the CRS of the buffers — confirm.

platform_infra, pipes_infra, wellbores_infra = (
    gpd.clip(layer, platforms['radius']) for layer in (infra, pipes, wellbores)
)

In [54]:
# Write to file

def _to_wgs84_with_coords(layer):
    """Reproject a layer to EPSG:4326 and add longitude/latitude columns from its geometry."""
    layer = layer.to_crs(4326)
    layer['longitude'] = layer.geometry.x
    layer['latitude'] = layer.geometry.y
    return layer


# Platforms (without the buffer polygons)
platforms = _to_wgs84_with_coords(platforms).drop(columns='radius')
platforms.to_file('../data/visuals/platforms.geojson', driver='GeoJSON')

# Infrastructure inside the buffers
platform_infra = _to_wgs84_with_coords(platform_infra)
platform_infra.to_file('../data/visuals/platforms_infra.geojson', driver='GeoJSON')

# Platforms and surrounding infrastructure in one layer
platforms_total = pd.concat([platforms, platform_infra])
platforms_total.to_file('../data/visuals/platforms_total.geojson', driver='GeoJSON')

# Pipelines inside the buffers
pipes_infra = pipes_infra.to_crs(4326)
pipes_infra.to_file('../data/visuals/pipes.geojson', driver='GeoJSON')

# Wellbores inside the buffers
wellbores_infra = _to_wgs84_with_coords(wellbores_infra)
wellbores_infra.to_file('../data/visuals/wellbores_infra.geojson', driver='GeoJSON')

In [55]:
infra_total = pd.concat([platform_infra, wellbores_infra])
infra_total.to_file('../data/visuals/infra_total.geojson', driver='GeoJSON')

### Merge platforms/infra with licences and normalized company names

So now we have 4 dataframes:
1. Platforms
2. The infrastructure surrounding the platforms
3. The pipelines surrounding the platforms
4. The wellbores

We can't get all current company data, but we can normalize the company names as much as possible and add licence owner data to the platforms, so we get a sense of who is responsible for that licence area.

In [None]:
# Build one row per licence with the (de-duplicated) company attributes as lists

com = pd.read_sql(text('SELECT * FROM current_licences_companies'), connection)
com_norm = pd.read_sql(text('SELECT * FROM company_names'), connection)

# Attach the normalised names to every licence/company row
com = com.merge(com_norm, left_on='name', right_on='name_db', how='left')

company_cols = ['name_international', 'name_local', 'country_local',
                'country_international', 'group_name_from_source']

com_to_merge = (
    com.groupby('licence_id')[company_cols]
    .agg(lambda values: list(set(values)))
    .reset_index()
)

In [None]:
def get_licence_and_company(df, coms):
    """Attach licence and licence-holder company data to a spatial layer.

    The current licences are read from the `all_current_licences` table
    (schema `public`), spatially joined to `df`, and the result is merged with
    the per-licence company table `coms`.

    Parameters
    ----------
    df : GeoDataFrame
        Layer to enrich. The licences are reprojected to its CRS
        (EPSG:4326 when `df` has no CRS set).
    coms : DataFrame
        Company attributes per licence, keyed by `licence_id`.

    Returns
    -------
    The joined frame: a left join on both steps, so rows of `df` without a
    licence or without company data are kept. The licence `status` column is
    dropped and `status_left` is renamed back to `status`.
    """
    # Use a short-lived engine and release it again, so repeated calls do not
    # accumulate open database connections
    engine = create_engine(POSTGRES, connect_args={'options': '-csearch_path={}'.format('public')})
    try:
        with engine.connect() as connection:
            # Get licence data
            licence = gpd.GeoDataFrame.from_postgis(text('SELECT * FROM all_current_licences'), connection, geom_col='geometry')
    finally:
        engine.dispose()

    # Both layers must share a CRS for the spatial join
    target_crs = df.crs if df.crs is not None else 4326
    licence = licence.to_crs(target_crs)

    # Perform spatial join on licences
    df = gpd.sjoin(df,
            licence,
            how='left',
            predicate='intersects')

    # Attribute merge with the companies holding each licence
    df = pd.merge(df,
                coms,
                left_on='licence_name',
                right_on='licence_id',
                how='left')

    # Clean it up: keep the layer's own status, drop the licence status and the join index
    df = df.drop(['status_right', 'index_right'], axis=1).rename(columns={'status_left': 'status'})

    print(f'Merged {len(df)}, but could not merge {len(df[df.name_international.isna()])} because of missing company names')

    return df

In [None]:
platforms_com = get_licence_and_company(platforms, com_to_merge)

In [None]:
# What are we missing? A lot of Denmark. Maybe add these manually.

platforms_com[['status_normalised', 'country']][platforms_com.name_international.isna()].value_counts()

## Analysis

### Platforms

In [None]:
# Let's see what we have

platforms_com.columns

In [None]:
platforms_com.status_normalised.value_counts()

#### Inactive platforms

In [None]:
# Select the inactive platforms
# The status list has its own name: reusing `inactive` for both the list and the
# resulting frame made this cell fail or misbehave when run a second time.

inactive_statuses = ['INACTIVE', 'ABANDONED', 'PARTLY REMOVED']

inactive = platforms_com[platforms_com.status_normalised.isin(inactive_statuses)].copy()

print(f'There are {len(inactive)} inactive platforms')

In [None]:
# Inactive platforms by country

inactive.country.value_counts()

In [None]:
# Create function for plotting ownership

def plot_ownership(df, col):
    """Bar-chart the ten most frequent values of `col`.

    List-valued cells are exploded first, so a row that holds several companies
    counts once for each of them. Missing values are not counted.

    Parameters
    ----------
    df : DataFrame
        Frame that contains `col`.
    col : str
        Column with single values or lists of values.

    Returns
    -------
    The object returned by `DataFrame.plot(kind='bar', x=col)`.
    """
    counts = (
        df[[col]]                 # only the counted column is needed
        .explode(col)
        .groupby(col)
        .size()
        .reset_index(name='count')
        # nlargest already returns the rows in descending order; ties keep the
        # (sorted) group order, which makes the chart deterministic
        .nlargest(10, 'count')
    )
    return counts.plot(kind='bar', x=col)

In [None]:
plot_ownership(inactive, 'name_international')

In [None]:
plot_ownership(inactive, 'name_local')

In [None]:
# Aggregate operators (shows the companies with the most inactive platforms)

plot_ownership(inactive, 'name_normalised')

In [None]:
# Where are the licence holders of inactive platforms located (parent company)

plot_ownership(inactive, 'country_international')

#### Removed

In [None]:
removed = platforms_com[platforms_com.status_normalised == 'REMOVED'].copy()
len(removed)

In [None]:
removed.country.value_counts()

In [None]:
# Licence holders parent companies

plot_ownership(removed, 'name_international')

In [None]:
# Licence holders local name

plot_ownership(removed, 'name_local')

In [None]:
# Aggregate operators (shows the companies with the most removed platforms)

plot_ownership(removed, 'name_normalised')

In [None]:
# Where are the licence holders of removed platforms located (parent company)

plot_ownership(removed, 'country_international')

#### All platforms

In [None]:
# Distribution over countries

platforms_com.country.value_counts()

In [None]:
# What about all platforms - licence holders parent companies?

plot_ownership(platforms_com, 'name_international')

In [None]:
# All platforms, licence holders local companies

plot_ownership(platforms_com, 'name_local')

In [None]:
# Platforms, operators

plot_ownership(platforms_com, 'name_normalised')

In [None]:
# Platforms, country parent companies (licence holders)

plot_ownership(platforms_com, 'country_international')

### Pipes

In [None]:
### All pipes
pipes = pipes.to_crs(4326)
pipes_com = get_licence_and_company(pipes, com_to_merge)

In [None]:
plot_ownership(pipes_com, 'name_international')

In [None]:
plot_ownership(pipes_com, 'name_normalised')

In [None]:
plot_ownership(pipes_com, 'country_international')

In [None]:
pipes['type'].value_counts()

### Pipes in radius

In [None]:
pipes_infra_com = get_licence_and_company(pipes_infra, com_to_merge)

In [None]:
plot_ownership(pipes_infra_com, 'name_international')

In [None]:
plot_ownership(pipes_infra_com, 'name_local')

In [None]:
# Operators

plot_ownership(pipes_infra_com, 'name_normalised')

### Wellbores

In [None]:
wellbores = wellbores.to_crs(4326)

In [None]:
wellbores_com = get_licence_and_company(wellbores, com_to_merge)

In [None]:
plot_ownership(wellbores_com, 'name_international')

In [None]:
plot_ownership(wellbores_com, 'operator')