In [None]:
from sqlalchemy import create_engine, text
import geopandas as gpd
import os
import pandas as pd
from dotenv import load_dotenv
import infrastructure as infra
import numpy as np

load_dotenv('.env')
POSTGRES = os.environ.get('POSTGRES_DB')

The goal of this notebook is to create a dataset with oil and gas platforms and their infrastructure. The output should be several geojson files for use in Flourish.

For platforms there are multiple datasets:
1. EMODnet - contains all platforms, also with decommissioned ones, but is not fully up to date. 
2. National datasets, which have very different structures but have been normalised by me earlier, except for wellbores. These datasets are the most current ones.

Data should be current up to 2023-03-01.

Some warnings:
- Although the data is from official sources, it's quite messy and the datasets contain different kinds of data. Some normalization was necessary, so I had to make some choices. These choices are made explicit in the code.
- The company data (operators, licence holders) have been normalized as well, where different sources were used, like the national gas and oil agencies, but also manually linking daughter and parent companies, using company registries and news articles. That process might lead to some errors, so if you use this data, you might need to check, for instance at [Mapstand](https://app.mapstand.com/). Up until now, I haven't found any mismatches with the data from Mapstand (we largely use the same sources), but just be careful. 

## Overviews

We probably need some overviews of infrastructure, like all platforms, pipelines, structures, cables, etc.

In [None]:
platforms = infra.get_platforms(['all'], eez=True, only_platforms=False)

In [None]:
infrastructure = platforms[platforms.type_normalised != 'Platform'].copy()
len(infrastructure)

In [None]:
platforms = platforms[platforms.type_normalised == 'Platform'].drop_duplicates(subset='infra_name').copy()
len(platforms)

In [None]:
pipes = infra.get_pipelines(['all'], eez=True)

In [None]:
wellbores = infra.get_wellbores(['all'], eez=True)

In [None]:
# The date columns are cast to strings because geopandas doesn't like datetimes.

date_columns = ['start_date', 'end_date']
wellbores[date_columns] = wellbores[date_columns].astype(str)

### Add ownership data from licences

In [None]:
engine = create_engine(POSTGRES, connect_args={'options': '-csearch_path={}'.format('public')})

# Read the licence holders and the normalised company table, then release the
# connection again instead of keeping it open for the rest of the session.
with engine.connect() as connection:
    com = pd.read_sql(text('SELECT * FROM current_licences_companies'), connection)
    com_norm = pd.read_sql(text('SELECT * FROM companies'), connection)

# Attach the normalised company record to every licence holder row.
com = pd.merge(com, com_norm, left_on='name', right_on='name_db', how='left')

# Add normalised company names as lists: one row per licence, every column a
# de-duplicated list. `unique()` keeps first-seen order, so the lists come out
# the same on every run (a round-trip through `set` does not guarantee that).
com_to_merge = (
    com.groupby(['licence_id'])[['name_international', 'name_local', 'country_international']]
    .agg(lambda values: values.unique().tolist())
    .reset_index()
)

com_to_merge = com_to_merge.rename(columns={'name_international': 'owner_name_normalised', 
                                            'name_local': 'owner_name',
                                            'country_international': 'owner_country'})

In [None]:
com_to_merge.head()

In [None]:
def get_licence_and_company(df, coms):
    """Attach licence names and licence-holder details to spatial features.

    The licence polygons are read from the ``all_current_licences`` table,
    spatially joined onto ``df`` and then merged with ``coms`` on the licence
    name.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Features to enrich. Must contain the columns ``feature_id``,
        ``country`` and ``infra_name``, which are used for de-duplication.
    coms : pandas.DataFrame
        Company table with one row per licence, keyed by ``licence_id``. The
        merged result is expected to contain ``owner_name_normalised``.

    Returns
    -------
    geopandas.GeoDataFrame
        ``df`` with ``licence_name`` and the columns of ``coms`` added, reduced
        to one row per (``feature_id``, ``country``, ``infra_name``).
    """
    engine = create_engine(POSTGRES, connect_args={'options': '-csearch_path={}'.format('public')})

    # Get licence data. The connection is closed and the engine disposed as
    # soon as the table is loaded, so repeated calls don't pile up connections.
    try:
        with engine.connect() as connection:
            licence = gpd.GeoDataFrame.from_postgis(text('SELECT * FROM all_current_licences'), connection, geom_col='geometry')
    finally:
        engine.dispose()

    # A spatial join only makes sense when both layers share a CRS, so bring
    # the licences into the CRS of the features when the two differ.
    if df.crs is not None and licence.crs is not None and licence.crs != df.crs:
        licence = licence.to_crs(df.crs)

    # Perform spatial join on licences

    df = gpd.sjoin(df,
                licence[['geometry', 'licence_name']],
                how='left',
                predicate='intersects')
    df = df.drop(['index_right'], axis=1)

    # Merge the company data on the licence name (plain attribute join)

    df = pd.merge(df, 
                coms,
                left_on = 'licence_name',
                right_on = 'licence_id',
                how='left')

    # Clean it up: a feature that intersects several licences appears once per
    # licence after the join; only the first match is kept.

    df = df.drop_duplicates(subset=['feature_id', 'country', 'infra_name'], keep='first').copy()

    print(f'Merged {len(df)}, but could not merge {len(df[df.owner_name_normalised.isna()])} because of missing company names')

    return df

In [None]:
platforms = get_licence_and_company(platforms, com_to_merge)

In [None]:
platform_infra = get_licence_and_company(infrastructure, com_to_merge)

In [None]:
pipes_infra = get_licence_and_company(pipes, com_to_merge)

In [None]:
wellbores_infra = get_licence_and_company(wellbores, com_to_merge)

In [None]:
platforms.to_csv('../data/inactive/no_platforms.csv', index=False)

In [None]:
platform_infra.to_csv('../data/inactive/no_infra.csv', index=False)

In [None]:
pipes_infra.to_csv('../data/inactive/no_pipes.csv', index=False)

In [None]:
wellbores_infra.to_csv('../data/inactive/no_wellbores.csv', index=False)

### Add radius to platforms

In [None]:
# Create radius geometry: buffer every platform geometry by 500 units of the frame's CRS.
# NOTE(review): presumably the frame is in a metric projected CRS, making this a 500 m radius — confirm.

platforms['radius'] = platforms.geometry.buffer(500)

# Write radius to file
# `radius` gets the buffer as its active geometry and is reprojected to EPSG:4326;
# the actual export below is commented out, so nothing is written by this cell.

radius = platforms.drop('geometry', axis=1)
radius = radius.set_geometry('radius')
radius = radius.to_crs(4326)
#radius[['owner_name_normalised', 'owner_name', 'owner_country']] = radius[['owner_name_normalised', 'owner_name', 'owner_country']].astype(str)
#radius.to_file('../data/visuals/radius.geojson', driver='GeoJSON')

### Clip and write to file

In [None]:
# Clip infrastructure and pipeline datasets to the platform buffers in platforms['radius'].
# NOTE(review): the inputs are the raw `infrastructure`, `pipes` and `wellbores` frames, so
# these assignments rebind `platform_infra`, `pipes_infra` and `wellbores_infra` and replace
# the licence/company-enriched frames that get_licence_and_company() stored under those names
# in earlier cells — confirm this is intended.

platform_infra = gpd.clip(infrastructure, platforms['radius'])
pipes_infra = gpd.clip(pipes, platforms['radius'])
wellbores_infra = gpd.clip(wellbores, platforms['radius'])
#infra_total_infra = gpd.clip(infra_total, platforms['radius'])

In [None]:
def write_radius_to_geojson(df, name):
    """Write a layer to ``../data/visuals/{name}_radius.geojson``.

    The layer is reprojected to EPSG:4326 and tagged with a ``dataset`` column
    holding ``name``. Every layer except ``'pipes'`` also gets ``longitude``
    and ``latitude`` columns taken from its geometry. A ``radius`` column is
    dropped when present, and ``owner`` / ``purpose`` are cast to strings when
    present. Nothing is returned.
    """
    out = df.to_crs(4326)

    # Coordinate columns for everything but the pipes layer.
    if not name == 'pipes':
        out['longitude'] = out.geometry.x
        out['latitude'] = out.geometry.y

    # The buffer geometry is not part of the export.
    if 'radius' in out.columns:
        out = out.drop(columns='radius')

    out['dataset'] = name

    for column in ('owner', 'purpose'):
        if column in out.columns:
            out[column] = out[column].astype(str)

    out.to_file(f'../data/visuals/{name}_radius.geojson', driver='GeoJSON')

In [None]:
def write_all_to_geojson(df,name):
    """Write a complete layer to ``../data/visuals/{name}_all.geojson``.

    The layer is reprojected to EPSG:4326 and tagged with a ``dataset`` column
    holding ``name``; every layer except ``'pipes'`` also gets ``longitude``
    and ``latitude`` columns taken from its geometry. The reprojected frame is
    returned.
    """
    out = df.to_crs(4326)
    out['dataset'] = name

    if name != 'pipes':
        points = out.geometry
        out['longitude'] = points.x
        out['latitude'] = points.y

    out.to_file(f'../data/visuals/{name}_all.geojson', driver='GeoJSON')
    return out

In [None]:
# Write every layer to its own '<name>_radius.geojson' file.

dfs = [platforms, platform_infra, pipes_infra, wellbores_infra]
names = ['platforms', 'infrastructure', 'pipes', 'wellbores']

layers = dict(zip(names, dfs))
for name, df in layers.items():
    write_radius_to_geojson(df, name)


In [None]:
# Reload the three point layers and stack them into one export.
well, plat, infr = (
    gpd.read_file(path)
    for path in (
        '../data/visuals/wellbores_radius.geojson',
        '../data/visuals/platforms_radius.geojson',
        '../data/visuals/infrastructure_radius.geojson',
    )
)

df = pd.concat([well, plat, infr])
df.to_file('../data/visuals/infra_points_total_radius.geojson', driver='GeoJSON')

In [None]:
# Leave the 'MATTRESS' and 'PROTECTION' infra types out of the selected export.
excluded_types = ['MATTRESS', 'PROTECTION']
keep = ~df.infra_type.isin(excluded_types)
df = df[keep]
df.to_file('../data/visuals/infra_points_selected_radius.geojson', driver='GeoJSON')

## Analysis

### Platforms

In [None]:
platforms.status_normalised.value_counts()

#### Inactive platforms

In [None]:
# Select the platforms whose normalised status marks them as inactive

inactive_statuses = ['Partly removed', 'Not in use', 'Abandoned']
is_inactive = platforms.status_normalised.isin(inactive_statuses)

inactive = platforms.loc[is_inactive].copy()

print(f'There are {len(inactive)} inactive platforms')

In [None]:
# Inactive platforms by country

inactive.country.value_counts()

In [None]:
#for i, row in inactive.iterrows():
#    print(f'{row.country.upper()}: {row.infra_name}')

In [None]:
# Filter out removed platforms

removed = platforms[platforms.status_normalised=='Removed']

len(removed)

In [None]:
removed.country.value_counts()

In [None]:
#for i, row in removed.iterrows():
#    print(f'{row.country.upper()}: {row.infra_name}')

In [None]:
def get_ownership(df, col):
    """Count how often each value of ``col`` occurs, most frequent first.

    List-valued cells are exploded, so every list element is counted once per
    row it appears in. Scalar cells are counted as they are (strings are not
    split), and missing values are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``col``.
    col : str
        Name of the column to count.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``col`` and ``count``, sorted by ``count`` descending.
    """
    # Only `col` takes part in the count. The previous version also computed
    # `df[col].str.split(', ')` into a throw-away column, which had no effect
    # on the result but raised for columns without string values.
    return (
        df[[col]]
        .explode(col)
        .groupby([col]).size()
        .reset_index(name='count')
        .sort_values(by='count', ascending=False)
    )

In [None]:
dfs = [platforms, platform_infra, pipes_infra, wellbores_infra]
names = ['platforms', 'platform_infra', 'pipes_infra', 'wellbores_infra']
cols = ['owner_name_normalised', 'owner_country', 'name_normalised']
colnames = ['owner', 'owner_country', 'operator_name']

# One ownership table, written to its own CSV, per column/dataset combination.
to_merge = []
for position, col in enumerate(cols):
    colname = colnames[position]
    for name, df in zip(names, dfs):
        owner = get_ownership(df, col).assign(dataset=name)
        owner.to_csv(f'../data/visuals/{name}_{colname}_status.csv', index=False)
        to_merge.append(owner)
    
        

In [None]:
import glob

In [None]:
# Stack every '*_operator_name_status.csv' file into a single overview table.
dfs = [pd.read_csv(file) for file in glob.glob('../data/visuals/*_operator_name_status.csv')]
df = pd.concat(dfs)
df.to_csv('../data/visuals/operator_name_total.csv', index=False)

In [None]:
df.to_csv('../data/visuals/total_status.csv', index=False)

In [None]:
owner = get_ownership(platforms, 'owner_name_normalised')

In [None]:
owner.to_csv('../data/visuals/platforms_infra_owner_status.csv', index=False)

In [None]:
# `owner` was built with get_ownership(platforms, 'owner_name_normalised'), so its
# columns are 'owner_name_normalised' and 'count'; it has no 'country' column.
for i, row in owner.iterrows():
    print(f'{row.owner_name_normalised}: {row["count"]}')

In [None]:
# Create function for plotting ownership

def plot_ownership(df, col):
    """Draw a bar chart of the ten most frequent values of ``col``.

    The counting is delegated to ``get_ownership`` so the plotted numbers
    always match the tables produced by that function.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``col``.
    col : str
        Name of the column whose values are counted and plotted.

    Returns
    -------
    The object returned by ``DataFrame.plot(kind='bar', x=col)``.
    """
    top_ten = get_ownership(df, col).nlargest(columns='count', n=10)
    return top_ten.plot(kind='bar', x=col)

In [None]:
infra = get_licence_and_company(wellbores, com_to_merge)

In [None]:
infra.country.value_counts()

In [None]:
inactive = infra[infra.status_normalised.isin(['Not in use', 'Abandoned'])]

In [None]:
removed = infra[infra.status_normalised == 'Removed']

In [None]:
owner = get_ownership(inactive, 'owner_country')

In [None]:
for i, row in owner.iterrows():
    print(f'{row.owner_country}: {row["count"]}')

### Pipes

In [None]:
# All pipes
#pipes = pipes.to_crs(4326)
pipes_com = get_licence_and_company(pipes, com_to_merge)

In [None]:
# Owners (licence holders)

plot_ownership(pipes_com, 'owner_name_normalised')

In [None]:
# Operators

plot_ownership(pipes_com, 'name_normalised')

In [None]:
# Owner country

plot_ownership(pipes_com, 'owner_country')

In [None]:
pipes_com['type_normalised'].value_counts()

### Pipes in radius

In [None]:
pipes_infra_com = get_licence_and_company(pipes_infra, com_to_merge)

In [None]:
plot_ownership(pipes_infra_com, 'owner_name_normalised')

In [None]:
# NOTE(review): com_to_merge renames `name_local` to `owner_name`, so unless the pipeline
# data itself carries a `name_local` column this lookup fails — confirm the intended column.
plot_ownership(pipes_infra_com, 'name_local')

In [None]:
# Operators

plot_ownership(pipes_infra_com, 'name_normalised')

### Wellbores

In [None]:
#wellbores = wellbores.to_crs(4326)

In [None]:
wellbores_com = get_licence_and_company(wellbores, com_to_merge)

In [None]:
# Owner (by licence)

plot_ownership(wellbores_com, 'owner_name_normalised')

In [None]:
# Operators

plot_ownership(wellbores_com, 'name_normalised')

### Total infra in radius

In [None]:
# Licence holder
# NOTE(review): `infra_total_infra` is not assigned in any visible cell (its only other
# mention is a commented-out gpd.clip(...) line), so this cell and the two that follow
# raise NameError on a fresh kernel — confirm where the frame should come from.

plot_ownership(infra_total_infra, 'owner_name_normalised')

In [None]:
# Operator

plot_ownership(infra_total_infra, 'name_normalised')

In [None]:
plot_ownership(infra_total_infra, 'owner_country')

## Deep dive into platforms

In [None]:
df_platforms = platforms.to_crs(4326)
df_platform_infra = platform_infra.to_crs(4326)
df_pipes = pipes_infra.to_crs(4326)
df_wellbores = wellbores_infra.to_crs(4326)

In [None]:
# Look for interesting platforms

df = pd.concat([df_platforms, df_platform_infra, df_pipes, df_wellbores])

In [None]:
df = df.to_crs(4326)

In [None]:
# Create subset of inactive 

inactive = ['Not in use', 'Abandoned']

In [None]:
# `inactive` is a plain list of status labels at this point, so the matching rows are
# selected from `df` here. The labels are spelled out to keep the cell re-runnable
# after `inactive` is rebound to a frame further down.
inactive_rows = df[df.status_normalised.isin(['Not in use', 'Abandoned'])]
# na=False treats a missing licence name as "no match".
inactive_rows[inactive_rows.licence_name.str.contains('P18', na=False) & (inactive_rows.country == 'nl')]

In [None]:
df.type_normalised.value_counts()

In [None]:
# Groupby licence area and get overview of properties with many structures

# The status labels are written out instead of being read from `inactive`: that name is
# rebound to the filtered frame on the next line, so reading it would make the cell
# filter on a DataFrame when it is run a second time.
inactive = df[(df.type_normalised == 'Platform') & (df.status_normalised.isin(['Not in use', 'Abandoned']))]
# 'size' counts the rows per licence; it is the string form of the np.size aggregation.
inactive.groupby(['licence_name'])['feature_id'].agg(count='size').nlargest(20, 'count')

In [None]:
inactive.type_normalised.value_counts()

# Interesting projects:
1. 018: ekofisk ([wiki](https://nl.wikipedia.org/wiki/Ekofisk))
2. P117: brent ([shell decommissioning page](https://www.shell.co.uk/sustainability/decommissioning/brent-field-decommissioning/the-brent-story.html))
3. Q01c-diep 
4. Forties ([wiki](https://en.wikipedia.org/wiki/Forties_Oil_Field))
5. P18a might be interesting: close to the Porthos project. Abandoned ([wiki](https://nl.wikipedia.org/wiki/P18-veld))

In [None]:
inactive[(inactive.type_normalised=='Other') & (inactive.infra_type.isin(['MANIFOLD', 'LOADING SYSTEM', 'TEE PIECE', 'STORAGE TANK']))].licence_name.value_counts()

In [None]:
# Select platform of interest (poi)

poi = df[df.licence_name=='F16a & F16b']
poi[['type_normalised', 'name_normalised']].value_counts()

In [None]:
poi.owner_name_normalised.value_counts()

In [None]:
poi_to_file.head()

In [None]:
poi_to_file = poi.copy()
poi_to_file = poi_to_file.drop('radius', axis=1)
poi_to_file[['name_normalised', 'owner_country', 'owner_name_normalised', 'owner_name']] = poi_to_file[['name_normalised', 'owner_country', 'owner_name_normalised', 'owner_name']].astype(str)
poi_to_file.to_file('../data/visuals/f16a_f16b.geojson', driver='GeoJSON')

In [None]:
poi.explore()