In [None]:
import sys
from geopy import *
import pandas as pd
import numpy as np

import tyndp_to_pypsa

sys.path.insert(1, '../')
import utils

In [None]:
#### 2020 ####
# Column headers used in the TYNDP 2020 configuration -> column names used in this notebook.
column_semantics_2020 = {
    "Investment number": "investment_id",
    "This investment belongs to project number…": "project_id",
    "Commissioning Year": "commissioning_year",
    # The status header is a multi-line string; it is spelled out piecewise for readability.
    (
        "Status ID\n"
        "1 : Under Consideration,\n"
        "2 : In Planning but not permitting,\n"
        "3 : In permitting,\n"
        "4 : Under Construction"
    ): "status",
    "Type of Element": "asset_type",
    "Substation From": "substation_1",
    "Substation To": "substation_2",
    "Technology": "ac_dc",
    "Total route length (km)": "specified_length_km",
    "Description of the investment": "description",
}

# Numeric status ids (1..4, in the order given by the status header) -> readable labels.
status_map_2020 = dict(enumerate(
    [
        "under_consideration",
        "planned_not_yet_permitting",  # in planning, but not permitting
        "in_permitting",
        "under_construction",
    ],
    start=1,
))

# No need to separate AC and DC as there is a separate column
# in the TYNDP data.
asset_type_map_2020 = {
    **dict.fromkeys(["ACTransmissionLine", "DCTransmissionLine"], "line"),
    **dict.fromkeys(["OnshoreSubstation", "OffshoreSubstation"], "substation"),
    # 'cable' will be mapped to 'line' after adding column 'underground' of binary values.
    **dict.fromkeys(["OffshoreDCTransmissionCable", "OffshoreACTransmissionCable"], "cable"),
}

# Location of the 2020 workbook and the sheet to read from it.
excel_2020 = r"../2020/transmission.xlsx"
sheet_2020 = "Trans.Investments"

In [None]:
#### 2018 ####
# Column headers used in the TYNDP 2018 configuration -> column names used in this notebook.
column_semantics_2018 = {
    "Investment ID": "investment_id",
    "ExpectedCommissioningYear": "commissioning_year",
    "Status": "status",
    "ElementsType": "asset_type",
    "From": "substation_1",
    "To": "substation_2",
    "TechnologyType": "ac_dc",
    "VoltageLevel (kV)": "voltage",
    "TotalRouteLength (km)": "specified_length_km",
}

# Status labels -> numeric ids 1..4, assigned in list order.
status_map_2018 = {
    label: status_id
    for status_id, label in enumerate(
        [
            "under consideration",
            "planned but not yet permitting",
            "permitting",
            "under construction",
        ],
        start=1,
    )
}

# Element types -> asset categories; both cable variants share the category 'cable'.
asset_type_map_2018 = {
    "Overhead Line": "line",
    "Substation": "substation",
    **dict.fromkeys(["Subsea Cable", "Underground Cable"], "cable"),
}

# Location of the 2018 workbook and the sheet to read from it.
excel_2018 = r"../2018/TYNDP_2018_Project_List.xlsx"
sheet_2018 = "Sheet1"

In [None]:
#### 2016 ####
# Column headers used in the TYNDP 2016 configuration -> column names used in this notebook.
column_semantics_2016 = {
    "TYNDP 2016 Investment Index": "investment_id",
    "TYNDP 2016 expected commissioning date": "commissioning_year",
    "TYNDP 2016 status name": "status",
    "Element type": "asset_type",
    "From Substation name": "substation_1",
    "To Substation name": "substation_2",
    "AC or DC": "ac_dc",
    "Voltage Level": "voltage",
    "Line length [km]": "specified_length_km",
}

# TODO: commissioned, planning, design
# TODO: rather specify rows we want to keep? E.g. via list
# TODO: map values to strings (semantics!), then choose only those that are 'in permitting' or 'under construction'
# Status labels -> numeric ids 1..4, assigned in list order.
status_map_2016 = {
    label: status_id
    for status_id, label in enumerate(
        [
            "under consideration",
            "Design & Permitting",
            "permitting",
            "under construction",
        ],
        start=1,
    )
}

# Element types -> asset categories; both cable variants share the category 'cable'.
asset_type_map_2016 = {
    "Overhead Line": "line",
    "Substation": "substation",
    **dict.fromkeys(["Subsea Cable", "Underground Cable"], "cable"),
}

# Location of the 2016 workbook and the sheet to read from it.
excel_2016 = r"../2016/TYNDP2016_Projects data.xlsx"
sheet_2016 = "Investments"

In [None]:
#### 2014 ####
# Column headers used in the TYNDP 2014 configuration -> column names used in this notebook.
column_semantics_2014 = {
    "Investment index": "investment_id",
    "Expected date of commissioning": "commissioning_year",
    "Present status": "status",
    "Substation 1": "substation_1",
    "Substation 2": "substation_2",
    "Description": "description",
}

# TODO: commissioned, planning, design
# TODO: rather specify rows we want to keep? E.g. via list
# TODO: map values to strings (semantics!), then choose only those that are 'in permitting' or 'under construction'
# Status labels -> numeric ids 1..4, assigned in list order.
status_map_2014 = {
    label: status_id
    for status_id, label in enumerate(
        [
            "under consideration",
            "Design & Permitting",
            "permitting",
            "under construction",
        ],
        start=1,
    )
}

# Location of the 2014 workbook and the sheet to read from it.
excel_2014 = r"../2014/TYNDP2014_Projects_and_CBA assessment.xlsx"
sheet_2014 = "Investments"

In [None]:
#### 2012 ####
# Column headers used in the TYNDP 2012 configuration -> column names used in this notebook.
column_semantics_2012 = {
    "Investment number": "investment_id",
    "Expected date of commissioning": "commissioning_year",
    "Present status": "status",
    "Substation 1": "substation_1",
    "Substation 2": "substation_2",
    "Brief technical description": "description",
}

# Status labels -> numeric ids 1..4, assigned in list order.
status_map_2012 = {
    label: status_id
    for status_id, label in enumerate(
        [
            "under consideration",
            "Design & Permitting",
            "permitting",
            "under construction",
        ],
        start=1,
    )
}

# Location of the 2012 workbook and the sheet to read from it.
excel_2012 = r"../2012/120705_Table of projects_TYNDP 2012_package_FINAL.xlsx"
sheet_2012 = "TYNDP 2012 report"

In [None]:
#### 2010 ####
# Column headers used in the TYNDP 2010 configuration -> column names used in this notebook.
column_semantics_2010 = {
    "REF on map": "investment_id",
    "Expected time of commissioning": "commissioning_year",
    "Progress status": "status",
    "Substation 1": "substation_1",
    "Substation 2": "substation_2",
    "Project characteristics": "description",
}

# Status labels -> numeric ids 1..4, assigned in list order.
status_map_2010 = {
    label: status_id
    for status_id, label in enumerate(
        [
            "under consideration",
            "Design & Permitting",
            "permitting",
            "under construction",
        ],
        start=1,
    )
}

# Location of the 2010 workbook and the sheet to read from it.
excel_2010 = r"../2010/Table_of_projects_for_publication.xls"
sheet_2010 = "TABLE OF PROJECTS"

In [None]:
# Build the working table from the 2020 configuration. `prepare_tyndp_data` lives in the
# project-local `utils` module; its result is used as a DataFrame in the cells below.
wanted = utils.prepare_tyndp_data(
    excel_2020,
    sheet_2020,
    column_semantics_2020,
    status_map_2020,
    asset_type_map=asset_type_map_2020,
    header_row=1,
    base_url=r'https://tyndp2020-project-platform.azurewebsites.net/projectsheets/transmission/',
)
wanted.head()

In [None]:
# Split the prepared TYNDP table by asset type.
lines     = wanted.loc[wanted['asset_type'] == 'line']
new_subst = wanted.loc[wanted['asset_type'] == 'substation']

# Drop lines that start or end at a substation which is itself a new TYNDP investment.
# The former `query("substation_1 not in @new_subst")` passed the whole DataFrame to
# `isin`, which iterates over its *column labels*; no substation name ever equals a
# column label, so that filter never removed a single row.
# NOTE(review): assumes the substation rows carry the substation's name in
# 'substation_1' and/or 'substation_2' - confirm against the TYNDP sheet.
new_subst_names = (
    set(new_subst['substation_1'].dropna())
    | set(new_subst['substation_2'].dropna())
)
touches_new_subst = (
    lines['substation_1'].isin(new_subst_names)
    | lines['substation_2'].isin(new_subst_names)
)
lines = lines.loc[~touches_new_subst]

In [None]:
# TODO: temporary. Implement functionality to get the substations when the line length is not given.
# Until then, keep only the lines that have a specified route length.
lines = lines[lines['specified_length_km'].notna()]

# Use bus names from buses.csv (v0.1.0)
See https://github.com/PyPSA/pypsa-eur/blob/v0.1.0rc/data/entsoegridkit/buses.csv. Data is from 2017 (newer gridkit extracts do not contain 'tags' with substation names).

In [None]:
# Bus table produced by a helper from the project-local `tyndp_to_pypsa` module.
# The cells below rely on its 'name' and 'country' columns.
curated_buses = tyndp_to_pypsa.extract_name_country()
curated_buses.head()

## There are substations which share the same name but have different coordinates
- large deviation between coordinates => the substations are most likely in different countries
    - BUT: it does happen that different places in the same country share the same name
- small deviation between coordinates => both entries most likely refer to the same substation (error in the GridKit extract?)

In [None]:
# Country codes used to filter the bus table below.
# TODO: added 'NI' although Northern Ireland probably appears in PyPSA as 'GB'. Find a better solution.
pypsa_countries = [
    "AL", "AT", "BA", "BE", "BG", "CH", "CZ", "DE", "DK", "EE",
    "ES", "FI", "FR", "GB", "GR", "HR", "HU", "IE", "IT", "LT",
    "LU", "LV", "ME", "MK", "NI", "NL", "NO", "PL", "PT", "RO",
    "RS", "SE", "SI", "SK",
]

### List of all duplicates

In [None]:
# Rows whose name already occurred earlier in the table (the first occurrence of each
# name is not flagged), restricted to the countries listed in `pypsa_countries`.
duplicated = (
    curated_buses[curated_buses['name'].duplicated()]
    .query("country in @pypsa_countries")
)

# Uncomment to print the coordinates and country of every bus sharing a duplicated name:
# for name in duplicated.name.unique():
#     print(name)
#     for index, row in curated_buses.query('name == @name').iterrows():
#         print(f"({row['x']}, {row['y']}), {row['country']}")
#     print('----')

### Same name and country, large deviations

In [None]:
# Show every bus entry named 'Yuzhnaya' to compare their coordinates and countries.
curated_buses.query("name == 'Yuzhnaya'")

### Same name, different country, large deviation

In [None]:
# Show every bus entry named 'Saida' to compare their coordinates and countries.
curated_buses.query("name == 'Saida'")

In [None]:
# Show every bus entry named 'Titan' to compare their coordinates and countries.
curated_buses.query("name == 'Titan'")

## (TODO) Add new substations

In [None]:
# on_subst

# extract country if it matches regex
# otherwise, np.NAN

## Remove '(\<Country Code\>) ' from tyndp substation name strings, add new column instead
Otherwise, the country-code prefix could distort the Levenshtein distance between a TYNDP substation name and the matching bus name.

In [None]:
# Re-bind `lines` to the frame returned by the project-local helper.
# NOTE(review): presumably this moves the '(<Country Code>) ' prefix of the substation
# names into a separate column - confirm in tyndp_to_pypsa.
lines = tyndp_to_pypsa.prepare_substation_names(lines)
lines.head()

## Create a mapping from all unique TYNDP substation names to substation names from 'buses'

In [None]:
# Lookup from TYNDP substation names to bus names in `curated_buses`, built by a
# project-local helper (presumably via string distance - TODO confirm).
tyndp_to_bus = tyndp_to_pypsa.tyndp_to_substation(lines, curated_buses)

In [None]:
# a, b = 'Turleenan', 'Guillena'
# a, b = 'Pyhanselka', 'Pyhänselkä'
# a, b = 'Tuomela B', 'Tudela'
# a, b =  'Heviz (HU) \\ Zerjavinec', 'Žerjavinec'
# Levenshtein.distance(a.lower(), b.lower())

# Match start- and endpoints of lines to substations from buses.csv

In [None]:
# `results` holds the lines the helper could match; lines whose index is missing from
# `results.index` are treated as unmatched in the cells below.
results = tyndp_to_pypsa.match_tyndp_with_buses(lines, tyndp_to_bus, curated_buses)
results.head()

In [None]:
# TODO: only join with result to compare original location and substation names with inferred
# result = lines.copy()
# result = result.join(coordinates)

# Share of the lines that made it into `results`.
percentage = results.shape[0] / lines.shape[0]
print(f'{percentage * 100}% of lines are probably correct.')

# print('Lines where we probably found the correct coordinates:')
# result.loc[~result.s1.isna()]

# Determine remaining coordinates using geopy

In [None]:
# Lines whose index label does not occur in `results`.
error_lines = lines[~lines.index.isin(results.index)]

In [None]:
# Fallback for the lines that are not in `results`: the project-local helper resolves
# them separately (presumably by geocoding with geopy - TODO confirm).
geopy_results = tyndp_to_pypsa.match_tyndp_with_geopy(error_lines)
geopy_results.head()

In [None]:
# Stack the geopy matches below the bus matches.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0;
# `pd.concat` is the supported equivalent and keeps the original index labels.
results = pd.concat([results, geopy_results])

# Share of the lines for which coordinates were found after adding the geopy matches.
percentage = results.index.size / lines.index.size
print(f'{percentage * 100}% of lines are probably correct.')

# Map commissioning dates to yearly values

In [None]:
# Overwrite the 'commissioning_year' column with the values returned by the
# project-local helper.
results['commissioning_year'] = tyndp_to_pypsa.commissioning_dates_to_year(results['commissioning_year'])

In [None]:
# Display the combined result table for inspection.
results

# Load lines.csv and add commissioning years
- find out which lines are updated (e.g. with higher voltage)
- add new lines
- add fixed commissioning year to lines where we're missing this information

In [None]:
import numpy as np
import scipy.spatial

In [None]:
def match_with_pypsa_lines(tyndp, lines_file='lines.csv', distance_upper_bound=1.25):
    """Attach the id of the closest existing PyPSA line to every TYNDP line.

    Based on ``_find_closest_links`` from
    https://github.com/PyPSA/pypsa-eur/blob/master/scripts/base_network.py

    Each line is represented by the 4-vector of its endpoint coordinates
    ``(x_start, y_start, x_end, y_end)``. Every TYNDP line is looked up in both
    orientations, because the direction of a line is arbitrary.

    Parameters
    ----------
    tyndp : pandas.DataFrame
        TYNDP lines with the endpoint columns 'x1', 'y1', 'x2' and 'y2'.
    lines_file : str
        CSV file (single-quote quoting, 't'/'f' booleans) with at least the columns
        'line_id', 'length' and 'geometry', the latter holding WKT line strings.
    distance_upper_bound : float
        Largest Euclidean distance between two endpoint 4-vectors that still
        counts as a match, in the units of the coordinates.

    Returns
    -------
    pandas.DataFrame
        ``tyndp`` with an added column 'line_id' holding the id of the nearest
        PyPSA line, or NaN where no line lies within ``distance_upper_bound``.
        The result has exactly one row per row of ``tyndp``.
    """
    # shapely is required here but is not imported anywhere in the notebook.
    import shapely.wkt

    pypsa_lines = (
        pd.read_csv(lines_file, quotechar="'", true_values=['t'], false_values=['f'],
                    dtype=dict(line_id='str', bus0='str', bus1='str', under_construction="bool"))
        .set_index('line_id')
    )
    # NOTE(review): presumably converts metres to kilometres - confirm the unit in lines.csv.
    pypsa_lines['length'] /= 1e3

    # First and last vertex of every PyPSA line. The coordinates are read through
    # `.coords`: converting the geometry itself with np.asarray() no longer yields
    # the coordinate array in Shapely 2.x.
    treecoords = np.asarray([
        np.asarray(shapely.wkt.loads(wkt_string).coords)[[0, -1]].flatten()
        for wkt_string in pypsa_lines.geometry
    ])
    # Each TYNDP line is queried twice: as given and with swapped endpoints.
    querycoords = np.vstack([tyndp[['x1', 'y1', 'x2', 'y2']],
                             tyndp[['x2', 'y2', 'x1', 'y1']]])
    tree = scipy.spatial.KDTree(treecoords)
    dist, ind = tree.query(querycoords, distance_upper_bound=distance_upper_bound)

    # KDTree.query marks "nothing within the bound" with an index equal to the tree size.
    found_b = ind < len(pypsa_lines)
    # Fold the two stacked query halves back onto positions in `tyndp`.
    found_i = np.arange(len(tyndp) * 2)[found_b] % len(tyndp)

    candidates = pd.DataFrame(
        {
            'distance': dist[found_b],
            'line_id': pypsa_lines.index.to_numpy()[ind[found_b]],
        },
        index=tyndp.index[found_i],
    )
    # A TYNDP line can match in both orientations. Keep only its nearest match so
    # that the join below cannot duplicate rows of `tyndp`.
    candidates = candidates.sort_values(by='distance', kind='stable')
    matched = candidates.loc[~candidates.index.duplicated(keep='first'), 'line_id']

    return tyndp.join(matched)

In [None]:
# TODO: find out bus ids for new lines