In [11]:
from geopy.geocoders import Nominatim
from geopy import distance
import pandas as pd
import numpy as np
import math
import re

In [7]:
excel = 'transmission.xlsx'
sheet = 'Trans.Investments'

df = pd.read_excel(excel, sheet_name=sheet)

# TODO: might not work in later versions, use sanity check.
df.columns = df.iloc[0]
df         = df.drop(index=df.index[0])

# TODO: also contains new substations!

wanted_columns = ['Investment number',
                  'This investment belongs to project number…',
                  'Commissioning Year',
                  'Status ID\n1 : Under Consideration,\n2 : In Planning but not permitting,\n3 : In permitting,\n4 : Under Construction',
                  'Type of Element',
                  'Substation From',
                  'Substation To',
                  'Technology',
                  'Total route length (km)']
df[wanted_columns]

Unnamed: 0,Investment number,This investment belongs to project number…,Commissioning Year,"Status ID\n1 : Under Consideration,\n2 : In Planning but not permitting,\n3 : In permitting,\n4 : Under Construction",Type of Element,Substation From,Substation To,Technology,Total route length (km)
1,2,1,2024,2,ACTransmissionLine,Pedralva (PT),Sobrado (PT),AC,67
2,4,1,2022,3,ACTransmissionLine,V.Minho (by Ribeira de Pena),Feira (by Ribeira de Pena),AC,131
3,474,1,2021,3,OnshoreSubstation,Ribeira de Pena (PT),-,AC,0
4,18,4,2022,3,ACTransmissionLine,Beariz (ES),Fontefria (ES),AC,30
5,496,4,2022,3,ACTransmissionLine,Fontefria (ES),Vila Nova de Famalicão (PT) (By Ponte de Lima),AC,140.21
...,...,...,...,...,...,...,...,...,...
317,1747,1077,2036,1,DCTransmissionLine,Thessaloniki,Dubrovo,DC,110
318,1748,1077,2036,1,DCTransmissionLine,Dubrovo,Ch. Mogila,DC,170
319,1745,1081,2036,1,OffshoreDCTransmissionCable,Tobruk,Arachtos,DC,1070
320,1749,1081,2036,1,DCTransmissionLine,Arachtos,Elbasan,DC,180


In [48]:
wanted   = df[wanted_columns]
ac_lines = wanted.loc[wanted['Type of Element'] == 'ACTransmissionLine']
ac_lines = ac_lines.reindex(columns=ac_lines.columns.tolist() + ['x1', 'y1', 'x2', 'y2', 'coord_distance'])
ac_lines = ac_lines.loc[]

In [51]:
ac_lines.loc[ac_lines['Status ID%'] >= 3]

KeyError: 'Status ID%'

In [94]:
locator = Nominatim(user_agent='esm_group')
geocode = RateLimiter(locator.geocode, min_delay_seconds=0.05)

In [95]:
x1 = []
y1 = []
x2 = []
y2 = []

error_rows = []

for index, row in ac_lines.iterrows():
    fr   = row['Substation From']
    to   = row['Substation To']
    dist = row['Total route length (km)']

    fr_loc = geocode(fr)
    to_loc = geocode(to)

    if fr_loc is None or to_loc is None:
        error_rows.append([row.values])
        continue
        
    fr_coords  = fr_loc.latitude, fr_loc.longitude
    to_coords  = to_loc.latitude, to_loc.longitude
    coord_dist = distance.distance(fr_coords, to_coords).km

    if math.isclose(coord_dist, dist, rel_tol=0.25):
        x1.append(fr_coords[0])
        y1.append(fr_coords[1])
        
        x2.append(to_coords[0])
        y2.append(to_coords[1])
    else:
        error_rows.append([row.values])
        # x1.append(np.NAN)
        # y1.append(np.NAN)
        
        # x2.append(np.NAN)
        # y2.append(np.NAN)

In [99]:
len(x1), len(error_rows)

(55, 122)

In [92]:
error_rows

[array([4, 1, '2022', 3, 'ACTransmissionLine',
        'V.Minho (by Ribeira de Pena)', 'Feira (by Ribeira de Pena)', 'AC',
        131, nan, nan, nan, nan, nan], dtype=object),
 array([496, 4, '2022', 3, 'ACTransmissionLine', 'Fontefria (ES)',
        'Vila Nova de Famalicão (PT) (By Ponte de Lima)', 'AC', 140.21,
        nan, nan, nan, nan, nan], dtype=object),
 array([60, 23, '2022', 4, 'ACTransmissionLine', 'Avelin/Mastaing (FR)',
        'Horta (BE)', 'AC', 80, nan, nan, nan, nan, nan], dtype=object)]

# Use bus names from buses.csv (v0.1.0)

In [5]:
buses_file = 'buses_v0.1.0.csv'
# see base_network.py in PyPSA-Eur repository
buses = (pd.read_csv(buses_file, quotechar="'",
                     true_values='t', false_values='f',
                     dtype=dict(bus_id="str"))
        .set_index("bus_id")
        .drop(['station_id'], axis=1)
        .rename(columns=dict(voltage='v_nom')))

In [42]:
test = buses.iloc[0].tags.split(',')
test = [s.strip() for s in test]

In [32]:
test

['"oid"=>"637"',
 '"country"=>"SA"',
 '"visible"=>"1"',
 '"capacity"=>"-1.0"',
 '"name_eng"=>"Jeddah"',
 '"name_nat"=>" "',
 '"objectid"=>"657.0"',
 '"duplicate"=>"2"',
 '"mb_symbol"=>"substation"',
 '"annotation"=>""']

In [33]:
p = re.compile(r'"(?P<key>\w+)"=>"(?P<value>[\w\s\-\.]*)"')
m = p.match(test[0])

In [37]:
m.group('value')

'637'

## Add columns name_eng and country to buses df

In [113]:
split_regex = r'("\w+"=>"[^"]*"),'

tag_regex   = r'"(?P<key>\w+)"=>"(?P<value>[^"]*)"'
tag_pattern = re.compile(tag_regex)

names     = []
countries = []

for index, row in buses.iterrows():
    name    = ''
    country = ''
    
    tags = row['tags']
    # print(tags)
    
    if tags is not np.NAN:
        tags = re.split(split_regex, tags)
        tags = list(filter(None, [s.strip() for s in tags]))
        print(tags)
    
        for tag in tags:
            m = tag_pattern.match(tags)
            
            if m is None:
                print(tag)
            
            key   = m.group('key')
            value = m.group('value')
        
            if key == 'name_eng':
                name = value
            elif key == 'country':
                country = value
            
    names.append(name)
    countries.append(countries)

['"oid"=>"637"', '"country"=>"SA"', '"visible"=>"1"', '"capacity"=>"-1.0"', '"name_eng"=>"Jeddah"', '"name_nat"=>" "', '"objectid"=>"657.0"', '"duplicate"=>"2"', '"mb_symbol"=>"substation"', '"annotation"=>""']


TypeError: expected string or bytes-like object

In [57]:
# string = '"oid"=>"2801", "country"=>"UA", "visible"=>"1", "capacity"=>"-1.0", "name_eng"=>"Promet., Прометей", "name_nat"=>" ", "objectid"=>"2928.0", "duplicate"=>"1", "mb_symbol"=>"substation", "annotation"=>"Promet., Прометей"'
# string = '"oid"=>"2740", "country"=>"UA", "visible"=>"1", "capacity"=>"-1.0", "name_eng"=>"Kryvorizka TPP", "name_nat"=>"Криворізька ТЕС", "objectid"=>"2867.0", "duplicate"=>"1", "mb_symbol"=>"other_nl", "annotation"=>"Kryvorizka TPP""oid"=>"2796", "country"=>"UA", "visible"=>"1", "capacity"=>"-1.0", "name_eng"=>"Kirova", "name_nat"=>"Кірова", "objectid"=>"2923.0", "duplicate"=>"1", "mb_symbol"=>"substation", "annotation"=>"Kirova"'
m = p.match(string)

In [80]:
tag_regex = r'(?P<tag>"(?P<key>\w+)"=>"(?P<value>[^"]*))"'
p         = re.compile(tag_regex)

string = '"name_eng"=>"Promet., Прометей", "name_nat"=>" ", "objectid"=>"2928.0", "duplicate"=>"1", "mb_symbol"=>"substation", "annotation"=>"Promet., Прометей"'
m = p.match(string)
m.groups()

AttributeError: 'NoneType' object has no attribute 'groups'

In [96]:
string = '"name_eng"=>"Promet., Прометей", "name_nat"=>" ", "objectid"=>"2928.0"'

pattern = r'("\w+"=>"[^"]*"),'
splitted = re.split(pattern, string)

In [106]:
splitted = list(filter(None, [s.strip() for s in splitted]))

In [107]:
splitted

['"name_eng"=>"Promet., Прометей"', '"name_nat"=>" "', '"objectid"=>"2928.0"']