In [754]:
from bs4 import BeautifulSoup
import re

import pandas as pd
import numpy as np
from fuzzywuzzy import fuzz, process

In [None]:
# parsing contingency list
# extracting lines, generator, and transformers
# additionally, capacitor, phase shifter, series device can be added
# per: https://www.pjm.com/markets-and-operations/energy/lmp-model-info.aspx

In [724]:
soup = BeautifulSoup(open("data/PJM Contingency Definitions_5_15_2020.htm"), "html.parser")
content = soup.body.pre.contents
content = content[0]

In [725]:
parsed = content.split("\n")
parsed = list(map(lambda x: x.strip(), parsed))

In [1033]:
data = {}

for s in parsed:
    if bool(re.search("[YN]{25}\s[YN]{25}", s)):
        s = s[:58]
        idx = s.split("  ")[0].strip()
        name = s.split("  ")[1].strip()
        data[idx] = [name]
    else:
        idx = list(data.keys())[-1]
        data[idx].append(s)

In [1034]:
# check duplicate values in different lists
# check remaining values

# a lot of low voltage lines (below 69kV it seems?) don't seem to be included?
# a lot of generators don't seem to be modeled as contingencies? Why? Because they are peaker units or renewables?
# does that mean maybe a lot of lines are not modeled as well?


# get contingencies with lines, generators, and transformers
lines = []
transformers = []
generators = []

for value in data.values():
    contingency = value[0]
    
    # deal with combo
    contingency = list(map(lambda x: x.strip(), re.split(r"\+|\&", contingency)))
    
    # iterate through components in contingency
    for s in contingency:
        
        # check if it's a line
        if bool(re.search("^L\d+\.", s)):
            lines.append(s)
        
        # check if it's a transformer
        elif bool(re.search("^\d+/\d+\.", s)):
            transformers.append(s)

        # check if it's a generator
        elif bool(re.search("^[A-Za-z]+\.", s)):
            if bool(re.search("^RAS.", s)):
                continue
            else:
                generators.append(s)    
        else:
            print(value[0], '---------', s)

For TT Transfer Interface Select --------- For TT Transfer Interface Select
138/69.Loretto.AT1&2 --------- 2
138/69.Edgemoor.AT1&AT2 --------- AT2
500/138.Cabot.T1&T3 (Sctnlz F) --------- T3 (Sctnlz F)
DBL:CalvertCliffs.U1 & CalvertCliffs.U2 --------- DBL:CalvertCliffs.U1
(NUKE)PeachBottom.U2 --------- (NUKE)PeachBottom.U2
(NUKE)PeachBottom.U3 --------- (NUKE)PeachBottom.U3
NE ISO LARGEST UNIT TRIP (SIMULATED AT SANDYPOND) Y --------- NE ISO LARGEST UNIT TRIP (SIMULATED AT SANDYPOND) Y
RAS.L765.Chateauguay-Massena + Marcy-Massen(NYISO)Y --------- Marcy-Massen(NYISO)Y
Burlington.CT121 + CT122 + CT123 + CT124 --------- CT122
Burlington.CT121 + CT122 + CT123 + CT124 --------- CT123
Burlington.CT121 + CT122 + CT123 + CT124 --------- CT124
L138.Burlingtn-Levitwn-Cinam-Camdn.J1310&BuCT91-94Y --------- BuCT91-94Y
L500.Branchburg-Elroy.5017 + Elroy.T1&T2 --------- T2
500/13/230.Limerick.T4A&T4B --------- 500/13/230.Limerick.T4A
500/13/230.Limerick.T4A&T4B --------- T4B
MILLSTONE STATION Units 

345/138.Harding.TR1 + TR2 --------- TR2
345/138.Juniper.T2&T3 --------- T3
Lemoyne (FE) 345/138 kV #1 Transformer --------- Lemoyne (FE) 345/138 kV #1 Transformer
Lemoyne (FE) 345/138 kV #2 Transformer --------- Lemoyne (FE) 345/138 kV #2 Transformer
Shenango (FE) 345/138 kV #1 Transformer --------- Shenango (FE) 345/138 kV #1 Transformer
Shenango (FE) 345/138 kV #2 Transformer --------- Shenango (FE) 345/138 kV #2 Transformer
L138.Greene-09Airway + 09Airway.T7 --------- 09Airway.T7
09URBANA - Bath 138 kV Line --------- 09URBANA - Bath 138 kV Line
L138.Monument-Overlook-Knollwod + Transformers --------- Transformers
09SIDNEY 138/69 kV Transformer --------- 09SIDNEY 138/69 kV Transformer
09TREBEI 138/69 kV Transformer --------- 09TREBEI 138/69 kV Transformer
Wefostor - 02woodvj 138 kV line --------- Wefostor - 02woodvj 138 kV line
2015LINE: DULLES-RESTON --------- 2015LINE: DULLES-RESTON
L230.Glebe-Pentagon.2037+230/69.Pentagon.TX3&4 --------- 4
Bath County.U1 --------- Bath County.U1
B

224 LINE: LANEXA-NRTHERNNECK and 2024 for mkts --------- 224 LINE: LANEXA-NRTHERNNECK and 2024 for mkts
1517 Stuck@GreenA: OLIVE-GreenAcres & StJohn-Green --------- 1517 Stuck@GreenA: OLIVE-GreenAcres
1517 Stuck@GreenA: OLIVE-GreenAcres & StJohn-Green --------- StJohn-Green
JacksonsFerry-Huffman 138kv & Galax cap switching --------- JacksonsFerry-Huffman 138kv
JacksonsFerry-Huffman 138kv & Galax cap switching --------- Galax cap switching
DBL.Chesapke-Deep Crk & Commonwealth 115 kV lines --------- Commonwealth 115 kV lines
WILTON CTR-DUMONT 765KV & GARYAVE-DUNEACRES 345KV --------- WILTON CTR-DUMONT 765KV
WILTON CTR-DUMONT 765KV & GARYAVE-DUNEACRES 345KV --------- GARYAVE-DUNEACRES 345KV
WILTON CTR-DUMONT 765KV & SHEFFIELD-GARYAVE 345KV --------- WILTON CTR-DUMONT 765KV
WILTON CTR-DUMONT 765KV & SHEFFIELD-GARYAVE 345KV --------- SHEFFIELD-GARYAVE 345KV
DBL:Hanna-Juniper & Hanna-Chamberlin --------- DBL:Hanna-Juniper
DBL:Hanna-Juniper & Hanna-Chamberlin --------- Hanna-Chamberlin
DBL:St

DBL.L345.LeeCounty-Nelson & Nelson-ElectricJct --------- Nelson-ElectricJct
L345.Burnham-StateLine-Sheffild+StateLineTR81&TR82 --------- StateLineTR81
L345.Burnham-StateLine-Sheffild+StateLineTR81&TR82 --------- TR82
DBL:L345.Byron-CherryVall.0621+Byron-Wempltwn.0624 --------- DBL:L345.Byron-CherryVall.0621
DBL:L345.Byron-CherryVall.0621+Byron-Wempltwn.0624 --------- Byron-Wempltwn.0624
Spec:Penelec N-1-1 ERI_W-WAY + ERI_E-SRIP --------- Spec:Penelec N-1-1 ERI_W-WAY
Spec:Penelec N-1-1 ERI_W-WAY + ERI_E-SRIP --------- ERI_E-SRIP
Spec:Penelec N-1-1 ERI_W-WAY + ERI_S-4MI --------- Spec:Penelec N-1-1 ERI_W-WAY
Spec:Penelec N-1-1 ERI_W-WAY + ERI_S-4MI --------- ERI_S-4MI
Spec:Penelec N-1-1 ERI_W-WAY + ERI_W-ASH-PERY --------- Spec:Penelec N-1-1 ERI_W-WAY
Spec:Penelec N-1-1 ERI_W-WAY + ERI_W-ASH-PERY --------- ERI_W-ASH-PERY
Spec:Penelec N-1-1 ERI_E-S_RIP + ERI_W-ASH-PERY --------- Spec:Penelec N-1-1 ERI_E-S_RIP
Spec:Penelec N-1-1 ERI_E-S_RIP + ERI_W-ASH-PERY --------- ERI_W-ASH-PERY
Spec:Pe

TRPL: HATH-ROCK MT 2058 & 2181 & ROCK MT-BATT 123A --------- TRPL: HATH-ROCK MT 2058
TRPL: HATH-ROCK MT 2058 & 2181 & ROCK MT-BATT 123A --------- 2181
TRPL: HATH-ROCK MT 2058 & 2181 & ROCK MT-BATT 123A --------- ROCK MT-BATT 123A
138/69/34.HickoryCreek.T4 + 34.Bendixtp.Y Closing --------- 138/69/34.HickoryCreek.T4
138/69/34.HickoryCreek.T4 + 34.Bendixtp.Y Closing --------- 34.Bendixtp.Y Closing
TEMP: L345.Beaver-DavisBesse+345/138.Hayes.TR1 --------- TEMP: L345.Beaver-DavisBesse
TEMP:L138.Brgn-ERfrd.R-1344+L138.Brgn-Frlwn.M-1339 --------- TEMP:L138.Brgn-ERfrd.R-1344
TEMP:L138.IUPurdue-RobisonPrk w/ LdSwap+IndstrlCap --------- TEMP:L138.IUPurdue-RobisonPrk w/ LdSwap
TEMP:L138.IUPurdue-RobisonPrk w/ LdSwap+IndstrlCap --------- IndstrlCap
TEMP:L138.IUPurdue-Spyrun w/ LdSwap + IndstrialCap --------- TEMP:L138.IUPurdue-Spyrun w/ LdSwap
TEMP:L138.IUPurdue-Spyrun w/ LdSwap + IndstrialCap --------- IndstrialCap
DTX230.LewisRun.TR6 --------- DTX230.LewisRun.TR6
Loretto-Vienna 13780 + 23002 Outa

DTX138.Gavin.DWAS (Station Serv) + Gavin.U1&U2 --------- DTX138.Gavin.DWAS (Station Serv)
DTX138.Gavin.DWAS (Station Serv) + Gavin.U1&U2 --------- U2
138/34.48Highland.TR79+TR72 --------- TR72
Temp:L138.Fowles-Hummel-Fox.Q13 + Hummel-Essex.2 --------- Temp:L138.Fowles-Hummel-Fox.Q13
Temp:L138.Fowles-Hummel-Fox.Q13 + Hummel-Essex.2 --------- Hummel-Essex.2
TEMP:230/138.EDanville.T4 (Scntlz F) + LX.EMonumnt --------- TEMP:230/138.EDanville.T4 (Scntlz F)
Temp:Sammis CR5 Station Serv + U5&U6 --------- Temp:Sammis CR5 Station Serv
Temp:Sammis CR5 Station Serv + U5&U6 --------- U5
Temp:Sammis CR5 Station Serv + U5&U6 --------- U6
TEMP:Sammis CR5 Station Serv + U5&U6&U7 --------- TEMP:Sammis CR5 Station Serv
TEMP:Sammis CR5 Station Serv + U5&U6&U7 --------- U5
TEMP:Sammis CR5 Station Serv + U5&U6&U7 --------- U6
TEMP:Sammis CR5 Station Serv + U5&U6&U7 --------- U7
TEMP:Sammis CR4 + 345.EastBus + CR5 + U5&U6&U7 --------- TEMP:Sammis CR4
TEMP:Sammis CR4 + 345.EastBus + CR5 + U5&U6&U7 --------- 

In [728]:
# see if there are 1) duplicates within a list, 2) any overlap between lists
# import collections
# print([item for item, count in collections.Counter(lines_2).items() if count > 1])

# import collections

# print([item for item, count in collections.Counter(lines_2).items() if count > 1])

In [1035]:
print("lines:", len(lines))
print("unique line:", len(set(lines)))
print()

print("transformer:", len(transformers))
print("unique transformer:", len(set(transformers)))
print()

print("generator:", len(generators))
print("unique generator:", len(set(generators)))
print()

print("total:", len(lines)+len(transformers)+len(generators))
print("unique total:", len(set(lines+transformers+generators)))
print()

print("total contingencies (however note that combo counts as 1):", len(data.keys()))
print("missing (at least):", len(data.keys()) - len(set(lines+transformers+generators)))

lines: 5037
unique line: 4879

transformer: 1873
unique transformer: 1791

generator: 1152
unique generator: 787

total: 8062
unique total: 7457

total contingencies (however note that combo counts as 1): 8894
missing (at least): 1437


# deal with lines

In [1300]:
lines = []

for key in data.keys():
    s = data[key][0]
    
    # deal with combo
    combos = list(map(lambda x: x.strip(), re.split(r"\+|\&", s)))
    
    # iterate over combos
    for s in combos:

        # eliminate RAS if exists - what does it mean?
        if s[:3] == "RAS":
            s = s[4:].strip()

        # eliminate TEMP if exists - what does it mean?
        if s[:4] in ["TEMP", "Temp"]:
            s = s[5:].strip()

        # eliminate DBL if exists - what does it mean? Double losses?
        if s[:4] in ["DBL:", "DBL.", "DBL ", "Dbl:"]:
            s = s[4:].strip()
        
        # eliminate Relay if exists - what does it mean?
        if s[:5] == "Relay":
            s = s[5:].strip()
        
        # deal with NSEW
        if s[:2] in ["N.", "S.", "E.", "W."]:
            s = s[:1] + s[2:]
        
        # deal with DTX.L if exists - what does it mean?
        if s[:5] == "DTX.L":
            s = s[4:]
            
        # deal with DAYTON
        if s[:7] == "DAYTON.":
            s = s[7:]
        
        # deal with NUKE
        if s[:6] == "(NUKE)":
            s = s[6:].strip()
        
        # check if "-" exists, implying line
        if bool(re.search("-", s)):
            
            # include for sure lines - those that start with such as L345, L69, etc.
            if re.match(r"L\d+", s):
                lines.append((s, key))
                continue

            # weed out transformers
            if re.match(r"\d+/\d+", s):
                continue
            
            # weed out generators
            if re.match(r"^[A-Za-z]+\.", s):
                continue
            
            # include other lines - those that mention "line"
            if bool(re.search("\s[Ll][Ii][Nn][Ee]", s)):
                lines.append((s, key))
                continue
            
            # include other lines - "word-word"
            if bool(re.search("[A-Za-z]+-[A-Za-z]+", s)):
                lines.append((s, key))
                continue
            
            # include other lines - "number word-number word" or "number_word-number_word"
            if bool(re.search("\d+[_\s][A-Za-z]+-\d+[_\s][A-Za-z]+", s)):
                lines.append((s, key))
                continue

            # include other lines - "word-number-word"
            if bool(re.search("[A-Za-z]+-\d+-[A-Za-z]+", s)):
                lines.append((s, key))
                continue

            # include other lines - "word - word"
            if bool(re.search("[A-Za-z]+\s-\s[A-Za-z]+", s)):
                lines.append((s, key))
                continue
            
            # include other lines - "" (i.e. ERI_S-4MI)
            if bool(re.search("[A-Za-z]+_[A-Za-z]-\d+[A-Za-z]+", s)):
                lines.append((s, key))
                continue
    
            # the rest does not seem to be lines, skip for now (except for perhaps 5011 - 2304 -110511-110512)
            continue

        # if no "-", most are not lines
        else:
            
            # some that mention "lines" are lines
            if bool(re.search("\s[Ll][Ii][Nn][Ee]", s)):
                lines.append((s, key))
                continue
            
            continue

In [1301]:
# create adjacency list representation

adj_list = {}

for line_key in lines:
    # extract vaue
    line = line_key[0]
    key = line_key[1]
    
    # get substation names and possible matches
    subs = line.split("-")
    possible_matches = list(map(lambda x: x[:13].strip(), data[key][1:]))
    
    # get voltage
    possible_voltages = {x[20:29].strip():x[29:39].strip() for x in data[key][1:] if x[13:20].strip() == "L"}
    if len(possible_voltages.keys()) == 0:
        tmp = []
        tmp+=re.findall(r'L(\d+)\.', line)
        tmp+=re.findall(r'\s\d+\s[Kk][Vv]', line)
        if len(tmp) == 1:
            possible_voltages[line] = tmp[0]
        else:
            print("Can't find appropriate voltage associated with the following contingency id:", key)
    
    # iterate thorugh the over the lines
    for i in range(len(subs)-1):
        # get substation
        s1 = subs[i]
        s2 = subs[i+1]
        
        # find substation match
        match1 = process.extractOne(s1, possible_matches)[0]
        match2 = process.extractOne(s2, possible_matches)[0]
        
        # find voltage match
        if len(possible_voltages.keys()) == 0:
            voltage_match = np.nan
        else:
            voltage_match = process.extractOne(s1+"-"+s2, possible_voltages.keys())[0]
            voltage_match = possible_voltages[voltage_match]
            voltage_match = re.findall(r'\d+', voltage_match)[0]

#         # for debugging
#         print(s1, ":", match1)
#         print(s2, ":", match2)
#         # for debugging
        
        # add to dictionary
        if match1 in adj_list.keys():
            adj_list[match1][match2] = voltage_match
        else:
            adj_list[match1] = {match2:voltage_match}
            
        if match2 in adj_list.keys():
            adj_list[match2][match1] = voltage_match
        else:
            adj_list[match2] = {match1:voltage_match}

Can't find appropriate voltage associated with the following contingency id: 10080
Can't find appropriate voltage associated with the following contingency id: 13180
Can't find appropriate voltage associated with the following contingency id: 42520
Can't find appropriate voltage associated with the following contingency id: 42620
Can't find appropriate voltage associated with the following contingency id: 60990
Can't find appropriate voltage associated with the following contingency id: 61500
Can't find appropriate voltage associated with the following contingency id: 65460
Can't find appropriate voltage associated with the following contingency id: 72950
Can't find appropriate voltage associated with the following contingency id: 72990
Can't find appropriate voltage associated with the following contingency id: 73020


### QA on Lines

# deal with transformers

In [1303]:
transformers = []

for key in data.keys():
    s = data[key][0]
    
    # deal with combo
    combos = list(map(lambda x: x.strip(), re.split(r"\+|\&", s)))
    
    # iterate over combos
    for s in combos:
        
        # check if "/" exists, implying transformer
        if bool(re.search("\d+/\d+\.", s)):
            # X doesn't exist
            print(s)

        # if no "/", most are not transformers
        else:
            # X exists
            continue
            

138/69.Middle.T3
138/69.Middle.T4
230/138.Cumberland.T2
230/69.Churchtown.T1
138/69.Lewis.T1
138/69.Lewis.T3
138/69.Lewis.T2
138/69.Corson.T1
138/69.Corson.T2
230/138.Churchtown.T4
138/69.Sherman.T2
138/69.Sherman.T3
230/138.Vienna.AT20
230/138.CedarCreek.AT20
230/138.Milford.AT20
230/138.Vienna.AT20
138/69.KingsCreek.AT1
138/69.Loretto.AT1
138/69.Cheswold.AT1
138/69.Easton.AT1
138/69.Edgemoor.AT1
138/69.NorthSeaford.AT1
138/69.NorthSeaford.AT2
138/69.Nelson.AT1
138/69.OakHall.AT1
138/69.OakHall.AT2
138/69.PineyGrove.AT1
138/69.SouthHarrington.AT1
138/69.Vienna.AT1
138/69.Vienna.AT2
138/69.WyeMills.AT1
69/25.Wattsville.T2 Y
25/4.Bayview.T2
230/138.PineyGrove.AT20
138/69.Church.AT1
230/138.Steele.AT20
230/138.Steele.AT21
138/69.Cromby.T3
138/69.Cromby.T4
500/138.Wescosville.T3
500/230.EastWindsor.T1
500/230.EastWindsor.T2
500/230.Hunterstown.T1
500/230.Brighton.T1
500/138.Cabot.T1
230/69.MartinsCreek.T21
500/138.Elroy.T3
500/230.Branchburg.500-1
500/230.Branchburg.500-2
500/230.NewFreed

230/115.Hunterstown.TR2
230/115.Suffolk.TX1
500/230.Brambleton.TX2 (Sctnlz F)
765/345.Baker.T300
345/138.JugStreet.XF5
230/115.SedgeHill.TX1
230/115.SedgeHill.TX2
230/138.Waneeta.TR7
765/138.Greentown.T1 (Sctnlz)
138/12.Dorothy.T1
138/12.Dorothy.T2
345/138.Carson.T1
230/138.Emilie.9TR
345/138.Tidd.B
TEMP:345/138.FostoriaCentral.T1 (LX)
765/500.Cloverdale.T14
500/345.Cloverdale.T15
500/345.Cloverdale.T16
138/69.Richland(AEP).T2
138/69.Richland(AEP).T3
138/26.Wickliffe.TR3
138/13.Brunswick.TR4
345/138.Bergen.345-1
345/230.Bergen.345-2
138/69.Campbell.T2
230/69.Dennis.T3
500/230.Valley.TX2 (Sctnlz F)
138/69.Trenton.BK8
138/69.ShakerRun.BK1
345/34.Cook.T9
138/69.FremontCenter.T1
345/138.BiersRun.TX1
138/69/34.ValleyIM.T1
345/34.MeadowLake.T1
345/34.MeadowLake.T1-GSU
138/69.Kammer.TX4
500/230.Lauschtown.T3
230/115.HighRidge.230-1
230/115.HighRidge.230-2
230/69.Lyons.5BK
115/46.Claysburg.T2
765/138.NorthProctorville.T2
230/69.NorthLebanon.T1
230/69.Northkill.T1
L345/138.116GoodingsGrove.TR84

In [1175]:
for key in data.keys():
    s = data[key][0]
    
    if bool(re.search("\d+/\d+", s)):
        print(s)

L138.Corson-Middle.1412 + 138/69.Middle.T3
L138.Middle-MiddleTap.1413 + 138/69.Middle.T4
230/138.Cumberland.T2
230/69.Churchtown.T1
138/69.Lewis.T1
138/69.Lewis.T3
138/69.Lewis.T2
138/69.Corson.T1
138/69.Corson.T2
230/138.Churchtown.T4
138/69.Sherman.T2
138/69.Sherman.T3
L230.Steele-Vienna.23085 & 230/138.Vienna.AT20
230/138.CedarCreek.AT20
230/138.Milford.AT20
230/138.Vienna.AT20
L138.Steele-Hillsboro-WyeMills.13761/13788
138/69.KingsCreek.AT1
L138.BasinRoad-ChurchmansTap-Keeney.13806/07
L138.ChapelStreet-Keeney.13819/20
L138.Hockesin-Valley-Mer-Milltown.13835/7
138/69.Loretto.AT1&2
138/69.Cheswold.AT1 + L69.Cheswold-Kent.6768
138/69.Easton.AT1 + L138.Steele-Easton.13712
138/69.Edgemoor.AT1&AT2
138/69.NorthSeaford.AT1
138/69.NorthSeaford.AT2
138/69.Nelson.AT1 + Nelson.69.Cap1
138/69.OakHall.AT1
138/69.OakHall.AT2
138/69.PineyGrove.AT1
138/69.SouthHarrington.AT1
138/69.Vienna.AT1
138/69.Vienna.AT2
138/69.WyeMills.AT1
L69.OakHall-Wattsville.6717 + 69/25.Wattsville.T2 Y
25/4.Bayview.T2 +

115/22.Dooms.TX1 (Sctnlz F)
115/22.Dooms.TX2 (Sctnlz F)
230/35.JeffersonStreet.T1 (Sctnlz)
230/35.JeffersonStreet.T2 (Sctnlz)
138/69.Bellefonte.T3 (Future)
230/69.Churchtown.T3
230/138.Eddystone.T9
345/138.Bayway.T4+L138.Bayway-FederalSquare.K-1311
138/34.Guilford.T1 (Sctnlz F)
138/34.Guilford.T1 (Sctnlz)
138/69.Guilford.T2 (Sctnlz F)
138/69.Guilford.T2 (Sctnlz)
138/13.WestEnd.TR18
138/13.WestEnd.TR19
138/13.WestEnd.TR17
138/13.WestEnd.TR15
138/69.Chadwick.T1
345/138.Austin.T1
345/138.Austin.T3
138/69.Reusens.T4
138/69.Reusens.T2
345/138.Pontiac.TR81
345/138.120Lombard.TR82 RF
345/138.120Lombard.TR84 RF
L230.Elimsport-Lycoming.3 + 230/69.Lycoming.3
230/35.Oceanview.T1
230/35.Oceanview.T2
L115.Joppatown-RaphaelRoad + 230/115.RaphaelRoad.1
138/69.Roanoke.T5
138/69/34.Fieldale.T3
230/115.Basin.TX6
230/35.Basin.T3
230/35.Basin.T4
500/230.Brambleton.TX1 (Sctnlz)
500/230.Brambleton.TX2 (Sctnlz)
230/115.EndlessCaverns.TX3 (Sctnlz)
230/115.EndlessCaverns.TX4 (Sctnlz)
230/115.EndlessCaverns.TX5

In [555]:
# strip transformers of information we don't need location and voltage level

stripped_transformers = {}

for t in transformers:
    voltage = t.split(".")[0]
    sub = t.split(".")[1].replace(" ", "").lower()
    stripped_transformers[sub] = voltage
    
    # are there duplicates?

In [750]:
# check if transformer is in the network

transformers_not_found = []

for t in stripped_transformers.keys():
    if t not in network.keys():
        transformers_not_found.append(t)

In [734]:
# second method: use "X" instead of regular expression of "\d+/\d\."to find transformer

transformers_ = []

for value in data.values():
    for s in value:
        if bool(re.search("\s+X\s+", s)):
            transformers_.append(s[:13].strip())

In [742]:
# diff between first and second method
# second method finds more transformers

print("second method:", len(set(transformers_)))
print("first method:", len(set(transformers)))
print("diff:", len(set(transformers_)) - len(set(transformers)))

second method: 1498
first method: 1359
diff: 139


# deal with generators

In [744]:
# another way to get generators from the contingency list different from the above

# get contingencies with lines, generators, and transformers
generators_ = []

for value in data.values():
    for s in value:
        if bool(re.search("\s+U\s+", s)):
            generators_.append(s[:13].strip())

In [749]:
print("first approach generator:", len(generators))
print("first approach generator unique:", len(set(generators)))
print()

print("second approach generator:", len(generators_))
print("second approach generator unique:", len(set(generators_)))

first approach generator: 1032
first approach generator unique: 678

second approach generator: 2292
second approach generator unique: 496


In [686]:
len(set(generators))

496

In [687]:
set(generators)

{'08BROWN',
 '09GRNVIL',
 '09MONUMT',
 '09SIDNEY',
 '09YANKEE',
 '1 LASALL',
 '101 ITAS',
 '11 FISK',
 '117 PROS',
 '12 DRESD',
 '16 WAUKE',
 '18 WILL',
 '196 KATY',
 '20 BRAID',
 '21 KINCA',
 '23RDSTRE',
 '29 JOLIE',
 '3 POWERT',
 '4 QUAD C',
 '6 BYRON',
 '9 JOLIET',
 '900 ELWO',
 '935 KEND',
 '936 LINC',
 '937 LEE',
 '938 K4PI',
 '940 CORD',
 '941 GRND',
 '942 NELS',
 '945 CRET',
 '946 UNIV',
 '948 BRIG',
 '951 AURO',
 '952 ROCK',
 '955 RIVE',
 '960 ELGI',
 '961 LEE',
 '969 ECOG',
 '970 UP N',
 '974 ZION',
 '975 OTTE',
 '976 CAYU',
 '977 GREE',
 '980 WALN',
 '982 ROCK',
 '989 TWIN',
 '994 HILL',
 'ADKINS',
 'AIREY',
 'ALLDAM6',
 'ALLENTOW',
 'ALLOY',
 'ALTOONA',
 'AMOS',
 'ARCHBTAP',
 'ASYLUM',
 'ATHENS',
 'ATLANTIC',
 'AVONLAK2',
 'BAKER',
 'BARTONVI',
 'BATHCO',
 'BAYONNE',
 'BAYSHOR2',
 'BAYVIEW',
 'BEAR CRK',
 'BEARGRDN',
 'BEARROCK',
 'BEAV DUQ',
 'BEAVER',
 'BELEWS',
 'BELLEHAV',
 'BELLMEAD',
 'BERGEN',
 'BIGBY',
 'BIGSANDY',
 'BIRCHWD',
 'BIRDBORO',
 'BISHOPHI',
 'BLACKOAK',
 

In [694]:
# check generators from contingency list with bus system

found = []
not_found = []

for x in generators:
    if x in node_list[node_list.type == "GEN"].substation.to_list():
        found.append(x)
    else:
        not_found.append(x)

In [697]:
not_found

['MONR AE',
 'CHURCHTO',
 'CHATEAUG',
 'MILLSTON',
 'MILLSTON',
 'MILLSTON',
 'SANDYPON',
 'COMERFOR',
 'HIGHGATE',
 'HQNEWBRU',
 'CHATEAUG',
 'CHATEAUG',
 'CHATEAUG',
 'INDEPEND',
 'INDEPEND',
 'INDEPEND',
 'JAFITZP',
 'MILLSTON',
 'MILLSTON',
 'MILLSTON',
 'MILLSTON',
 'MILLSTON',
 'CHATEAUG',
 'CHATEAUG',
 'CHATEAUG',
 'CHATEAUG',
 'CHATEAUG',
 'HQNEWBRU',
 'HIGHGATE',
 'COMERFOR',
 'CHATEAUG',
 'CHATEAUG',
 'MADISON',
 'CLOVERD2',
 'CLOVERD2',
 'CAPITOLH',
 'MODOC',
 'HOLSTON',
 'SCHAHFER',
 'SCHAHFER',
 'SCHAHFER',
 'SCHAHFER',
 'MICHIGA2',
 'MILL',
 'RIOGRAND',
 'CLINTON4',
 'GHENT_LG',
 'PALISAD2',
 'CAMPBELL',
 'CAMPBELL',
 'CAMPBELL',
 'BUCH NY',
 'INDIANPT',
 'FWLRMISO',
 'MONROE',
 'MONROE',
 'MONROE',
 'MONROE',
 'CHATEAUG',
 'CHATEAUG',
 'CHATEAUG',
 'EDWARDS3',
 'MCGUIRE4',
 'BELEWS',
 'BELEWS',
 'MARSHAL4',
 'MARSHAL4',
 'MCGUIRE4',
 'CLIFFSID',
 'CLIFSID',
 'CATAWBA4',
 'CATAWBA4',
 'GHENT_LG',
 'GHENT_LG',
 'GHENT_LG',
 'HAEFLING',
 'MILL_CRE',
 'MILL_CRE',
 'MILL_CRE'

In [705]:
len(set(not_found))

57

In [717]:
# check bus system with contingecy list nodes
not_found_ = []


for x in node_list[node_list.type == "GEN"].substation.to_list():
    if x not in generators:
        not_found_.append(x)

In [721]:
set(not_found_)

{'08HAMLTN',
 '100 SHAD',
 '107 DIXO',
 '122 BELV',
 '123 MARE',
 '126 STAT',
 '13 CRAWF',
 '131 W CH',
 '134 LAGR',
 '135 ELMH',
 '139 MEND',
 '154 LIBE',
 '176 STIL',
 '193 MCHE',
 '212 NORT',
 '214 HOFF',
 '216 MOUN',
 '21WWVSTA',
 '282 ZION',
 '409 JOLI',
 '43 WILDW',
 '54 CLYBO',
 '55 HEGEW',
 '559 WOOD',
 '56 NO AU',
 '577 S EL',
 '61 STREA',
 '64 BELLW',
 '72 GOOSE',
 '73 CHI H',
 '79 SPAUL',
 '81 TOULO',
 '908 MOLE',
 '932 ROCK',
 '933 TWIN',
 '944 SE C',
 '949 KELL',
 '959ERDBS',
 '979 MEND',
 '981 CRES',
 'ABSECON',
 'AHOSKIE',
 'ALDENE',
 'AMPOR',
 'ANTIETAM',
 'ARNOLDCR',
 'ARNOLDRE',
 'ATHENIA',
 'AVERY',
 'BAGLEY',
 'BARBADOS',
 'BEAVERBR',
 'BECKJORD',
 'BELLEVI2',
 'BELLEVIL',
 'BENNETTS',
 'BERRHYD',
 'BETHESDA',
 'BIRDNECK',
 'BISMARK',
 'BLACKWAL',
 'BLAIRSVE',
 'BLGR PJM',
 'BLUEJACK',
 'BOARDMAN',
 'BOONSBOR',
 'BOWL_GR2',
 'BOWL_GR4',
 'BOWL_GR6',
 'BRANDYWI',
 'BREMO4',
 'BRIDGEWA',
 'BRUNSWIC',
 'BUCHANAN',
 'BUCHANCO',
 'BUCK',
 'BUCKNGHM',
 'BUCKPL',
 'BUCKTIE

In [589]:
# second way to classify generators

generators_ = []

for value in data.values():
    for i, v in enumerate(value):    
        if 'U' == v:
            print(value)
            generators_.append(value[i-1])

['L69.Monroe-Vineland.0711', 'MONR', 'AE', 'L', 'MON-VINE', '69', 'KV', 'L', 'MONR', 'AE', 'OP', 'OCB', 'L', '69', 'KV', 'CB', 'VINELAND', 'OP', 'OCB', 'U', '69', 'KV', 'CB']
['CarllsCorner.CT1', 'CARLLS', 'OP', 'G', '69', 'KV', 'DIS', 'CARLLS', 'U', 'CT_1', '69', 'KV', 'U']
['CarllsCorner.CT1', 'CARLLS', 'OP', 'G', '69', 'KV', 'DIS', 'CARLLS', 'U', 'CT_1', '69', 'KV', 'U']
['CarllsCorner.CT2', 'CARLLS', 'U', 'CT_2', '69', 'KV', 'U', 'CARLLS', 'OP', 'H', '69', 'KV', 'DIS']
['CarllsCorner.CT2', 'CARLLS', 'U', 'CT_2', '69', 'KV', 'U', 'CARLLS', 'OP', 'H', '69', 'KV', 'DIS']
['L69.ChurchTown-Salem-Woodstown.0724', 'SALEM', 'AE', 'LD', 'MANTMILL', '69', 'KV', 'LD', 'SALEM', 'AE', 'U', 'MANNINGT', '69', 'KV', 'U', 'SALEM', 'AE', 'LD', 'LOAD2', '69', 'KV', 'LD', 'SALEM', 'AE', 'LD', 'LOAD1', '69', 'KV', 'LD', 'SALEM', 'AE', 'L', 'CHU-SAL', '69', 'KV', 'L', 'CHURCHTO', 'OP', 'ES', '69', 'KV', 'CB', 'CHURCHTO', 'OP', 'DT', '69', 'KV', 'CB', 'SALEM', 'AE', 'L', 'SAL-WOO', '69', 'KV', 'L', 'WOOD

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




['230/115.Staunton.TX6', 'STAUNTON', 'OP', 'H672', '230', 'KV', 'CB', 'STAUNTON', 'OP', 'L672', '115', 'KV', 'CB', 'STAUNTON', 'X', 'TX6', '230', 'KV', 'X', '70540', 'NineMile.U1', '(NYISO)', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'NINMILNY', 'U', 'NINEMIL1', '23', 'KV', 'U', 'NINMILNY', 'X', 'TB1', '345', 'KV', 'X', '70550', 'NineMile.U2', '(NYISO)', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'SCRIBA', 'OP', 'R925', '345', 'KV', 'CB', 'SCRIBA', 'OP', 'R230', '345', 'KV', 'CB', 'SCRIBA', 'U', 'NINEMIL2', '25', 'KV', 'U', '70560', 'Independence.5CC', '(NYISO)', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'INDEPEND', 'OP', 'R925', '345', 'KV', 'CB', 'INDEPEND', 'U', 'ST5', '18', 'KV', 'U', 'INDEPEND', 'OP', 'R250', '345', 'KV', 'CB', 'INDEPEND', 'U', 'GT2', '18', 'KV', 'U', 'INDEPEND', 'U', 'GT1', '18', 'KV', 'U', '70580', '138/69.BlackRiver.T1', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'BLACKRIV', 'OP', '1', '138', 'KV', 'CB', 'BLACKRIV', 'OP', '4', '138', 'KV', 'CB', 'BLACKRIV', 'OP', '7', '

['DTX230.LewisRun.TR6', 'LEWISRN', 'OP', 'B63', '230', 'KV', 'CB', 'LEWISRN', 'OP', 'B64', '230', 'KV', 'CB', 'LEWISRN', 'LD', 'TR6', '230', 'KV', 'LD', '98009', 'L138.Bosserman-Marquette', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'BOSSERMN', 'L', 'BOS-MAR', '138', 'KV', 'L', 'BOSSERMN', 'OP', 'E', '138', 'KV', 'CB', 'BOSSERMN', 'OP', 'E2', '138', 'KV', 'CB', 'MARQUETT', 'OP', 'C', '138', 'KV', 'CB', 'MARQUETT', 'LD', 'T2', '138', 'KV', 'LD', '98010', 'L138.Marquette-Kuchar-SilverLake', 'Y', 'Y', 'Y', '6', 'Y', 'Y', 'Y', 'Y', 'KUCHAR', 'L', 'KUC-SIL', '138', 'KV', 'L', 'KUCHAR', 'LD', 'LIQUID', '138', 'KV', 'LD', 'MARQUETT', 'OP', 'C', '138', 'KV', 'CB', 'MARQUETT', 'L', 'KUC-MAR', '138', 'KV', 'L', 'MARQUETT', 'LD', 'T1', '138', 'KV', 'LD', 'MARQUETT', 'OP', 'U', '138', 'KV', 'DIS', 'MARQUETT', 'OP', 'XT1', '138', 'KV', 'CB', 'SILVERLK', 'OP', 'C', '138', 'KV', 'CB', 'SILVERLK', 'OP', 'V', '138', 'KV', 'DIS', 'SILVERLK', 'OP', 'XT2', '138', 'KV', 'CB', '98011', 'L138.SilverLake-Olive',

['L230.OtterCrk-SafeHarbor-Manor.2302+Relay.U7.U6.U5', 'SAFEHARB', 'OP', 'TR1-NW', '13', 'KV', 'CB', 'SAFEHARB', 'OP', 'TR1-SE', '13', 'KV', 'CB', 'SAFEHARB', 'OP', 'TR1-SW', '13', 'KV', 'DIS', 'SAFEHARB', 'OP', 'TR2-NE', '13', 'KV', 'CB', 'SAFEHARB', 'OP', 'TR2-NW', '13', 'KV', 'CB', 'SAFEHARB', 'OP', 'TR2-SE', '13', 'KV', 'CB', 'SAFEHARB', 'OP', 'TR2-SW', '13', 'KV', 'DIS', 'SAFEHARB', 'OP', 'TRAN2301', '230', 'KV', 'CB', 'SAFEHARB', 'OP', 'TRAN2302', '230', 'KV', 'CB', 'SAFEHARB', 'U', 'UNIT5', '13', 'KV', 'U', 'SAFEHARB', 'U', 'UNIT6', '13', 'KV', 'U', 'SAFEHARB', 'U', 'UNIT7', '13', 'KV', 'U', 'MANOR', 'L', 'MAN-SAF1', '230', 'KV', 'L', 'MANOR', 'OP', 'MLWD_S', '230', 'KV', 'CB', 'MANOR', 'OP', 'WHEM_2-S', '230', 'KV', 'CB', 'OTTCRKPL', 'OP', 'CONA2302', '230', 'KV', 'CB', 'OTTCRKPL', 'L', 'OTT-SAF', '230', 'KV', 'L', 'OTTCRKPL', 'OP', 'YORK', '230', 'KV', 'CB', 'SAFEHARB', 'X', '2301', '230', 'KV', 'X', 'SAFEHARB', 'X', '2302', '230', 'KV', 'X', 'SAFEHARB', 'OP', 'GEN5-N', '13', 

In [525]:
len(node_list.substation.unique())

5427

In [534]:
len(node_list[node_list.voltage > 34].substation.unique())

4979

In [None]:
# construct 765 kV layer

In [529]:
node_list = pd.read_excel("data/lmp-bus-model.xlsx", skiprows=2)
node_list.columns = ["pnode_id", "transmission_zone", "substation", "voltage", "equipment", "type"]
node_list.voltage = node_list.voltage.apply(lambda x: float(x.split()[0]))

In [40]:
np.sort(node_list[node_list.voltage == '765 KV'].substation.unique())

array(['112 WILTON', '167 PLANO', '23 COLLINS', 'AMOS', 'AXTON', 'BAKER',
       'BELMONT', 'BROADFORD', 'CLOVERDALE', 'COOK', 'CULLODEN', 'DUMONT',
       'FLATLICK', 'GAVIN', 'HANGING ROCK', 'JACKSONS', 'JACKSONS FERRY',
       'JEFFERSON', 'JOSHUA FALLS', 'KAMMER', 'MALISZEWSKI', 'MARQUIS',
       'MARYSVILLE', 'MOUNTAINEER', 'NORTH PROCTORVILLE', 'REYNOLD2',
       'ROCKPORT', 'SORENSON', 'SOUTH CANTON', 'SULLIVAN-AEP', 'WYOMING'],
      dtype=object)

In [54]:
for x in sorted(list(lines_765_nodes)):
    if x not in np.sort(node_list[node_list.voltage == '765 KV'].substation.unique()):
        print(x)

Amos
Axton
Baker
Broadford
Chateauguay
Cloverdale
Collins
Cook
Cornu
Culloden
DonMarquis
Dumont
Flatlick
Gavin
Greentown
HangingRock
JacksonsFerry
Jefferson
JoshuaFalls
Kammer
Maliszewski
Marcy
Marysville
Massena
Mountaineer
NorthProctorville
RckptWks3
Rockport
RockportWorks
Rockpwks
SCanton
Sorenson
SouthCanton
Sullivan
Vassell
Wilton
WiltonCenter
WiltonCenter+L138
Wyoming
