In [1]:
import pandas as pd
import pyodbc
import numpy as np
import time, os
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns

# pandas display limits used when frames are rendered in this notebook:
# up to 999 columns, 700 rows and 100 characters per cell are shown.
pd.set_option("display.max_columns", 999)
pd.set_option('display.max_rows', 700)
pd.set_option("display.expand_frame_repr",True)
pd.set_option("display.max_colwidth", 100)

# Default matplotlib figure size.
plt.rcParams['figure.figsize'] = (10,10)

In [2]:
def sqlite_sql(q, path=r"C:\Users\armando_borjas\Documents\IPL_full.db"):
    """Run a SQL query against a SQLite database file and return the rows.

    Parameters
    ----------
    q : str
        SQL text to execute.
    path : str, optional
        Location of the SQLite database file to open.

    Returns
    -------
    pandas.DataFrame
        The result set of ``q``.

    Raises
    ------
    Exception
        Whatever ``pd.read_sql_query`` raises for a failing query is
        propagated unchanged; the connection is closed first.
    """
    conn = sqlite3.connect(path)
    try:
        # Read sqlite query results into a pandas DataFrame
        return pd.read_sql_query(q, conn)
    finally:
        # Close on every exit path so a failing query does not leave the
        # database connection open for the rest of the kernel session.
        conn.close()

In [3]:
def sqlserver_sql(q, server = 'SQL2017', db = 'IPL_IRASV6_STAGE'):
    """Run a SQL query against a SQL Server database and return the rows.

    Parameters
    ----------
    q : str
        SQL text to execute.
    server : str, optional
        Value placed in the ``Server=`` part of the ODBC connection string.
    db : str, optional
        Value placed in the ``Database=`` part of the ODBC connection string.

    Returns
    -------
    pandas.DataFrame
        The result set of ``q``.

    Raises
    ------
    Exception
        Errors from ``pyodbc.connect`` or ``pd.read_sql_query`` propagate
        unchanged; an opened connection is always closed first.
    """
    driver = '{SQL Server Native Client 11.0}'
    conn = pyodbc.connect("Driver="+driver+";Server="+server+";Database="+db+";Trusted_Connection=yes;")
    try:
        # performing query to database
        return pd.read_sql_query(q, conn)
    finally:
        # Close on every exit path so a failing query does not leak the
        # server connection.
        conn.close()

In [4]:
# Make the Documents folder the working directory; later cells write and read
# their .xlsx files using bare relative names.
# NOTE(review): absolute, user-specific path - the notebook will not run on another machine as-is.
os.chdir(r"C:\Users\armando_borjas\Documents")

In [5]:
%ls

 Volume in drive C has no label.
 Volume Serial Number is FE48-FC0C

 Directory of C:\Users\armando_borjas\Documents

2020-07-07  08:34 AM    <DIR>          .
2020-07-07  08:34 AM    <DIR>          ..
2020-05-21  12:09 PM           182,133 Corrosion Monte Carlo Analysis.docx
2020-06-23  05:12 PM    <DIR>          Custom Office Templates
2020-04-01  12:59 PM         1,085,868 Demo.pbix
2019-12-11  09:13 AM    <DIR>          Downloads
2019-10-29  08:19 AM    <DIR>          Dynamic Risk
2020-02-06  05:18 PM    <DIR>          GIS DataBase
2020-06-23  01:55 PM            18,576 ILI Processing Algorithm.algproj
2020-07-08  02:37 PM    <DIR>          IPL 20200626 Export
2020-06-11  08:26 AM             2,263 IPL_20200611-template_to_run.raprj
2020-06-23  03:05 PM        43,390,234 IPL_20200623_QAQCILI_check.raprj
2020-06-24  03:02 PM       531,933,846 IPL_20200623T1550-system.raprj
2020-06-24  09:24 AM     1,126,303,445 IPL_detailed_all.csv
2020-06-24  10:52 AM     1,371,111,424 IPL_full.db
2

In [6]:
# List the tables stored in the SQLite database, sorted by name.
q = """SELECT name FROM sqlite_master
    WHERE type='table'
    ORDER BY name;"""

sqlite_sql(q)

Unnamed: 0,name
0,Pipeline_all
1,Summary_IPL_20200623T1550-system_Default_Summary
2,Summary_full_summary


In [7]:
# Load every row and column of the Summary_full_summary table into `temp`,
# the working frame that the remaining cells read and extend.
q1 = """select * from 'Summary_full_summary'"""

temp = sqlite_sql(q1)

In [8]:
# Non-null count of the first column. `count()` returns a Series labelled by
# column name, so the first entry is taken by position with `.iloc` rather
# than relying on the deprecated integer-key fallback of `Series[...]`.
temp.count().iloc[0]

830087

In [9]:
# Column dtypes, non-null counts and memory footprint ('deep' also measures object columns).
temp.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 830087 entries, 0 to 830086
Data columns (total 38 columns):
 #   Column                                                           Non-Null Count   Dtype  
---  ------                                                           --------------   -----  
 0   Corporation                                                      830087 non-null  object 
 1   Operating Unit                                                   830087 non-null  object 
 2   District Division                                                830087 non-null  object 
 3   Pipeline System                                                  830087 non-null  object 
 4   Line Name                                                        830087 non-null  object 
 5   LineID                                                           830087 non-null  int64  
 6   GridStartMeasure                                                 830087 non-null  float64
 7   GridEndMeasure               

In [11]:
# Total 'Length (m)' per 'Line Name' over the rows that have no
# 'Date of last ILI MFL', sorted ascending and kept as a one-column frame.
nomfl_df = temp.loc[temp['Date of last ILI MFL'].isnull(),['Line Name','Begin Measure (m)','End Measure (m)','Length (m)']]\
.groupby('Line Name').sum()['Length (m)'].sort_values().to_frame()

# In-line inspection range records whose tool code is one of the listed codes,
# with the line name resolved through stationseries -> lineloop.
iliq = """select ll.linename, r.effectiveenddate, ld2.code [status], ld.code [tool], r.ilirstartdate from inlineinspectionrange r
            join stationseries ss on r.beginstationseriesid = ss.id
            join lineloop ll on ss.lineloopid = ll.id
            join listdomain ld on r.ilirtooldomainid = ld.id
            join listdomain ld2 on r.ilirstatusdomainid = ld2.id
            where ld.code in ('AFD-EMAT','Combo','Generic MFL','MFL','MFL - geometry','MFL-A','Spiral MFL')"""

# Per line: latest `ilirstartdate` and number of range records.
ili_df = sqlserver_sql(iliq).groupby('linename').agg({'ilirstartdate':['max','count']})
# Rename the index so it can be joined against nomfl_df's 'Line Name' index.
ili_df.index = ili_df.index.rename('Line Name')

# Index-on-index join; lines from nomfl_df are kept (DataFrame.join defaults to a left join).
df_analysis = nomfl_df.join(ili_df)

# NOTE(review): the tuple keys presume the joined columns show up as
# ('ilirstartdate', 'max') / ('ilirstartdate', 'count') tuples - confirm on the pandas version in use.
df_analysis.rename(columns={('ilirstartdate', 'max'):'Latest ILI Date',
                                     ('ilirstartdate', 'count'):'Count of ILI Range Records'}, inplace=True)

# Lines with no MFL date in the summary table but at least one matching range record in SQL Server.
df_analysis\
.query('~`Latest ILI Date`.isnull()')\
#.reset_index()['Line Name'].to_clipboard()

In [8]:
def threat_cat(df, column):
    """Bin a likelihood column into the ordered classes 'L1' to 'L5'.

    Bins are closed on the left and open on the right:
    [0, 1e-5) -> L1, [1e-5, 1e-4) -> L2, [1e-4, 1e-3) -> L3,
    [1e-3, 1e-2) -> L4, [1e-2, inf) -> L5.
    Values outside every bin (for example negatives or NaN) become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to classify.
    column : str
        Name of the numeric column in ``df``.

    Returns
    -------
    pandas.Series
        Categorical series aligned with ``df``.
    """
    edges = [0.00, 1.00e-5, 1.00e-4, 1.00e-3, 1.00e-2, np.inf]
    classes = ['L1', 'L2', 'L3', 'L4', 'L5']
    values = df.loc[:, column]
    return pd.cut(values, bins=edges, labels=classes, right=False)

def consq_cat(df, column):
    """Bin a consequence score column into the ordered classes 'C1' to 'C5'.

    Bins are closed on the left and open on the right:
    [0, 2) -> C1, [2, 3) -> C2, [3, 4) -> C3, [4, 5) -> C4, [5, inf) -> C5.
    Values outside every bin (for example negatives or NaN) become NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to classify.
    column : str
        Name of the numeric column in ``df``.

    Returns
    -------
    pandas.Series
        Categorical series aligned with ``df``.
    """
    edges = [0.00, 2.00, 3.00, 4.00, 5.00, np.inf]
    classes = ['C1', 'C2', 'C3', 'C4', 'C5']
    scores = df.loc[:, column]
    return pd.cut(scores, bins=edges, labels=classes, right=False)

def risk_rank(df, threat, consequence):
    """Look up the risk class for each row from a 5x5 likelihood/consequence matrix.

    Each row's ``(df[threat], df[consequence])`` pair is matched against the
    table below, whose entries are ``('L1'..'L5', 1.0..5.0)`` tuples.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns.
    threat : str
        Column with the likelihood class labels ('L1' to 'L5').
    consequence : str
        Column with the consequence score (matched against 1.0 to 5.0).

    Returns
    -------
    pandas.Series
        'Low', 'Medium', 'High' or 'Extreme' per row, carrying ``df.index``
        so that assigning or concatenating the result aligns with ``df``
        even when ``df`` is a filtered or re-indexed frame.

    Notes
    -----
    A pair that is not listed in the table (for example a missing label or a
    non-integer score) falls through to the default and is reported as 'Low'.
    """
    risk_ranks = {'Low':[('L1',1.),
                        ('L1',2.),
                        ('L1',3.),
                        ('L1',4.),
                        ('L1',5.),
                        ('L2',1.),
                        ('L2',2.),
                        ('L2',3.),
                        ('L2',4.),
                        ('L3',1.),
                        ('L3',2.),
                        ('L3',3.),
                        ('L4',1.),
                        ('L4',2.)],
                 'Medium':[('L2',5.),
                           ('L3',4.),
                           ('L4',3.),
                           ('L5',2.),
                           ('L5',1.)],
                 'High':[('L3',5.),
                         ('L4',4.),
                         ('L5',3.)],
                 'Extreme':[('L4',5.),
                            ('L5',5.),
                            ('L5',4.)]}

    # Build the per-row (likelihood, consequence) tuples once; the membership
    # tests below all reuse the same Series instead of rebuilding it per class.
    pairs = pd.Series([tuple(x) for x in df.loc[:, [threat, consequence]].to_numpy()],
                      index=df.index)

    choices = ['Low',
               'Medium',
               'High',
               'Extreme']
    conditions = [pairs.isin(risk_ranks[level]) for level in choices]

    # Keep df's index on the result so callers align on labels, not positions.
    return pd.Series(np.select(conditions, choices, default='Low'), index=df.index)

In [81]:
# QC of likelihood, consequence, and risk category determination.

# Row-wise membership test of (_Matrix_Row_Label, Consequence) against four corner pairs.
# NOTE(review): this is not the last expression of the cell, so under default
# notebook display rules its value is computed and then discarded.
pd.Series([tuple(x) for x in temp.loc[:,['_Matrix_Row_Label','Consequence']].to_numpy()]).isin([('L5',1.0),
                                                                                                ('L4',1.0),
                                                                                               ('L1',5.0),
                                                                                               ('L1',4.0)])

# pd.concat([threat_cat(temp,'Probability'),temp['_Matrix_Row_Label']], axis=1).Probability.eq('L5')#.assign(check = lambda x: x['Probability']==x['_Matrix_Row_Label']).query('check!=True')

# temp.loc[:,'_Matrix_Row_Label'].eq('L1') & temp.Consequence.eq(5)

# Recompute the risk class with risk_rank and list the rows where it differs
# from the '_Matrix_Legend_Label' stored in the table.
pd.concat([risk_rank(temp, '_Matrix_Row_Label', 'Consequence').rename('QC'),temp['_Matrix_Legend_Label']], axis=1).assign(check = lambda x: x['QC']==x['_Matrix_Legend_Label']).query('check!=True')

0         True
1         True
2         True
3         True
4         True
          ... 
830082    True
830083    True
830084    True
830085    True
830086    True
Length: 830087, dtype: bool

In [9]:
# Classify each likelihood column into L1..L5 and store it under its *_LOF name.
# Keys are the new columns; values are the source columns they are derived from.
lof_sources = {
    'EC_LOF': 'External Corrosion Likelihood of Failure (/km.yr)',
    'IC_LOF': 'Internal Corrosion Likelihood of Failure (/km.yr)',
    'SCC_LOF': 'Stress Corrosion Cracking Likelihood of Failure (/km.yr)',
    'LFERW_LOF': 'Weld Imperfections Likelihood of Failure (/km.yr)',
    'MD_LOF': 'Mechanical Damage Likelihood of Failure (/km.yr)',
    'GEO_LOF': 'Material Yielding Geotechnical Likelihood of Failure (/km.yr)',
    'HYD_LOF': 'Material Yielding Hydrotechnical Likelihood of Failure (/km.yr)',
    'TOTAL_LOF': 'Probability',
}
for lof_col, src_col in lof_sources.items():
    temp.loc[:, lof_col] = threat_cat(temp, src_col)

print('Done!')

Done!


In [10]:
# Classify each consequence score column into C1..C5 and store it under its *_COF name.
# Keys are the new columns; values are the source columns they are derived from.
cof_sources = {
    'SAF_COF': 'Safety Impact',
    'ENV_COF': 'Environment Impact',
    'FIN_COF': 'Financial Impact',
    'OUT_COF': 'Outage Impact',
    'TOTAL_COF': 'Consequence',
}
for cof_col, src_col in cof_sources.items():
    temp.loc[:, cof_col] = consq_cat(temp, src_col)

print('Done!')

Done!


In [11]:
# Risk class per threat: each threat's likelihood class combined with the
# overall 'Consequence' score. Output columns swap the _LOF suffix for _RISK.
threat_lofs = ['EC_LOF', 'IC_LOF', 'SCC_LOF', 'LFERW_LOF', 'MD_LOF', 'GEO_LOF', 'HYD_LOF']
calcs = [lof_name.replace('_LOF', '_RISK') for lof_name in threat_lofs]
for x, y in zip(threat_lofs, calcs):
    temp.loc[:, y] = risk_rank(temp, threat=x, consequence='Consequence')

print('Done!')

Done!


In [12]:
# Risk class per consequence type: the stored overall likelihood label
# ('_Matrix_Row_Label') combined with each impact score in turn.
cons_cofs = [
    'Safety Impact',
    'Environment Impact',
    'Financial Impact',
    'Outage Impact',
    'Consequence',
]
calcs = [
    'SAF_RISK',
    'ENV_RISK',
    'FIN_RISK',
    'OUT_RISK',
    'TOTAL_RISK',
]
for x, y in zip(cons_cofs, calcs):
    temp.loc[:, y] = risk_rank(temp, threat='_Matrix_Row_Label', consequence=x)

print('Done!')

Done!


In [13]:
# Show the last 50 columns, which include the *_LOF, *_COF and *_RISK columns added above.
temp.iloc[:,-50:]

Unnamed: 0,Likelihood Driver,Likelihood Driver Method,External Corrosion Likelihood of Failure (/km.yr),EC Method Used,Internal Corrosion Likelihood of Failure (/km.yr),IC Method Used,Stress Corrosion Cracking Likelihood of Failure (/km.yr),SCC Method Used,Weld Imperfections Likelihood of Failure (/km.yr),LF ERW Method Used,Mechanical Damage Likelihood of Failure (/km.yr),Material Yielding Geotechnical Likelihood of Failure (/km.yr),Material Yielding Hydrotechnical Likelihood of Failure (/km.yr),Consequence,Safety Impact,PIR (m),Environment Impact,Financial Impact,Outage Impact,Matrix_Total_Probability,Matrix_Total_Consequence,_Matrix_Legend_Label,_Matrix_Cell_Label,_Matrix_Column_Label,_Matrix_Row_Label,EC_LOF,IC_LOF,SCC_LOF,LFERW_LOF,MD_LOF,GEO_LOF,HYD_LOF,TOTAL_LOF,SAF_COF,ENV_COF,FIN_COF,OUT_COF,TOTAL_COF,EC_RISK,IC_RISK,SCC_RISK,LFERW_RISK,MD_RISK,GEO_RISK,HYD_RISK,SAF_RISK,ENV_RISK,FIN_RISK,OUT_RISK,TOTAL_RISK
0,EC,EC Method 1,0.1,EC Method 1,0.1,IC Method 1,0.0,SCC Method 1,0.0,LF ERW Method 1,0.030013,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.2,1.0,Medium,(11),C1,L5,L5,L5,L1,L1,L5,L1,L1,L5,C1,C1,C1,C1,C1,Medium,Medium,Low,Low,Medium,Low,Low,Medium,Medium,Medium,Medium,Medium
1,EC,EC Method 1,0.1,EC Method 1,0.1,IC Method 1,0.0,SCC Method 1,0.0,LF ERW Method 1,0.030013,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.2,1.0,Medium,(11),C1,L5,L5,L5,L1,L1,L5,L1,L1,L5,C1,C1,C1,C1,C1,Medium,Medium,Low,Low,Medium,Low,Low,Medium,Medium,Medium,Medium,Medium
2,EC,EC Method 1,0.1,EC Method 1,0.1,IC Method 1,0.0,SCC Method 1,0.0,LF ERW Method 1,0.030013,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.2,1.0,Medium,(11),C1,L5,L5,L5,L1,L1,L5,L1,L1,L5,C1,C1,C1,C1,C1,Medium,Medium,Low,Low,Medium,Low,Low,Medium,Medium,Medium,Medium,Medium
3,EC,EC Method 1,0.1,EC Method 1,0.1,IC Method 1,0.0,SCC Method 1,0.0,LF ERW Method 1,0.030013,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.2,1.0,Medium,(11),C1,L5,L5,L5,L1,L1,L5,L1,L1,L5,C1,C1,C1,C1,C1,Medium,Medium,Low,Low,Medium,Low,Low,Medium,Medium,Medium,Medium,Medium
4,EC,EC Method 1,0.1,EC Method 1,0.1,IC Method 1,0.0,SCC Method 1,0.0,LF ERW Method 1,0.030013,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.2,1.0,Medium,(11),C1,L5,L5,L5,L1,L1,L5,L1,L1,L5,C1,C1,C1,C1,C1,Medium,Medium,Low,Low,Medium,Low,Low,Medium,Medium,Medium,Medium,Medium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
830082,EC,EC Method 2,0.0,EC Method 2,0.0,IC Method 2,0.0,SCC Method 1,0.0,LF ERW Method 1,0.000383,0.0,0.0,5.0,2.0,0.0,5.0,3.0,4.0,0.0,5.0,Low,(12),C5,L1,L1,L1,L1,L1,L3,L1,L1,L1,C2,C5,C3,C4,C5,Low,Low,Low,Low,High,Low,Low,Low,Low,Low,Low,Low
830083,EC,EC Method 2,0.0,EC Method 2,0.0,IC Method 2,0.0,SCC Method 1,0.0,LF ERW Method 1,0.000383,0.0,0.0,4.0,2.0,0.0,1.0,3.0,4.0,0.0,4.0,Low,(16),C4,L1,L1,L1,L1,L1,L3,L1,L1,L1,C2,C1,C3,C4,C4,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Low,Low,Low
830084,EC,EC Method 2,0.0,EC Method 2,0.0,IC Method 2,0.0,SCC Method 1,0.0,LF ERW Method 1,0.000383,0.0,0.0,4.0,2.0,0.0,1.0,4.0,4.0,0.0,4.0,Low,(16),C4,L1,L1,L1,L1,L1,L3,L1,L1,L1,C2,C1,C4,C4,C4,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Low,Low,Low
830085,EC,EC Method 2,0.0,EC Method 2,0.0,IC Method 2,0.0,SCC Method 1,0.0,LF ERW Method 1,0.000310,0.0,0.0,4.0,2.0,0.0,1.0,4.0,4.0,0.0,4.0,Low,(16),C4,L1,L1,L1,L1,L1,L3,L1,L1,L1,C2,C1,C4,C4,C4,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Low,Low,Low


In [20]:
def build_matrix(df, threat, consequence, agg='sum'):
    """Cross-tabulate 'Length (m)' by likelihood class and consequence class.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``threat``, ``consequence`` and 'Length (m)'.
    threat : str
        Column used for the rows of the matrix.
    consequence : str
        Column used for the columns of the matrix.
    agg : str or callable, optional
        Aggregation applied to 'Length (m)' in each cell.

    Returns
    -------
    pandas.DataFrame
        Pivot table with a 'Total' margin row and column; empty cells are
        filled with 0 and all-empty rows/columns are kept (``dropna=False``).
    """
    pivot_options = dict(
        index=threat,
        columns=consequence,
        values='Length (m)',
        aggfunc=agg,
        dropna=False,
        margins=True,
        margins_name='Total',
        fill_value=0.00,
    )
    return df.pivot_table(**pivot_options)


def risk_matrix(x):
    """Return per-cell background colours for a risk matrix, by position.

    Cells are addressed by (row position, column position) within the top-left
    5x5 area of ``x``; every other cell gets an empty style string.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame being styled; needs at least 5 rows and 5 columns.

    Returns
    -------
    pandas.DataFrame
        CSS strings with the same index and columns as ``x``.
    """
    green = 'background-color: green'
    blue = 'background-color: #0000ff'
    yellow = 'background-color: #ffff00'
    red = 'background-color: #ff0000'

    # (css, [(row, col), ...]) - a list of pairs rather than a dict so that
    # the assignment order is explicit.
    layout = [
        (green, [(1, 0), (1, 1),
                 (2, 0), (2, 1), (2, 2),
                 (3, 0), (3, 1), (3, 2), (3, 3),
                 (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]),
        (blue, [(0, 0), (0, 1), (1, 2), (2, 3), (3, 4)]),
        (yellow, [(0, 2), (1, 3), (2, 4)]),
        (red, [(0, 3), (0, 4), (1, 4)]),
    ]

    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    for css, cells in layout:
        for row, col in cells:
            styles.iloc[row, col] = css
    return styles

def risk_cell_text(x):
    """Return per-cell text colours for a risk matrix, by position.

    Within the top-left 5x5 area of ``x`` the three cells at positions
    (0, 2), (1, 3) and (2, 4) get black text and the other 22 cells get white
    text; cells outside that area get an empty style string.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame being styled; needs at least 5 rows and 5 columns.

    Returns
    -------
    pandas.DataFrame
        CSS strings with the same index and columns as ``x``.
    """
    white = 'color: white'
    black = 'color: black'

    # (css, [(row, col), ...]); three groups share the same white text colour.
    layout = [
        (white, [(1, 0), (1, 1),
                 (2, 0), (2, 1), (2, 2),
                 (3, 0), (3, 1), (3, 2), (3, 3),
                 (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]),
        (white, [(0, 0), (0, 1), (1, 2), (2, 3), (3, 4)]),
        (black, [(0, 2), (1, 3), (2, 4)]),
        (white, [(0, 3), (0, 4), (1, 4)]),
    ]

    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    for css, cells in layout:
        for row, col in cells:
            styles.iloc[row, col] = css
    return styles

def styled_matrix(df, threat, consequence, agg='sum'):
    """Build the likelihood/consequence matrix and return it as a coloured Styler.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows; passed to ``build_matrix``.
    threat : str
        Column used for the matrix rows.
    consequence : str
        Column used for the matrix columns.
    agg : str or callable, optional
        Aggregation passed through to ``build_matrix``.

    Returns
    -------
    pandas.io.formats.style.Styler
        Matrix sorted with 'L5' on top and 'Total' at the bottom, with cell
        background/text colours, fixed cell size and borders applied.
    """
    # 'Total' is the lowest category, so a descending sort puts it last.
    row_order = ["Total", "L1", "L2", "L3", "L4", "L5"]
    cell_props = {'width': '50px',
                  'height': '60px',
                  'border': '1px solid #aaaaaa',
                  'text-align': 'center'}
    header_styles = [{'selector': 'th', 'props': [('border', '1px solid #aaaaaa')]}]

    matrix = build_matrix(df, threat, consequence, agg=agg)
    # A row labelled NaN is relabelled as the 'Total' row.
    matrix = matrix.rename(index={np.nan: 'Total'})
    matrix.index = pd.Categorical(matrix.index, row_order, ordered=True)

    # NOTE(review): the two colouring callbacks address cells by position, so
    # the colours only line up when all five L rows (and five leading
    # consequence columns) are present in the matrix - confirm for filtered data.
    styler = matrix.sort_index(ascending=False).style
    styler = styler.apply(risk_matrix, axis=None)
    styler = styler.apply(risk_cell_text, axis=None)
    styler = styler.set_properties(**cell_props)
    styler = styler.set_table_styles(header_styles)
    return styler

In [21]:
# Length-weighted matrix of the stored overall likelihood label against the safety consequence class.
styled_matrix(temp,'_Matrix_Row_Label','SAF_COF')

# Earlier ad-hoc check for a single pipeline system, kept for reference:
# temp.query("`Pipeline System` == 'Cochrane Extraction Pipelines'").pivot_table(index='EC_LOF', 
#                                   columns='TOTAL_COF', 
#                                   values='Length (m)',
#                                   aggfunc='sum',
#                                  dropna=False, margins=True, margins_name='Total',
#                                  fill_value=0.00)

SAF_COF,C1,C2,C3,C4,C5,Total
L5,15662.55,15068.339,617.442,0.0,0.0,31348.331
L4,480251.126,567711.778,51690.811,5561.106,64.208,1105279.029
L3,444451.535,448762.383,33379.297,790.788,121.941,927505.944
L2,531060.579,986702.683,71532.754,5858.28,5161.252,1600315.548
L1,1125037.452,1264288.358,303253.786,275000.186,42190.005,3009769.787
Total,2596463.242,3282533.541,460474.09,287210.36,47537.406,6674218.639


In [22]:
def export_results(df, query_str=None, name='output'):
    """Write the (optionally filtered) rows plus twelve risk matrices to one workbook.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the *_LOF / *_COF columns and 'Length (m)'.
    query_str : str or None, optional
        ``DataFrame.query`` expression selecting the rows to export;
        ``None`` exports every row.
    name : str, optional
        File stem; the workbook is written to ``name + '.xlsx'``.

    Returns
    -------
    None
        Prints 'Done!' once the workbook has been written.
    """
    # (row column, column column, sheet name), in the order the sheets are written.
    matrix_sheets = [
        ('EC_LOF', 'TOTAL_COF', 'EC Matrix'),
        ('IC_LOF', 'TOTAL_COF', 'IC Matrix'),
        ('SCC_LOF', 'TOTAL_COF', 'SCC Matrix'),
        ('LFERW_LOF', 'TOTAL_COF', 'LFERW Matrix'),
        ('HYD_LOF', 'TOTAL_COF', 'HYD Matrix'),
        ('GEO_LOF', 'TOTAL_COF', 'GEO Matrix'),
        ('MD_LOF', 'TOTAL_COF', 'MD Matrix'),
        ('TOTAL_LOF', 'SAF_COF', 'SAF Matrix'),
        ('TOTAL_LOF', 'ENV_COF', 'ENV Matrix'),
        ('TOTAL_LOF', 'FIN_COF', 'FIN Matrix'),
        ('TOTAL_LOF', 'OUT_COF', 'OUT Matrix'),
        ('TOTAL_LOF', 'TOTAL_COF', 'Total Matrix'),
    ]

    # Identity test: None means "no filter"; any string is passed to query().
    if query_str is None:
        df1 = df.copy()
    else:
        df1 = df.query(query_str).copy()

    with pd.ExcelWriter(name+'.xlsx') as writer:
        df1.to_excel(writer, sheet_name='results')
        for threat, consequence, sheet in matrix_sheets:
            styled_matrix(df1, threat, consequence).to_excel(writer, sheet_name=sheet)
    print('Done!')

In [25]:
def export_all_matrices(df, query_str=None, name='output'):
    """Write one styled matrix per (likelihood class, consequence class) pair to a workbook.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the *_LOF / *_COF columns and 'Length (m)'.
    query_str : str or None, optional
        ``DataFrame.query`` expression selecting the rows to use;
        ``None`` uses every row.
    name : str, optional
        File stem; the workbook is written to ``name + '.xlsx'``.

    Returns
    -------
    None
        Prints 'Done!' once the workbook has been written. Sheets are named
        ``'<threat>-<consequence>'``.
    """
    # Local import: itertools is only needed by this function.
    import itertools

    threats = ['EC_LOF','IC_LOF','SCC_LOF','LFERW_LOF','MD_LOF','GEO_LOF','HYD_LOF','TOTAL_LOF']
    consequences = ['SAF_COF','ENV_COF','FIN_COF','OUT_COF','TOTAL_COF']

    # Identity test: None means "no filter"; any string is passed to query().
    if query_str is None:
        df1 = df.copy()
    else:
        df1 = df.query(query_str).copy()

    with pd.ExcelWriter(name+'.xlsx') as writer:
        for x, y in itertools.product(threats, consequences):
            styled_matrix(df1,x,y).to_excel(writer, sheet_name=f'{x}-{y}')

    print('Done!')

In [192]:
# Number of non-null 'Corporation' values (i.e. rows) per pipeline system.
temp.groupby('Pipeline System')['Corporation'].count()

Pipeline System
Bow River Pipelines              228309
Central Alberta Pipelines         90880
Cochrane Extraction Pipelines       198
Cold Lake Pipelines              161729
Corridor Pipelines                92721
Mid-Saskatchewan Pipelines        44379
Off Gas Pipelines                 81355
Polaris Pipelines                130516
Name: Corporation, dtype: int64

In [28]:
# Explicit pipeline-system -> file-stem mapping. Pairing the names with
# `unique()` purely by position would silently mislabel the workbooks if the
# row order of `temp` ever changed.
export_names = {
    'Cochrane Extraction Pipelines': 'cochrane_processed',
    'Bow River Pipelines': 'bowriver_processed',
    'Central Alberta Pipelines': 'central_alta_processed',
    'Mid-Saskatchewan Pipelines': 'midsask_processed',
    'Off Gas Pipelines': 'offgas_processed',
    'Cold Lake Pipelines': 'coldlake_processed',
    'Corridor Pipelines': 'corridor_processed',
    'Polaris Pipelines': 'polaris_processed',
}
# File stems in mapping order; the read-back cell below iterates over `names`.
names = list(export_names.values())
for x in temp['Pipeline System'].unique():
    # A KeyError here means the data contains a system with no file name assigned.
    y = export_names[x]
    print(f"`Pipeline System` == '{x}'", '---',y)
    export_results(temp,f"`Pipeline System` == '{x}'",y)

`Pipeline System` == 'Cochrane Extraction Pipelines' --- cochrane_processed
Done!
`Pipeline System` == 'Bow River Pipelines' --- bowriver_processed
Done!
`Pipeline System` == 'Central Alberta Pipelines' --- central_alta_processed
Done!
`Pipeline System` == 'Mid-Saskatchewan Pipelines' --- midsask_processed
Done!
`Pipeline System` == 'Off Gas Pipelines' --- offgas_processed
Done!
`Pipeline System` == 'Cold Lake Pipelines' --- coldlake_processed
Done!
`Pipeline System` == 'Corridor Pipelines' --- corridor_processed
Done!
`Pipeline System` == 'Polaris Pipelines' --- polaris_processed
Done!


In [29]:
# Read each exported workbook back (first sheet) and print its (rows, columns) shape.
for x in names:
    print(pd.read_excel(f'{x}.xlsx').shape)

(198, 64)
(228309, 64)
(90880, 64)
(44379, 64)
(81355, 64)
(161729, 64)
(92721, 64)
(130516, 64)


In [30]:
# Sum of the per-workbook row counts printed by the read-back cell; it should
# equal the row count of the full table.
np.sum([198, 228309, 90880, 44379, 81355, 161729, 92721, 130516])

830087

In [26]:
# Export all rows (no filter) with the twelve matrices to combined_processed.xlsx.
export_results(temp,name='combined_processed')

Done!


In [27]:
# Explicit pipeline-system -> file-stem mapping. Pairing the names with
# `unique()` purely by position would silently mislabel the workbooks if the
# row order of `temp` ever changed.
matrix_export_names = {
    'Cochrane Extraction Pipelines': 'cochrane_matrices',
    'Bow River Pipelines': 'bowriver_matrices',
    'Central Alberta Pipelines': 'central_alta_matrices',
    'Mid-Saskatchewan Pipelines': 'midsask_matrices',
    'Off Gas Pipelines': 'offgas_matrices',
    'Cold Lake Pipelines': 'coldlake_matrices',
    'Corridor Pipelines': 'corridor_matrices',
    'Polaris Pipelines': 'polaris_matrices',
}
# File stems in mapping order, kept under the original variable name.
names2 = list(matrix_export_names.values())
for x in temp['Pipeline System'].unique():
    # A KeyError here means the data contains a system with no file name assigned.
    y = matrix_export_names[x]
    print(f"`Pipeline System` == '{x}'", '---',y)
    export_all_matrices(temp,f"`Pipeline System` == '{x}'",y)

`Pipeline System` == 'Cochrane Extraction Pipelines' --- cochrane_matrices
Done!
`Pipeline System` == 'Bow River Pipelines' --- bowriver_matrices
Done!
`Pipeline System` == 'Central Alberta Pipelines' --- central_alta_matrices
Done!
`Pipeline System` == 'Mid-Saskatchewan Pipelines' --- midsask_matrices
Done!
`Pipeline System` == 'Off Gas Pipelines' --- offgas_matrices
Done!
`Pipeline System` == 'Cold Lake Pipelines' --- coldlake_matrices
Done!
`Pipeline System` == 'Corridor Pipelines' --- corridor_matrices
Done!
`Pipeline System` == 'Polaris Pipelines' --- polaris_matrices
Done!


In [34]:
# Export every likelihood/consequence matrix combination for all rows to matrix_permutations.xlsx.
export_all_matrices(temp,name='matrix_permutations')

Done!


In [14]:
# Ordered risk scale (Low < Medium < High < Extreme), so that `.max()` on a
# column cast to this dtype returns the worst risk class.
cat_type = pd.CategoricalDtype(['Low', 'Medium', 'High', 'Extreme'], ordered=True)
cat_type

CategoricalDtype(categories=['Low', 'Medium', 'High', 'Extreme'], ordered=True)

In [15]:
def str_concat(x):
    """Join the sorted distinct values of ``x``, one per line."""
    distinct = pd.Series(pd.Series(x).unique())
    return distinct.sort_values().str.cat(sep='\n')


def str_concat2(x):
    """Join the sorted distinct values of ``x`` (cast to text) with ', '."""
    as_text = pd.Series(x.astype(str)).ffill()
    return pd.Series(as_text.unique()).sort_values().str.cat(sep=', ')


def str_concat3(x):
    """Like ``str_concat2`` but rounds ``x`` to 2 decimals before casting to text."""
    as_text = pd.Series(x.round(2).astype(str)).ffill()
    return pd.Series(as_text.unique()).sort_values().str.cat(sep=', ')


def num_analytics(x):
    """Two-line 'Min/Max' summary of ``x`` (NaN ignored), rounded to 2 decimals."""
    lowest = np.nanmin(x).round(2)
    highest = np.nanmax(x).round(2)
    return f"Min: {lowest}\nMax: {highest}"


def num_analytics2(x):
    """Two-line 'Min/Max' summary of ``x`` (NaN ignored) in 2-decimal scientific notation."""
    lowest = np.nanmin(x)
    highest = np.nanmax(x)
    return f"Min: {lowest:.2e}\nMax: {highest:.2e}"


def num_analytics3(x):
    """Sentence-style min/max summary of ``x`` (NaN ignored), rounded to 2 decimals."""
    lowest = np.nanmin(x).round(2)
    highest = np.nanmax(x).round(2)
    return f"minimum of {lowest} and maximum of {highest}"


def num_analytics4(x):
    """Sentence-style min/max summary of ``x`` (NaN ignored) in 3-decimal scientific notation."""
    lowest = np.nanmin(x)
    highest = np.nanmax(x)
    return f"minimum of {lowest:.3e} and maximum of {highest:.3e}"


def num_analytics5(x):
    """Sentence-style min/max summary of ``x`` (NaN ignored) formatted as dollar amounts."""
    lowest = np.nanmin(x)
    highest = np.nanmax(x)
    return f"minimum of ${lowest:,.2f} and maximum of ${highest:,.2f}"


def date_analytics(x):
    """Sentence-style earliest/latest calendar date found in the Series ``x``."""
    earliest = pd.to_datetime(x).dt.date.min()
    latest = pd.to_datetime(x).dt.date.max()
    return f"minimum of {earliest} and maximum of {latest}"

In [107]:
# Per line: the pipeline system(s) it belongs to and, for each risk column,
# the worst class found on any of its rows (max on the ordered risk scale).
temp.groupby(['Line Name']).agg({
    'Pipeline System': lambda x: str_concat(x),
    **{risk_col: (lambda x: x.astype(cat_type).max())
       for risk_col in ['SAF_RISK', 'ENV_RISK', 'FIN_RISK', 'OUT_RISK', 'TOTAL_RISK']},
})

Unnamed: 0_level_0,Pipeline System,SAF_RISK,ENV_RISK,FIN_RISK,OUT_RISK,TOTAL_RISK
Line Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LS2000,Bow River Pipelines,Low,Extreme,High,Extreme,Extreme
LS2001,Bow River Pipelines,Low,High,High,High,High
LS2002,Bow River Pipelines,Low,High,High,High,High
LS2003,Bow River Pipelines,Low,High,High,High,High
LS2004,Bow River Pipelines,Medium,Extreme,Extreme,Extreme,Extreme
LS2005,Bow River Pipelines,Low,High,High,High,High
LS2006,Bow River Pipelines,Medium,Extreme,Extreme,Extreme,Extreme
LS2008,Bow River Pipelines,Low,Low,Medium,Low,Medium
LS2012,Bow River Pipelines,Low,Extreme,High,Low,Extreme
LS2013,Bow River Pipelines,Low,High,High,Low,High


In [112]:
# (Line Name, worst SAF_RISK) pairs, built as a MultiIndex to select with below.
saf = temp.groupby(['Line Name']).agg({'Pipeline System':lambda x: str_concat(x),
                              'SAF_RISK':lambda x: x.astype(cat_type).max()
                                      }).set_index(['SAF_RISK'],append=True).index

# Distinct 'Likelihood Driver Method' values per (line, risk class), restricted
# to each line's worst safety risk class via `saf`.
temp.groupby(['Line Name','SAF_RISK']).agg({
                               'Likelihood Driver Method':lambda x: str_concat(x),
                                }
                               ).loc[saf,:]#.to_clipboard()


Unnamed: 0_level_0,Unnamed: 1_level_0,Likelihood Driver Method
Line Name,SAF_RISK,Unnamed: 2_level_1
LS2000,Low,EC Method 2\nIC Method 2\nSCC Method 1
LS2001,Low,SCC Method 1
LS2002,Low,EC Method 2\nSCC Method 1
LS2003,Low,GEO\nHYD\nSCC Method 1
LS2004,Medium,EC Method 2\nSCC Method 1
LS2005,Low,HYD\nSCC Method 1
LS2006,Medium,LF ERW Method 1
LS2008,Low,LF ERW Method 1
LS2012,Low,LF ERW Method 1
LS2013,Low,LF ERW Method 1


In [106]:
# (Line Name, worst ENV_RISK) pairs as a MultiIndex.
env = temp.groupby(['Line Name']).agg({'Pipeline System':lambda x: str_concat(x),
                              'ENV_RISK':lambda x: x.astype(cat_type).max()
                                      }).set_index(['ENV_RISK'],append=True).index

# Distinct 'Likelihood Driver Method' values per (line, risk class).
# The `.loc[env,:]` restriction is commented out, so every risk class of every line is shown.
temp.groupby(['Line Name','ENV_RISK']).agg({
                               'Likelihood Driver Method':lambda x: str_concat(x),
                                }
                               )#.loc[env,:].to_clipboard()

Unnamed: 0_level_0,Unnamed: 1_level_0,Likelihood Driver Method
Line Name,ENV_RISK,Unnamed: 2_level_1
LS2000,Extreme,EC Method 2\nIC Method 2
LS2000,High,EC Method 2\nSCC Method 1
LS2000,Low,EC Method 2\nIC Method 2\nSCC Method 1
LS2000,Medium,EC Method 2\nSCC Method 1
LS2001,High,SCC Method 1
LS2001,Low,SCC Method 1
LS2001,Medium,SCC Method 1
LS2002,High,SCC Method 1
LS2002,Low,EC Method 2\nSCC Method 1
LS2002,Medium,SCC Method 1


In [95]:
# (Line Name, worst FIN_RISK) pairs as a MultiIndex.
fin = temp.groupby(['Line Name']).agg({'Pipeline System':lambda x: str_concat(x),
                              'FIN_RISK':lambda x: x.astype(cat_type).max()
                                      }).set_index(['FIN_RISK'],append=True).index

# Distinct 'Likelihood Driver Method' values for each line's worst financial
# risk class; the result is copied to the clipboard rather than displayed.
temp.groupby(['Line Name','FIN_RISK']).agg({
                               'Likelihood Driver Method':lambda x: str_concat(x),
                                }
                               ).loc[fin,:].to_clipboard()

In [105]:
# (Line Name, worst OUT_RISK) pairs as a MultiIndex.
out = temp.groupby(['Line Name']).agg({'Pipeline System':lambda x: str_concat(x),
                              'OUT_RISK':lambda x: x.astype(cat_type).max()
                                      }).set_index(['OUT_RISK'],append=True).index

# Distinct 'Likelihood Driver Method' values per (line, risk class).
# The `.loc[out,:]` restriction is commented out, so every risk class of every line is shown.
temp.groupby(['Line Name','OUT_RISK']).agg({
                               'Likelihood Driver Method':lambda x: str_concat(x),
                                }
                               )#.loc[out,:].to_clipboard()

Unnamed: 0_level_0,Unnamed: 1_level_0,Likelihood Driver Method
Line Name,OUT_RISK,Unnamed: 2_level_1
LS2000,Extreme,EC Method 2\nIC Method 2
LS2000,High,EC Method 2\nIC Method 2\nSCC Method 1
LS2001,High,SCC Method 1
LS2002,High,EC Method 2\nSCC Method 1
LS2003,High,GEO\nHYD\nSCC Method 1
LS2004,Extreme,EC Method 2\nSCC Method 1
LS2005,High,HYD\nSCC Method 1
LS2006,Extreme,EC Method 2\nLF ERW Method 1
LS2008,Low,LF ERW Method 1
LS2012,Low,LF ERW Method 1


In [103]:
# (Line Name, worst TOTAL_RISK) pairs as a MultiIndex.
tot = temp.groupby(['Line Name']).agg({'Pipeline System':lambda x: str_concat(x),
                              'TOTAL_RISK':lambda x: x.astype(cat_type).max()
                                      }).set_index(['TOTAL_RISK'],append=True).index

# Distinct 'Likelihood Driver Method' values for each line's worst total
# risk class; the result is copied to the clipboard rather than displayed.
temp.groupby(['Line Name','TOTAL_RISK']).agg({
                               'Likelihood Driver Method':lambda x: str_concat(x),
                                }
                               ).loc[tot,:].to_clipboard()