# Import Statements

In [1]:
import pandas as pd
import pyodbc
import numpy as np
from scipy.stats import norm
import datetime
import matplotlib.pyplot as plt
from matplotlib import colors 
from sympy.physics import units

# Raise pandas' display limits so wide / long frames are rendered without truncation in cell output.
pd.set_option("display.max_columns", 999)
pd.set_option('display.max_rows', 500)
pd.set_option("display.expand_frame_repr",True)
pd.set_option("display.max_colwidth", 100)

In [2]:
# folder = r"Z:\Interpipeline Fund\2019-01_Annual_Risk_Management_Support\3_Engineering\Results & QC\2 Risk Extracts"
# NOTE(review): absolute, user-specific path -- the relative file reads further down only work from this directory.
folder = r"C:\Users\armando_borjas\Documents"
# Make `folder` the working directory, then list its contents.
%cd $folder
%ls

C:\Users\armando_borjas\Documents
 Volume in drive C has no label.
 Volume Serial Number is FE48-FC0C

 Directory of C:\Users\armando_borjas\Documents

2020-09-04  02:30 PM           120,640 (Issued) PMC Sentencing Criteria.algproj
2020-09-29  08:59 AM    <DIR>          .
2020-09-29  08:59 AM    <DIR>          ..
2020-09-09  09:43 AM           131,271 API 1183 Dent Assessment.algproj
2020-08-10  02:26 PM    <DIR>          Custom Office Templates
2020-04-01  12:59 PM         1,085,868 Demo.pbix
2019-12-11  09:13 AM    <DIR>          Downloads
2019-10-29  08:19 AM    <DIR>          Dynamic Risk
2020-09-24  04:45 PM    <DIR>          Filed Work
2020-09-25  11:41 AM            57,775 FortisBC_RockTunnel_SamplePOE.xlsx
2020-02-06  05:18 PM    <DIR>          GIS DataBase
2020-09-08  09:53 AM           301,900 Inter Pipeline Semi-Quantitative Risk Algorithm.algproj
2020-09-29  08:59 AM           144,089 IPC2020-Final-Paper-List-DR.xlsx
2019-12-09  11:37 AM    <DIR>          My Received Files


# Functions

In [4]:
def sqlserver_sql(q, server = 'SQL2017', db = 'IPL_IRASV6_STAGE'):
    """Run a SQL statement against SQL Server and return the result set as a DataFrame.

    Parameters
    ----------
    q : str
        SQL text handed to ``pd.read_sql_query``.
    server : str
        Value placed in the ``Server=`` part of the connection string.
    db : str
        Value placed in the ``Database=`` part of the connection string.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``pd.read_sql_query``.

    Notes
    -----
    The connection string uses ``Trusted_Connection=yes``, so no credentials are passed here.
    Errors raised while connecting or querying propagate unchanged.
    """
    driver = '{SQL Server Native Client 11.0}'
    conn = pyodbc.connect("Driver="+driver+";Server="+server+";Database="+db+";Trusted_Connection=yes;")

    try:
        # performing query to database
        return pd.read_sql_query(q, conn)
    finally:
        # always release the connection, including when the query itself fails
        conn.close()

In [5]:
def get_ili_ranges(line):
    """Return the in-line-inspection ranges whose line name contains ``line``.

    Parameters
    ----------
    line : str
        Text matched against ``LineLoop.LineName`` with ``like '%<line>%'``.

    Returns
    -------
    pandas.DataFrame
        Line name, status code, tool code and every ``InlineInspectionRange`` column,
        ordered by ``ILIRStartDate`` descending (newest first).
    """
    # The value is embedded inside a quoted SQL literal, so single quotes are doubled;
    # otherwise a name containing an apostrophe would terminate the literal early.
    line = str(line).replace("'", "''")

    q1 = f"""set nocount on;
            select ll.LineName, ld.code [status], ld2.code [tool], r.* from InlineInspectionRange r
            join StationSeries ss on r.BeginStationSeriesId = ss.Id
            join LineLoop ll on ss.LineLoopId = ll.Id
            join ListDomain ld on r.ILIRStatusDomainId = ld.Id
            join ListDomain ld2 on r.ILIRToolDomainId = ld2.Id
            where ll.LineName like '%{line}%'
            order by r.ILIRStartDate desc"""

    return sqlserver_sql(q1)

In [6]:
def build_df(od_i, wt_mm, grade_mpa, maop_kpa, installdate, ILIdate, pdf, lengthmm, create=True, **kwargs):
    """Build a one-row DataFrame for a single feature, or add that row to an existing frame.

    Parameters
    ----------
    od_i, wt_mm, grade_mpa, maop_kpa, installdate, ILIdate, pdf, lengthmm
        Values stored in the columns ``OD_inch``, ``WT_mm``, ``grade_MPa``, ``MAOP_kPa``,
        ``install_date``, ``ILIRStartDate``, ``depth_fraction`` and ``length_mm`` respectively.
    create : bool
        ``True`` returns just the new one-row frame. ``False`` returns the frame passed
        as ``df=...`` with the new row added at the end.
    **kwargs
        ``df`` - the existing DataFrame; required when ``create`` is ``False``.

    Returns
    -------
    pandas.DataFrame
        The existing frame is not modified; the row keeps its own index label (0).

    Raises
    ------
    KeyError
        If ``create`` is ``False`` and no ``df`` keyword was supplied.
    """
    new_row = pd.DataFrame(dict(OD_inch=[od_i],
                                WT_mm=[wt_mm],
                                grade_MPa=[grade_mpa],
                                install_date=[installdate],
                                MAOP_kPa=[maop_kpa],
                                ILIRStartDate=[ILIdate],
                                depth_fraction=[pdf],
                                length_mm=[lengthmm]
                                ))
    if create:
        return new_row

    try:
        existing = kwargs['df']
    except KeyError:
        raise KeyError("build_df(create=False) needs the existing frame passed as df=...") from None

    # DataFrame.append was removed in pandas 2.0; concat gives the same result.
    return pd.concat([existing, new_row])

In [7]:
def get_features_for_poe(ILIRID, start=1, amt=999999999):
    """Fetch the features of one in-line-inspection range, joined to pipe and MAOP data.

    Parameters
    ----------
    ILIRID
        ``InlineInspectionRange`` id; interpolated unquoted into the ``where`` clause.
    start : int
        First row number (``RN``) to return. ``RN`` is a 1-based ``ROW_NUMBER()`` over the
        range's features ordered by feature id.
    amt : int
        Number of rows to return, i.e. ``RN`` from ``start`` to ``start + amt - 1``.

    Returns
    -------
    pandas.DataFrame
        One row per matched feature, ordered by ``chainage_m``: line, vendor, ILI start date,
        tool, feature id / status / type, chainage (``StationNum*MultiplierNum+FactorNum``),
        surface indicator, depth, length, width, the pipe properties of the current
        (``EffectiveEndDate is null``) pipe segment containing the chainage, and the MAOP
        of the current MAOP record containing it.
    """
    # SQL BETWEEN is inclusive on both ends, so the last row of an `amt`-row window is
    # start+amt-1; using start+amt would return one row too many and make consecutive
    # windows overlap by one feature.
    last_rn = start + amt - 1

    query = f"""select
            c.RN,
            ll.linename [line],

            ld4.code [vendor], 

            format(r.ILIRStartDate,'yyyy-MM-dd') [ILIRStartDate],
            ld3.code [tool],

            f.ILIFFeatureNumber [FeatureID],
            ld.code [status], 
            ld2.code [type],
            (f.StationNum*mlv.MultiplierNum+mlv.FactorNum) [chainage_m], 
            f.ILIFSurfaceInd, 
            f.ILIFPeakDepthPct [depth_fraction], 
            f.ILIFLength [length_mm], 
            f.ILIFWidth [width_mm], 

            format(a.PipeInserviceDate,'yyyy-MM-dd') [install_date],
            a.PipeOutsideDiameter [OD_inch],
            a.PipeWallThickness [WT_mm],
            a.PipeGrade [grade_MPa],
            a.PipeToughness [toughness_J],
            a.[begin_ps_c],
            a.[end_ps_c],

            b.MAOP_kPa,
            b.begin_maop_c,
            b.end_maop_c  

            from InlineInspectionFeature f

            left join ListDomain ld on f.ILIFStatusDomainId = ld.Id
            left join ListDomain ld2 on f.ILIFTypeDomainId = ld2.Id
            left join StationSeries ss on f.StationSeriesId = ss.id
            left join LineLoop ll on ss.LineLoopId = ll.Id
            left join inlineinspectionrange r on f.inlineinspectionrangeid = r.id
            left join ListDomain ld3 on r.ILIRToolDomainId = ld3.Id
            left join InlineInspection i on f.InlineInspectionId = i.Id
            left join ListDomain ld4 on i.ILICompanyDomainId = ld4.Id
            left join MLVCorrection mlv on f.StationSeriesId = mlv.StationSeriesId

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                ps.EffectiveStartDate,
                ps.PipeInserviceDate,
                ps.PipeOutsideDiameter,
                ps.PipeWallThickness,
                ps.PipeGrade,
                ps.PipeToughness,
                (ps.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum) [begin_ps_c],
                (ps.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum) [end_ps_c]
                from PipeSegment ps
                join StationSeries ss on ps.BeginStationSeriesId = ss.id
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on ps.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on ps.EndStationSeriesId = mlv2.StationSeriesId
                where ps.EffectiveEndDate is null
                ) a on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between a.[begin_ps_c] and a.[end_ps_c]) and a.LinloopId = ll.id

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                maop.EffectiveEndDate,
                maop.BeginStationSeriesId,
                maop.MaxAllowablePressure [MAOP_kPa],
                maop.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum [begin_maop_c],
                maop.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum [end_maop_c]
                from maop maop
                join StationSeries ss on ss.id = maop.BeginStationSeriesId
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on maop.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on maop.EndStationSeriesId = mlv2.StationSeriesId
                where maop.EffectiveEndDate is null
                ) b on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between b.[begin_maop_c] and b.[end_maop_c]) and b.[LinloopId] = ll.id 

            left join
                (select ROW_NUMBER() over(partition by ff.InlineInspectionRangeId order by ff.id asc)  [RN],
                ff.id,
                ff.InlineInspectionRangeId
                from InlineInspectionFeature ff
                )  c on f.Id = c.Id and f.InlineInspectionRangeId = c.InlineInspectionRangeId

            where f.InlineInspectionRangeId = {ILIRID} and (c.RN between {start} and {last_rn})
            order by chainage_m asc"""
    
    return sqlserver_sql(query)

In [8]:
def get_features_for_poe_by_ID(seriesID):
    """Fetch specific in-line-inspection features by id, joined to pipe and MAOP data.

    Parameters
    ----------
    seriesID : iterable
        ``InlineInspectionFeature.Id`` values (for example a pandas Series, or a list).
        Each value is rendered with ``str()`` and placed in an ``in (...)`` list.

    Returns
    -------
    pandas.DataFrame
        Same columns as the range-based feature query, ordered by ``chainage_m``.
        An empty ``seriesID`` yields a query that matches no rows.
    """
    # Build the list by plain iteration. The previous Series.aggregate(lambda ...) form only
    # produced a single string through pandas' internal exception fallback, could not take a
    # list, and rendered an empty Series' repr into the SQL.
    id_values = [str(value) for value in seriesID]
    if id_values:
        seriesID_string = "(" + ', '.join(id_values) + ")"
    else:
        # `in ()` is a syntax error in T-SQL; `in (NULL)` is valid and matches nothing.
        seriesID_string = "(NULL)"
    
    query = f"""select
            c.RN,
            ll.linename [line],

            ld4.code [vendor], 

            format(r.ILIRStartDate,'yyyy-MM-dd') [ILIRStartDate],
            ld3.code [tool],

            f.ILIFFeatureNumber [FeatureID],
            ld.code [status], 
            ld2.code [type],
            (f.StationNum*mlv.MultiplierNum+mlv.FactorNum) [chainage_m], 
            f.ILIFSurfaceInd, 
            f.ILIFPeakDepthPct [depth_fraction], 
            f.ILIFLength [length_mm], 
            f.ILIFWidth [width_mm], 

            format(a.PipeInserviceDate,'yyyy-MM-dd') [install_date],
            a.PipeOutsideDiameter [OD_inch],
            a.PipeWallThickness [WT_mm],
            a.PipeGrade [grade_MPa],
            a.PipeToughness [toughness_J],
            a.[begin_ps_c],
            a.[end_ps_c],

            b.MAOP_kPa,
            b.begin_maop_c,
            b.end_maop_c  

            from InlineInspectionFeature f

            left join ListDomain ld on f.ILIFStatusDomainId = ld.Id
            left join ListDomain ld2 on f.ILIFTypeDomainId = ld2.Id
            left join StationSeries ss on f.StationSeriesId = ss.id
            left join LineLoop ll on ss.LineLoopId = ll.Id
            left join inlineinspectionrange r on f.inlineinspectionrangeid = r.id
            left join ListDomain ld3 on r.ILIRToolDomainId = ld3.Id
            left join InlineInspection i on f.InlineInspectionId = i.Id
            left join ListDomain ld4 on i.ILICompanyDomainId = ld4.Id
            left join MLVCorrection mlv on f.StationSeriesId = mlv.StationSeriesId

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                ps.EffectiveStartDate,
                ps.PipeInserviceDate,
                ps.PipeOutsideDiameter,
                ps.PipeWallThickness,
                ps.PipeGrade,
                ps.PipeToughness,
                (ps.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum) [begin_ps_c],
                (ps.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum) [end_ps_c]
                from PipeSegment ps
                join StationSeries ss on ps.BeginStationSeriesId = ss.id
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on ps.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on ps.EndStationSeriesId = mlv2.StationSeriesId
                where ps.EffectiveEndDate is null
                ) a on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between a.[begin_ps_c] and a.[end_ps_c]) and a.LinloopId = ll.id

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                maop.EffectiveEndDate,
                maop.BeginStationSeriesId,
                maop.MaxAllowablePressure [MAOP_kPa],
                maop.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum [begin_maop_c],
                maop.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum [end_maop_c]
                from maop maop
                join StationSeries ss on ss.id = maop.BeginStationSeriesId
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on maop.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on maop.EndStationSeriesId = mlv2.StationSeriesId
                where maop.EffectiveEndDate is null
                ) b on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between b.[begin_maop_c] and b.[end_maop_c]) and b.[LinloopId] = ll.id 

            left join
                (select ROW_NUMBER() over(partition by ff.InlineInspectionRangeId order by ff.id asc)  [RN],
                ff.id,
                ff.InlineInspectionRangeId
                from InlineInspectionFeature ff
                )  c on f.Id = c.Id and f.InlineInspectionRangeId = c.InlineInspectionRangeId

            where f.Id in {seriesID_string}
            order by chainage_m asc"""
    
    return sqlserver_sql(query)

In [9]:
def get_cracks_for_poe(ILIRID, start=1, amt=999999999):
    """Fetch the crack anomalies of one in-line-inspection range, joined to pipe and MAOP data.

    Parameters
    ----------
    ILIRID
        ``InlineInspectionRange`` id; interpolated unquoted into the ``where`` clause.
    start : int
        First row number (``RN``) to return. ``RN`` is a 1-based ``ROW_NUMBER()`` over the
        range's crack anomalies ordered by anomaly id.
    amt : int
        Number of rows to return, i.e. ``RN`` from ``start`` to ``start + amt - 1``.

    Returns
    -------
    pandas.DataFrame
        One row per matched anomaly, ordered by ``chainage_m``: line, vendor, ILI start date,
        tool, anomaly number / status / type, chainage (``StationNum*MultiplierNum+FactorNum``),
        surface indicator, depth, length, width, the pipe properties of the current
        (``EffectiveEndDate is null``) pipe segment containing the chainage, and the MAOP
        of the current MAOP record containing it.
    """
    # SQL BETWEEN is inclusive on both ends, so the last row of an `amt`-row window is
    # start+amt-1; using start+amt would return one row too many and make consecutive
    # windows overlap by one anomaly.
    last_rn = start + amt - 1

    query = f"""select
            c.RN,
            ll.linename [line],

            ld4.code [vendor], 

            format(r.ILIRStartDate,'yyyy-MM-dd') [ILIRStartDate],
            ld3.code [tool],

            f.ILICAAnomalyNumber [FeatureID],
            ld.code [status], 
            ld2.code [type],
            (f.StationNum*mlv.MultiplierNum+mlv.FactorNum) [chainage_m], 
            f.ILICASurfaceInd, 
            f.ILICADepthPct [depth_fraction], 
            f.ILICALength [length_mm], 
            f.ILICAWidth [width_mm], 

            format(a.PipeInserviceDate,'yyyy-MM-dd') [install_date],
            a.PipeOutsideDiameter [OD_inch],
            a.PipeWallThickness [WT_mm],
            a.PipeGrade [grade_MPa],
            a.PipeToughness [toughness_J],
            a.[begin_ps_c],
            a.[end_ps_c],

            b.MAOP_kPa,
            b.begin_maop_c,
            b.end_maop_c  

            from InlineInspectionCrackAnomaly f

            left join ListDomain ld on f.ILICAStatusDomainId = ld.Id
            left join ListDomain ld2 on f.ILICATypeDomainId = ld2.Id
            left join StationSeries ss on f.StationSeriesId = ss.id
            left join LineLoop ll on ss.LineLoopId = ll.Id
            left join inlineinspectionrange r on f.inlineinspectionrangeid = r.id
            left join ListDomain ld3 on r.ILIRToolDomainId = ld3.Id
            left join InlineInspection i on f.InlineInspectionId = i.Id
            left join ListDomain ld4 on i.ILICompanyDomainId = ld4.Id
            left join MLVCorrection mlv on f.StationSeriesId = mlv.StationSeriesId

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                ps.EffectiveStartDate,
                ps.PipeInserviceDate,
                ps.PipeOutsideDiameter,
                ps.PipeWallThickness,
                ps.PipeGrade,
                ps.PipeToughness,
                (ps.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum) [begin_ps_c],
                (ps.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum) [end_ps_c]
                from PipeSegment ps
                join StationSeries ss on ps.BeginStationSeriesId = ss.id
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on ps.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on ps.EndStationSeriesId = mlv2.StationSeriesId
                where ps.EffectiveEndDate is null
                ) a on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between a.[begin_ps_c] and a.[end_ps_c]) and a.LinloopId = ll.id

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                maop.EffectiveEndDate,
                maop.BeginStationSeriesId,
                maop.MaxAllowablePressure [MAOP_kPa],
                maop.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum [begin_maop_c],
                maop.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum [end_maop_c]
                from maop maop
                join StationSeries ss on ss.id = maop.BeginStationSeriesId
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on maop.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on maop.EndStationSeriesId = mlv2.StationSeriesId
                where maop.EffectiveEndDate is null
                ) b on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between b.[begin_maop_c] and b.[end_maop_c]) and b.[LinloopId] = ll.id 

            left join
                (select ROW_NUMBER() over(partition by ff.InlineInspectionRangeId order by ff.id asc)  [RN],
                ff.id,
                ff.InlineInspectionRangeId
                from InlineInspectionCrackAnomaly ff
                )  c on f.Id = c.Id and f.InlineInspectionRangeId = c.InlineInspectionRangeId

            where f.InlineInspectionRangeId = {ILIRID} and (c.RN between {start} and {last_rn})
            order by chainage_m asc"""
    
    return sqlserver_sql(query)

In [10]:
def get_cracks_by_ID(seriesID):
    """Fetch specific in-line-inspection crack anomalies by id, joined to pipe and MAOP data.

    Parameters
    ----------
    seriesID : iterable
        ``InlineInspectionCrackAnomaly.Id`` values (for example a pandas Series, or a list).
        Each value is rendered with ``str()`` and placed in an ``in (...)`` list.

    Returns
    -------
    pandas.DataFrame
        Same columns as the range-based crack query, ordered by ``chainage_m``.
        An empty ``seriesID`` yields a query that matches no rows.
    """
    # Build the list by plain iteration. The previous Series.aggregate(lambda ...) form only
    # produced a single string through pandas' internal exception fallback, could not take a
    # list, and rendered an empty Series' repr into the SQL.
    id_values = [str(value) for value in seriesID]
    if id_values:
        seriesID_string = "(" + ', '.join(id_values) + ")"
    else:
        # `in ()` is a syntax error in T-SQL; `in (NULL)` is valid and matches nothing.
        seriesID_string = "(NULL)"
    
    query = f"""select
            c.RN,
            ll.linename [line],

            ld4.code [vendor], 

            format(r.ILIRStartDate,'yyyy-MM-dd') [ILIRStartDate],
            ld3.code [tool],

            f.ILICAAnomalyNumber [FeatureID],
            ld.code [status], 
            ld2.code [type],
            (f.StationNum*mlv.MultiplierNum+mlv.FactorNum) [chainage_m], 
            f.ILICASurfaceInd, 
            f.ILICADepthPct [depth_fraction], 
            f.ILICALength [length_mm], 
            f.ILICAWidth [width_mm], 

            format(a.PipeInserviceDate,'yyyy-MM-dd') [install_date],
            a.PipeOutsideDiameter [OD_inch],
            a.PipeWallThickness [WT_mm],
            a.PipeGrade [grade_MPa],
            a.PipeToughness [toughness_J],
            a.[begin_ps_c],
            a.[end_ps_c],

            b.MAOP_kPa,
            b.begin_maop_c,
            b.end_maop_c  

            from InlineInspectionCrackAnomaly f

            left join ListDomain ld on f.ILICAStatusDomainId = ld.Id
            left join ListDomain ld2 on f.ILICATypeDomainId = ld2.Id
            left join StationSeries ss on f.StationSeriesId = ss.id
            left join LineLoop ll on ss.LineLoopId = ll.Id
            left join inlineinspectionrange r on f.inlineinspectionrangeid = r.id
            left join ListDomain ld3 on r.ILIRToolDomainId = ld3.Id
            left join InlineInspection i on f.InlineInspectionId = i.Id
            left join ListDomain ld4 on i.ILICompanyDomainId = ld4.Id
            left join MLVCorrection mlv on f.StationSeriesId = mlv.StationSeriesId

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                ps.EffectiveStartDate,
                ps.PipeInserviceDate,
                ps.PipeOutsideDiameter,
                ps.PipeWallThickness,
                ps.PipeGrade,
                ps.PipeToughness,
                (ps.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum) [begin_ps_c],
                (ps.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum) [end_ps_c]
                from PipeSegment ps
                join StationSeries ss on ps.BeginStationSeriesId = ss.id
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on ps.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on ps.EndStationSeriesId = mlv2.StationSeriesId
                where ps.EffectiveEndDate is null
                ) a on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between a.[begin_ps_c] and a.[end_ps_c]) and a.LinloopId = ll.id

            left join 
                (select ll.id [LinloopId],
                ll.LineName,
                maop.EffectiveEndDate,
                maop.BeginStationSeriesId,
                maop.MaxAllowablePressure [MAOP_kPa],
                maop.BeginStationNum*mlv1.MultiplierNum+mlv1.FactorNum [begin_maop_c],
                maop.EndStationNum*mlv2.MultiplierNum+mlv2.FactorNum [end_maop_c]
                from maop maop
                join StationSeries ss on ss.id = maop.BeginStationSeriesId
                join LineLoop ll on ss.LineLoopId = ll.Id
                join MLVCorrection mlv1 on maop.BeginStationSeriesId = mlv1.StationSeriesId
                join MLVCorrection mlv2 on maop.EndStationSeriesId = mlv2.StationSeriesId
                where maop.EffectiveEndDate is null
                ) b on ((f.StationNum*mlv.MultiplierNum+mlv.FactorNum) between b.[begin_maop_c] and b.[end_maop_c]) and b.[LinloopId] = ll.id 

            left join
                (select ROW_NUMBER() over(partition by ff.InlineInspectionRangeId order by ff.id asc)  [RN],
                ff.id,
                ff.InlineInspectionRangeId
                from InlineInspectionCrackAnomaly ff
                )  c on f.Id = c.Id and f.InlineInspectionRangeId = c.InlineInspectionRangeId

            where f.Id in {seriesID_string}
            order by chainage_m asc"""
    
    return sqlserver_sql(query)

In [11]:
def check_table_overlap(table, s="SQL2017", d="IPL_IRASV6_STAGE"):
    """Load a begin/end-stationed table and measure the gap between consecutive records.

    Parameters
    ----------
    table : str
        Table name, interpolated into the ``from`` clause. The query reads its ``Id``,
        ``BeginStationNum``, ``EndStationNum``, ``BeginStationSeriesId`` and
        ``EndStationSeriesId`` columns.
    s, d : str
        Server and database forwarded to ``sqlserver_sql``.

    Returns
    -------
    pandas.DataFrame
        All records, sorted by ``BeginStationSeriesId`` then ``eBeginChainage``, with columns
        ``RecordId``, ``eBeginChainage``, ``eEndChainage``, ``record_diff`` followed by the
        table's own columns. Within each (begin series, end series) group ``record_diff`` is
        the next record's begin chainage minus this record's end chainage; the last record of
        a group gets 0. A negative value means the next record starts before this one ends.
    """
    # query for table to check for overlaps
    q1 = f"""set nocount on;
            select e.Id [RecordId],
            (mlv1.MultiplierNum*e.BeginStationNum+mlv1.FactorNum) [eBeginChainage],
            (mlv2.MultiplierNum*e.EndStationNum+mlv2.FactorNum) [eEndChainage], e.* from {table} e
            join MlvCorrection mlv1 on mlv1.StationSeriesId = e.BeginstationSeriesId
            join MlVcorrection mlv2 on mlv2.StationseriesId = e.EndStationSeriesId"""

    records = sqlserver_sql(q1, server=s, db=d)

    # sorting in increasing stationseriesid and chainage
    records = records.sort_values(by=['BeginStationSeriesId','eBeginChainage']).reset_index(drop=True)

    # A grouped shift keeps the original row index, so each difference stays attached to its
    # own record. Re-joining the output of groupby().apply() by position put values on the
    # wrong rows whenever one begin series held records for more than one end series,
    # because apply() returns rows in group order rather than in frame order.
    next_begin = records.groupby(['BeginStationSeriesId','EndStationSeriesId'])['eBeginChainage'].shift(-1)
    records['record_diff'] = (next_begin - records['eEndChainage']).fillna(0.00)

    # Move record_diff (currently last) to sit right after the three leading columns.
    n_cols = len(records.columns)
    column_order = [0, 1, 2, n_cols - 1] + list(range(3, n_cols - 1))
    return records.iloc[:, column_order]#.query("record_diff != 0.")

In [12]:
def check_table_span(table, s="SQL2017", d="IPL_IRASV6_STAGE"):
    """Compare the chainage extent of a table's records with the extent of the station series.

    For every ``LineLoopId`` the minimum begin chainage and maximum end chainage are taken
    once from ``stationseries`` and once from ``table``; the two are joined on ``LineLoopId``.

    Returns only the line loops where the two extents differ, with ``delta_end`` and
    ``delta_begin`` holding station-series value minus table value. Only the overall span is
    compared, so gaps inside the span are not detected.
    """
    # records of the table under test, with chainages and the owning line loop
    event_sql = f"""set nocount on;
            select ss.lineloopid [LineLoopId],
            (mlv1.MultiplierNum*e.BeginStationNum+mlv1.FactorNum) [eBeginChainage],
            (mlv2.MultiplierNum*e.EndStationNum+mlv2.FactorNum) [eEndChainage], e.* from {table} e
            join MlvCorrection mlv1 on mlv1.StationSeriesId = e.BeginstationSeriesId
            join MlVcorrection mlv2 on mlv2.StationseriesId = e.EndStationSeriesId
            join stationseries ss on e.Beginstationseriesid = ss.id
            """

    # the station series themselves, with chainages
    series_sql = """set nocount on;
            select 
            (mlv.MultiplierNum*ss.BeginStationNum+mlv.FactorNum) [sBeginChainage],
            (mlv.MultiplierNum*ss.EndStationNum+mlv.FactorNum) [sEndChainage], ss.* from stationseries ss
            join MlvCorrection mlv on mlv.StationSeriesId = ss.id
            """

    events = sqlserver_sql(event_sql, server=s, db=d)
    series = sqlserver_sql(series_sql, server=s, db=d)

    series_span = series.groupby("LineLoopId").agg({'sBeginChainage':'min','sEndChainage':'max'})
    event_span = events.groupby("LineLoopId").agg({'eBeginChainage':'min','eEndChainage':'max'})

    spans = series_span.join(event_span)
    spans = spans.assign(delta_end=spans.sEndChainage - spans.eEndChainage,
                         delta_begin=spans.sBeginChainage - spans.eBeginChainage)

    differs = (spans.delta_end != 0.) | (spans.delta_begin != 0.)
    return spans.loc[differs]

In [13]:
def impact_fault_tree(b1,b2,b3,b4,b5,b7,b9,b10,b11,b12,b6=0.40,b8=0.97):
    """Evaluate the fault-tree expression for the inputs ``b1`` .. ``b12``.

    Products chain inputs together and pairs are combined as ``1 - (1-p)*(1-q)``.
    Scalars and NumPy arrays both work because only ``*`` and ``-`` are used.
    Note the positional order: ``b6`` and ``b8`` come last and default to 0.40 and 0.97.
    """
    def either(p, q):
        # 1 - (1-p)(1-q)
        return 1 - ((1 - p) * (1 - q))

    branch_a = b7 * b8 * either(b2 * b3 * b4, b5 * b6)
    branch_b = b4 * either(b6 * b9, b6 * b10)
    middle = either(branch_a, branch_b)
    top = either(middle, b11)
    return b1 * b12 * top

round(impact_fault_tree(0.03,0.24,0.23,1.,0.33,0.52,0.2,0.09,0.03,0.87),6) == 0.005684

True

In [14]:
def background_gradient(s, m=0, M=0.1, cmap='Reds', low=0, high=0):
    """Map the values of ``s`` to CSS ``background-color`` declarations on a colormap.

    Parameters
    ----------
    s : object exposing ``.values``
        Values to colour (presumably a pandas Series handed in by a Styler - confirm with callers).
    m, M : float
        Values mapped to the bottom and top of the colormap.
    cmap : str
        Matplotlib colormap name.
    low, high : float
        Fractions of ``M - m`` by which the normalisation range is widened below ``m``
        and above ``M``.

    Returns
    -------
    list of str
        One ``'background-color: #rrggbb'`` string per value, in order.
    """
    rng = M - m
    # named `scaler` so it does not shadow the `norm` imported from scipy.stats
    scaler = colors.Normalize(m - (rng * low),
                              M + (rng * high))
    normed = scaler(s.values)
    # plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap is the supported lookup.
    palette = plt.get_cmap(cmap)
    return ['background-color: %s' % colors.rgb2hex(rgba) for rgba in palette(normed)]

In [15]:
# Evaluate the fault tree for several b1 values in one vectorised call.
# Keywords are spelled out because the function's positional order skips b6 and b8.
b1 = np.array([
    0.52, 0.26, 0.36, 0.076, 0.06, 0.06, 0.092, 0.660, 0.380,
])
impact_fault_tree(b1, b2=0.24, b3=0.23, b4=1.0, b5=0.33, b7=0.95, b9=0.2, b10=0.09, b11=0.03, b12=0.42)

array([0.06166557, 0.03083278, 0.04269155, 0.00901266, 0.00711526,
       0.00711526, 0.01091006, 0.07826783, 0.0450633 ])

Dynamic segmentation creation

In [48]:
def return_table(table, line='LS7108'):
    """Build the SQL that lists begin/end stationing of ``table``'s records on one line.

    Parameters
    ----------
    table : str
        Name of a table with begin/end station-series and station-number columns.
    line : str
        ``lineloop.linename`` to filter on. Defaults to the line this notebook was written for.

    Returns
    -------
    str
        The query text; nothing is executed here.
    """
    return f"""
            select
            ps.beginstationseriesid, ps.beginstationnum, ps.endstationseriesid, ps.endstationnum
            from {table} ps
            join stationseries ss on ps.beginstationseriesid = ss.id
            join lineloop ll on ss.lineloopid = ll.id
            where ll.linename = '{line}'"""

def return_ptable(table, line='LS7108'):
    """Build the SQL that lists the stationing of ``table``'s point records on one line.

    Parameters
    ----------
    table : str
        Name of a table with ``stationseriesid`` and ``stationnum`` columns.
    line : str
        ``lineloop.linename`` to filter on. Defaults to the line this notebook was written for.

    Returns
    -------
    str
        The query text; nothing is executed here.
    """
    # NOTE(review): the select reads t.stationseriesid but the join uses
    # t.beginstationseriesid -- confirm the point tables really carry both columns.
    return f"""
            select 
            t.stationseriesid, t.stationnum 
            from {table} t
            join stationseries ss on t.beginstationseriesid = ss.id
            join lineloop ll on ss.lineloopid = ll.id
            where ll.linename = '{line}'"""

def return_vt(table, line='LS7108'):
    """Build the SQL that lists begin/end stationing of one virtual table on one line.

    Parameters
    ----------
    table : str
        ``virtualtable.VirtualTableName`` to select.
    line : str
        ``lineloop.linename`` to filter on. Defaults to the line this notebook was written for.

    Returns
    -------
    str
        The query text; nothing is executed here.
    """
    return f"""
            select
            vts.beginstationseriesid, vts.beginstationnum, vts.endstationseriesid, vts.endstationnum
            from virtualtablestationing vts
            join stationseries ss on vts.beginstationseriesid = ss.id
            join lineloop ll on ss.lineloopid = ll.id
            join virtualtable vt on vts.virtualtableid = vt.id
            where ll.linename = '{line}' and vt.VirtualTableName = '{table}'"""

def ili_segmentation(segment, line='LS7108'):
    """Return evenly spaced break points along a line, each step no longer than ``segment``.

    The line is split into ``ceil(length / segment)`` equal parts, so the actual step is
    ``length / ceil(length / segment)``.

    Parameters
    ----------
    segment : float
        Maximum step length, in the same units as ``stationseries.endstationnum``.
    line : str
        ``lineloop.linename`` to look up. Defaults to the line this notebook was written for.

    Returns
    -------
    pandas.Series
        Break points starting at 0; the line's end itself is not included.

    Raises
    ------
    ValueError
        If the line has no station series.
    """
    q = f"""
        select
        ss.id, ss.beginstationnum, ss.endstationnum
        from stationseries ss
        join lineloop ll on ss.lineloopid = ll.id
        where ll.linename = '{line}'"""
    # The query built above is the one to run; the earlier code passed an undefined name (q4).
    temp = sqlserver_sql(q)
    if temp.empty:
        raise ValueError(f"no station series found for line {line!r}")

    # np.arange needs scalar bounds. With one station series this is simply its end station.
    # NOTE(review): with several series the largest end station is used -- confirm that is
    # the intended line length.
    line_length = float(temp.endstationnum.max())
    correct_segment = line_length/np.ceil(line_length/segment)
    return pd.Series(np.arange(0, line_length, correct_segment))

q = """
        select * from virtualtable"""

# Virtual tables of interest (the commented entries are currently left out).
vts = ["EnvironmentMedium",
"HabitatWildlifeSensitivityCat",
"SAF_PopulationPIR",
"SAF_PopulationIIZ",
"SAF_PopulationEPZ",
"CGA_ECGrowthRateMean",
"CGA_ECGrowthRateSD",
"CGA_ICGrowthRateMean",
"CGA_ICGrowthRateSD",
"EA_LFERWSeverity",
"EA_SCCSeverity",
# "MDBuriedMarkers",
# "MDOperatorResponse",
# "MDPatrolFrequency",
# "MDPipeFinding",
# "MDPipeMarking",
# "MDPublicAwareness",
# "MDSignage",
# "MDThirdPartyNotification",
      ]

# sqlserver_sql(q).query('VirtualTableName.isin(@vts)')

# Begin/end stationing of every linear table, virtual table and point table on the line.
doc = sqlserver_sql(return_table('coverdepth'))
maop = sqlserver_sql(return_table('maop'))
cl = sqlserver_sql(return_table('classarea'))
lu = sqlserver_sql(return_table('landuse'))
ps = sqlserver_sql(return_table('pipesegment'))
env = sqlserver_sql(return_table('irascustomdata11'))
geo = sqlserver_sql(return_table('irascustomdata1'))
hyd = sqlserver_sql(return_table('irascustomdata2'))
envmed = sqlserver_sql(return_vt('EnvironmentMedium'))
hab = sqlserver_sql(return_vt('HabitatWildlifeSensitivityCat'))
pir = sqlserver_sql(return_vt('SAF_PopulationPIR'))
iiz = sqlserver_sql(return_vt('SAF_PopulationIIZ'))
epz = sqlserver_sql(return_vt('SAF_PopulationEPZ'))
eccgr = sqlserver_sql(return_vt('CGA_ECGrowthRateMean'))
iccgr = sqlserver_sql(return_vt('CGA_ICGrowthRateMean'))
lferw = sqlserver_sql(return_vt('EA_LFERWSeverity'))
scc = sqlserver_sql(return_vt('EA_SCCSeverity'))
gw = sqlserver_sql(return_ptable('GirthWeld'))
v = sqlserver_sql(return_ptable('Valve'))

# Every station number that starts or ends a record, plus the evenly spaced ILI break
# points; cover depth and girth welds are loaded above but deliberately left out here.
station_sources = [
#         doc.beginstationnum,doc.endstationnum,
         maop.beginstationnum,maop.endstationnum,
         cl.beginstationnum,cl.endstationnum,
         lu.beginstationnum,lu.endstationnum,
         ps.beginstationnum,ps.endstationnum,
         env.beginstationnum,env.endstationnum,
         geo.beginstationnum,geo.endstationnum,
        hyd.beginstationnum,hyd.endstationnum,
        envmed.beginstationnum,envmed.endstationnum,
        hab.beginstationnum,hab.endstationnum,
        pir.beginstationnum,pir.endstationnum,
        iiz.beginstationnum,iiz.endstationnum,
        epz.beginstationnum,epz.endstationnum,
        eccgr.beginstationnum,eccgr.endstationnum,
        iccgr.beginstationnum,iccgr.endstationnum,
        lferw.beginstationnum,lferw.endstationnum,
        scc.beginstationnum,scc.endstationnum,
#         gw.stationnum,
        v.stationnum,
        ili_segmentation(100.),
        ]

# Series.append was removed in pandas 2.0; concat stacks the same pieces. The sorted,
# de-duplicated result is the set of dynamic-segment break points.
pd.concat(station_sources).sort_values().drop_duplicates().reset_index(drop=True)



0            0.000000
1           99.975655
2          103.921000
3          119.229000
4          165.921000
            ...      
4756    293821.782400
4757    293827.371400
4758    293828.442000
4759    293828.450400
4760    293828.451400
Length: 4761, dtype: float64

# Risk Results Review

## QC Preparation 

In [None]:
%%time
# Drop a previously loaded frame before re-reading (presumably to release its memory first).
try:
    del results
except NameError:
    # first run on a fresh kernel: nothing to drop. Any other error should surface.
    pass
results = pd.read_csv("Pipeline_SCC_LFERW_ENV.csv")
results.info(memory_usage='deep')

Computing each dynamic segment's length (`EndMeasure` − `BeginMeasure`) and storing it in a new `ds_length` column

In [None]:
# length of each dynamic segment
results['ds_length'] = results['EndMeasure'] - results['BeginMeasure']

Querying the LineLoop table from database

In [None]:
# One row per line loop: id, name, status code and the name of the pipeline system it belongs to.
# NOTE(review): the status join column is spelled `LineLoopStatusLd` -- confirm against the schema.
q1 = """select ll.Id [LineID], ll.LineName, ld.code [ll_status], ps.PipelineSystemName from lineloop ll
        join listdomain ld on ll.LineLoopStatusLd = ld.id
        join pipelinesystem ps on ll.pipelinesystemid = ps.id
        """
ll_table = sqlserver_sql(q1)
# Number of line loops per pipeline system (count() reports non-null values per column).
ll_table.groupby('PipelineSystemName').count()
# ll_table.head()

Converting LineID column to int64.
Setting index to LineID

In [None]:
# Cast LineID to int64 and make it the index, producing a new frame instead of mutating in place:
#  - under pandas 2, `.loc[:, col] = values` writes into the existing column and can keep its old dtype;
#  - the guard makes the cell safe to re-run, since LineID is no longer a column after the first run.
if 'LineID' in ll_table.columns:
    ll_table = ll_table.astype({'LineID': 'int64'}).set_index('LineID')
ll_table

Merging LineLoop query to the results.

In [None]:
%%time
# Attach line name, status and pipeline system to every result row by matching results.LineID to ll_table's index.
# how='inner' drops result rows whose LineID is missing from ll_table; indicator=True adds a `_merge` column.
# NOTE(review): this overwrites `results`, so re-running the cell merges an already-merged frame
# (and `_merge` will already exist) -- reload `results` before running it again.
results = pd.merge(results,ll_table, how='inner', left_on="LineID", right_index=True, suffixes=('','_db'), indicator=True)#.loc[:,:]

Count of pipelines for assurance check.

In [None]:
# Number of distinct LineID values present in the merged results, per pipeline system.
results.groupby('PipelineSystemName').LineID.nunique()

In [None]:
# Sum of all dynamic-segment lengths in the merged results.
tsl = results.ds_length.sum()

## QC Start

In [None]:
# Display the full productcomponent table.
q = """select * from productcomponent"""

sqlserver_sql(q)

In [None]:
# List the result columns whose name contains "inservice" (case-insensitive).
pd.DataFrame(results.columns, columns=['col']).query("col.str.contains('inservice', case=False)")

In [None]:
def pct_error(col1,col2):
    """Return ``|col1 - col2|`` as a percentage of ``col2``.

    Works on scalars and element-wise on arrays / Series. The divisor is ``col2`` itself,
    not its magnitude, so a negative ``col2`` gives a negative result.
    """
    gap = abs(col1 - col2)
    return gap*100./col2

# ## results.assign(qc = lambda x: x.Impact_Frequency_perkmyr*x.MD_Past_Incidents_Multiplier*x.Probability_of_Failure_Given_a_Hit,
#               pct_error = lambda x: abs(x.qc - x.Mechanical_Damage_Failure_Frequency_perkmyr)*100./x.Mechanical_Damage_Failure_Frequency_perkmyr).loc[:,['qc','Mechanical_Damage_Failure_Frequency_perkmyr','pct_error']].describe()

# ## checking for nulls
# results.loc[:,['Maximum_Operating_Pressure_PSI',
#                'Outside_Diameter_Inch',
#                'Wall_Thickness_Inch',
#                'Grade_KSI',
#                'ds_length']].fillna(-1).loc[lambda x: (x.Grade_KSI == -1),:].ds_length.sum()*100/tsl

def calc_repair_costs(d):
    """Return one repair-cost value per row of ``d`` as a NumPy array.

    Reads the columns 'Cover Depth (m)', 'Land Use', 'Bank Full Width of Watercourse m (m)'
    and 'Pipe Outside Diameter (mm)'. ``d`` itself is not modified.

    Rows whose 'Land Use' equals 'WATER COURSE':
        250000 + 25000 * width**1.2 + 630 * OD**0.6
    All other rows (including missing 'Land Use'):
        1000 * (1.30 * (cover + OD/1000 + 0.6) * (OD/1000 + 2) * 20) + 630 * OD**0.6
    where missing cover depth is taken as 0.76 and negative cover depth as 0.
    """
    base_repair = 250000.0
    watercourse_var = 25000.0
    pipe_base_Repair = 630.0
    exc_backfill = 1000.0

    # missing cover -> 0.76, negative cover -> 0
    cover = d['Cover Depth (m)'].fillna(0.76).clip(lower=0.00)
    od_mm = d['Pipe Outside Diameter (mm)']
    width = d['Bank Full Width of Watercourse m (m)']

    pipe_term = pipe_base_Repair*np.power(od_mm,0.6)
    water_cost = base_repair + watercourse_var*np.power(width,1.2) + pipe_term
    land_cost = exc_backfill*(1.30*(cover+od_mm/1000+0.6)*(od_mm/1000+2)*20) + pipe_term

    is_water = d['Land Use'] == 'WATER COURSE'
    return np.where(is_water, water_cost, land_cost)

def calc_product_costs(d):
    """
    Recompute the spill volume of every row of ``d``.

    Despite its name, the function returns a volume, not a cost: pipe
    cross-sectional area x valve section 1 length x drain down factor.

    :param d: DataFrame with columns 'Pipe Cross Sectional Area sqmeter (m^2)',
              'Valve Section 1 Length m' and 'Drain Down Factor'; it is not
              modified
    :return: numpy array of spill volumes, one per row of ``d``
    """
    # NOTE(review): an earlier draft switched to 'Valve Section 2 Length m'
    # when 'Upstream Valve Section 1 Type' == 'CHECK'; that branch was disabled,
    # so valve section 1 is used for every row.
    spill = (d['Pipe Cross Sectional Area sqmeter (m^2)']
             * d['Valve Section 1 Length m']
             * d['Drain Down Factor'])
    return spill.values

# Checking Environment Impact.
temp = results.copy()
temp.name = "Environment Impact QAQC"

# Collapse every column whose name matches C1..C5 into one Y/N flag per class:
# 'Y' when any matching column holds 'Y' on that row, otherwise 'N'.
# The axis is passed by keyword because pandas 2.x no longer accepts it positionally.
for flag in ('C1', 'C2', 'C3', 'C4', 'C5'):
    temp[flag] = temp.filter(regex=flag).eq('Y').any(axis='columns').map({False: "N", True: "Y"})

# Reduce the product names (including missing values) to a Liquid / Gas state.
temp['Product Type'] = temp['Product Type'].replace(to_replace=['Bow River Crude', np.nan, 'Condensate', 'CAPL Crude',
       'MSPL Heavy Crude', 'MSPL Light Crude',
       'CLPL Diluent', 'Dilbit', 'CLPL Dilbit', 'Diluent',
       'Products'], value='Liquid').replace(to_replace=['Natural Gas','Ethane Plus'], value='Gas')

# Every selected column is a grouping key, so .count() would have no column left
# to count; .size() reports the number of rows per flag combination instead.
temp[['C1','C2','C3','C4','C5','Environment Impact','Product Type']].groupby(['Product Type','C5','C4','C3','C2','C1','Environment Impact']).size().xs('Liquid')


In [None]:
# QC of the release inventory: recompute it as cross-sectional area x valve
# section 1 length (x drain down factor when 'Mixture State' is 'Liquid'),
# compare it with the reported liquid-spill / gas-release inventory through
# pct_error, and list only the rows whose error exceeds 50.
results.assign(QC=lambda x: np.where(x['Mixture State']=='Liquid',
                                       x['Pipe Cross Sectional Area sqmeter (m^2)']*x['Valve Section 1 Length m']*x['Drain Down Factor'],
                                       x['Pipe Cross Sectional Area sqmeter (m^2)']*x['Valve Section 1 Length m']),
              error=lambda x: np.where(x['Mixture State']=='Liquid',
                                       pct_error(x['Total Liquid Spill Inventory m3 (m^3)'],x.QC),
                                       pct_error(x['Total Gas Release Inventory m3 (m^3)'],x.QC)))\
.loc[:,['Mixture State','Total Liquid Spill Inventory m3 (m^3)','Total Gas Release Inventory m3 (m^3)','Pipe Cross Sectional Area sqmeter (m^2)','Valve Section 1 Length m','Drain Down Factor','QC','error']].query('error > 50')

In [None]:
%matplotlib notebook
# results.assign(QC=lambda x: impact_fault_tree(*list(x.filter(regex='^B[^e][0-9]*'))) )

results.assign(QC=lambda x: impact_fault_tree(x['B1 Pipe Location Factor perkmyr (/km.yr)'],
                                             x['B2 Public Awareness Factor'],
                                             x['B3 Signage Factor'],
                                             x['B4 Buried Markers Factor'],
                                             x['B5 Third Party Notification Factor'],
                                             x['B7 Patrol Frequency Factor'],
                                             x['B9 Operator Response Factor'],
                                             x['B10 Pipe Finding Factor'],
                                             x['B11 Pipe Marking Factor'],
                                             x['B12 Depth of Cover Factor']),
              error = lambda x: pct_error(x['Impact Frequency perkmyr (/km.yr)'], x.QC))\
[['Impact Frequency perkmyr (/km.yr)','QC','error']].plot.scatter(x='Impact Frequency perkmyr (/km.yr)',
                                                          y='error', marker='s', color='white', edgecolor='k')



In [None]:
results.assign(spill_gal = lambda x: x['Total Liquid Spill Inventory m3 (m^3)']*264.172,
               spill_buck = lambda x: pd.cut(x.spill_gal,
                                            [0.,500.0,1000.0,10000.0,100000.0,1.0e6,np.inf],
                                            labels=['<500','<1000','<10,000','<100,000','<1,000,000','>1,000,000']))\
.replace({'Product Type':{'Bow River Crude':1,
                         'Condensate|Diluent|CLPL Diluen.*':2,
                         'MSPL Light.*|CAPL.*':3,
                         'MSPL Heavy.*|Dilbit|Products|CLPL Dilb.*':4}}, regex=True).groupby(['Product Type','spill_buck'])['Environmental Damage Costs USDpergallon'].apply(np.mean)

In [None]:
results.assign(QC = lambda x: (x['Spill Response Costs USD (USD)']+x['Environmental Damage Costs USD (USD)'])*1.2802*1.32,
              error = lambda x: pct_error(x['Cleanup Costs CDN (USD)'],x.QC))[['Cleanup Costs CDN (USD)','QC','error']]\
.describe()

## QC of POE (EC/IC) 

In [None]:
#Pipeline name, and begin and end chainages

results[['Line Name', 'Begin Measure (m)', 'End Measure (m)', 'Length (m)','Pipe Wall Thickness (mm)','Pipe Inservice Date','Wall Thickness Pipe Age Ratio','IC Method 1 Inferential LOF (/km.yr)']].\
assign(QC=lambda x: pd.cut(x['Wall Thickness Pipe Age Ratio'],
                          [-np.inf,0.127,0.254,0.381,0.508,np.inf],
                        labels=[0.01,0.001,0.0001,0.00001,0.000001]).astype(float).fillna(0.1),
        EQCHECK=lambda x: x.QC==x['IC Method 1 Inferential LOF (/km.yr)']).\
describe(include='all')

In [None]:
results[['Line Name', 'Begin Measure (m)', 'End Measure (m)', 'Length (m)','Pipe Wall Thickness (mm)','Pipe Inservice Date','Pipe Outside Diameter (in)']]\
[lambda x: x['Pipe Outside Diameter (in)'].isnull()].groupby('Line Name').sum()['Length (m)']#.to_csv('ls2578_install_beyond_2019.csv')

In [15]:
def critical_depth_modified_b31g(od, wt, s, p, fL, units="SI"):
    """
    Calculates the critical depth of a feature using the Modified B31G
    Equation, i.e. the depth at which the predicted failure stress equals the
    hoop stress produced by pressure ``p``.

    The result is NOT clamped: it can be negative or exceed the wall thickness.

    :param od:  Pipe outside diameter, in mm (SI), or inches (US)
    :param wt:  Pipe wall thickness, in mm (SI), or inches (US)
    :param s:   Pipe grade, in kPa (SI), or psi (US)
    :param p:   pressure, in kPa (SI), or psi (US)
    :param fL:  feature length, in mm (SI), or inches (US)
    :param units: flag for which units to use, "SI" or "US", default "SI";
                  any value other than "SI" is treated as "US"
    :return: Critical depth, in mm (SI), or inches (US)
    """

    l2Dt = np.power(fL, 2.0)/(od*wt)
    # np.where evaluates both branches for every element. Cap the argument of
    # the short-feature branch at its validity limit so long features do not
    # take the square root of a negative number (RuntimeWarning) in a branch
    # whose value is discarded anyway.
    short_l2Dt = np.minimum(l2Dt, 50.0)
    Mt = np.where(l2Dt <= 50.0,
                  np.sqrt(1.0 + (0.6275*short_l2Dt) - (0.003375*np.power(short_l2Dt, 2.0))),
                  0.032*l2Dt + 3.3)

    # Flow stress = grade + 68947.6 kPa (SI) or + 10000 psi (US).
    flowS = s + (68947.6 if units == "SI" else 10000.0)

    # Hoop stress at pressure p.
    opStress = (p*od)/(2.*wt)

    critical_d = ((opStress-flowS)*wt)/(0.85*((opStress/Mt)-flowS))
#     return np.minimum(critical_d/wt,0.8)
    return critical_d

def statistical_poe(df, analysis_date="2019-12-31"):
    """
    Probability of exceedance (POE) per feature, assuming a normally
    distributed depth at the analysis date.

    CAUTION: the helper columns 'ILI Age', 'm_cgr', 'sd_cgr', 'depth_run_mm',
    'failure_depth_mm', 'mean_depth_mm', 'sd_depth_mm', 'leak_poe' and
    'rupture_poe' are written onto the input frame itself.

    :param df: DataFrame with columns 'ILIRStartDate', 'depth_fraction',
               'WT_mm', 'OD_inch', 'grade_MPa', 'MAOP_kPa', 'length_mm', and
               either 'vendor_cgr_mmpyr' or 'ILIFSurfaceInd'
    :param analysis_date: date the depths are grown to (anything accepted by
               ``pd.Timestamp``); defaults to 2019-12-31
    :return: numpy array with one POE per row: 'leak_poe' where the failure
             depth exceeds 80% of wall thickness, otherwise 'rupture_poe'
    """
    # ILI Age in years between the inspection and the analysis date
    run_date = pd.Timestamp(analysis_date)
    df.loc[:,'ILI Age'] = (run_date - pd.to_datetime(df.loc[:,'ILIRStartDate'])).dt.days/365.25

    if 'vendor_cgr_mmpyr' in df.columns:
        # Supplied growth rate is used as-is, with no spread.
        df.loc[:, 'm_cgr'] = df.loc[:, 'vendor_cgr_mmpyr']
        df.loc[:, 'sd_cgr'] = 0.00
    else:
        # Default rates: 0.3048 when the surface indicator contains 'E',
        # otherwise 0.127 (presumably mm/yr, since they are added to mm depths
        # - TODO confirm); standard deviation is 25% of the mean.
        df.loc[:, 'm_cgr'] = np.where(df.ILIFSurfaceInd.str.contains('E', case=False), 0.3048, 0.127)
        df.loc[:, 'sd_cgr'] = 0.25 * df.loc[:, 'm_cgr']

    # Measured Depth in mm
    df.loc[:,'depth_run_mm'] = df.loc[:,'depth_fraction']*df.loc[:,'WT_mm']

    # Failure Depth in mm (OD converted inch -> mm, grade converted MPa -> kPa)
    df.loc[:, 'failure_depth_mm'] = critical_depth_modified_b31g(df.loc[:,'OD_inch']*25.4, df.loc[:,'WT_mm'], df.loc[:,'grade_MPa']*1000., df.loc[:,'MAOP_kPa'], df.loc[:,'length_mm'])

    # Mean of Depth in mm (the 0.00*WT term is a zero bias placeholder)
    df.loc[:,'mean_depth_mm'] = (df.loc[:,'WT_mm']*0.00) + df.loc[:,'depth_run_mm'] + (df.loc[:,'ILI Age']*df.loc[:, 'm_cgr'])

    # SD of Depth in mm: 7.8% of wall thickness combined in quadrature with
    # the growth spread accumulated over the ILI age
    df.loc[:,'sd_depth_mm'] = np.sqrt(  np.power(0.078*df.loc[:,'WT_mm'],2) +  np.power(df.loc[:,'ILI Age'],2) * np.power(df.loc[:, 'sd_cgr'], 2))

    # norm.sf is the survival function (1 - cdf) evaluated without the
    # cancellation that makes "1.0 - cdf" collapse to 0 for very small tails.
    df.loc[:,'leak_poe'] = norm.sf(0.80*df.loc[:,'WT_mm'], loc=df.loc[:,'mean_depth_mm'], scale= df.loc[:,'sd_depth_mm'])
    df.loc[:,'rupture_poe'] = norm.sf(df.loc[:,'failure_depth_mm'], loc=df.loc[:,'mean_depth_mm'], scale= df.loc[:,'sd_depth_mm'])

    return np.where(df.loc[:,'failure_depth_mm']/df.loc[:,'WT_mm']> 0.80, df.loc[:,'leak_poe'], df.loc[:,'rupture_poe'])


The following cells load the ILI features for the selected pipeline from the database.

In [None]:
get_ili_ranges("LS2195")

In [None]:
qcdf = get_features_for_poe(141)

In [None]:
pd.DataFrame(qcdf.columns, columns=['col']).query("col.str.contains('', case=False)")

The following cell loads the features from the clipboard instead.

In [22]:
df2 = pd.read_clipboard()
df2

Unnamed: 0,Assessment Row Number,Line,ILIRStartDate,tool,ILIFFeatureNumber,status,type,chainage_m,ILIFSurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,MAOP_kPa,LineName,ectb,ec_cgr,ictb,ic_cgr,vendor_cgr_mmpyr
0,1899,LS2078,2018-04-03,MFL,DMA 812,Active,General,1148.81304,E,0.61,24.994,22.987,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
1,1872,LS2078,2018-04-03,MFL,CLS 54,Active,General,1116.99603,E,0.37,152.984,110.998,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
2,2024,LS2078,2018-04-03,MFL,CLS 903,Active,General,5844.80519,E,0.37,134.976,32.995,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
3,2003,LS2078,2018-04-03,MFL,CLS 882,Active,General,4949.27216,E,0.35,197.993,47.981,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
4,1973,LS2078,2018-04-03,MFL,CLS 759,Active,General,3424.95411,E,0.34,121.996,22.987,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
5,1986,LS2078,2018-04-03,MFL,CLS 790,Active,General,3652.62112,E,0.33,121.996,67.996,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
6,1920,LS2078,2018-04-03,MFL,CLS 401,Active,General,1933.83106,E,0.31,127.991,79.985,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
7,1938,LS2078,2018-04-03,MFL,CLS 407,Active,General,1936.07206,E,0.29,287.985,59.995,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
8,2057,LS2078,2018-04-03,MFL,CLS 1047,Active,General,12271.88239,E,0.27,180.975,80.975,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702
9,1934,LS2078,2018-04-03,MFL,CLS 406,Active,General,1935.67906,E,0.25,150.978,54.991,1973-01-01,10.75,4.40004,390,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702


In [17]:
df2 = get_features_for_poe_by_ID(df.Id)

AttributeError: 'DataFrame' object has no attribute 'Id'

In [None]:
q = """select * from virtualtable vt"""
#169 EC, 171 IC

cgr_query = """select 
            ll.LineName, 
            vt.VirtualTableName [ectb], 
            vtd.DataFieldValueNum [ec_cgr],
            a.ictb,
            a.ic_cgr
             from VirtualTableDataNum vtd
            join VirtualTableStationing vts on vtd.VirtualTableStationingId = vts.VirtualTableStationingId and vtd.VirtualTableId = vts.VirtualTableId
            join StationSeries ss on vts.BeginStationSeriesId = ss.Id
            join LineLoop ll on ss.LineLoopId = ll.Id
            join VirtualTable vt on vtd.VirtualTableId = vt.Id

            full join (select ll.LineName, vt.VirtualTableName [ictb], vtd.DataFieldValueNum [ic_cgr] from VirtualTableDataNum vtd
                join VirtualTableStationing vts on vtd.VirtualTableStationingId = vts.VirtualTableStationingId and vtd.VirtualTableId = vts.VirtualTableId
                join StationSeries ss on vts.BeginStationSeriesId = ss.Id
                join LineLoop ll on ss.LineLoopId = ll.Id
                join VirtualTable vt on vtd.VirtualTableId = vt.Id
                where vtd.VirtualTableId = 171 ) a on ll.LineName = a.LineName

            where vtd.VirtualTableId = 169
        """
cgr_df = sqlserver_sql(cgr_query)
cgr_df

In [None]:
df2 = df2.merge(cgr_df, left_on='line',right_on='LineName')

In [None]:
# df2.loc[:,['ILIFSurfaceInd','ec_cgr','ic_cgr']]
# The growth rate is picked row by row through .apply, because a
# column-wise np.where assigned to the dataframe column gave the wrong result.
# A plain conditional expression is used inside the lambda: np.where on scalars
# returns 0-d arrays, which can leave the column with object dtype.
df2.loc[:,'vendor_cgr_mmpyr'] = df2.apply(lambda x: x.ec_cgr if x.ILIFSurfaceInd.lower() == 'e' else x.ic_cgr, axis=1)
# df2

In [None]:
df2.query("LineName.eq('LS2000')").loc[lambda x: x.chainage_m.isin([3712.01812,
3714.41812,
3721.91212,
3722.29312,
3722.43812,
3728.40412,
]),:]#.apply(lambda x: x.ILIFSurfaceInd == 'E', axis=1)

In [29]:
def background_gradient(s, m=0, M=0.1, cmap='Reds', low=0, high=0):
    """
    Styler helper: map each value of ``s`` to a CSS background colour.

    :param s: pandas Series (one column or row handed over by ``Styler.apply``)
    :param m: value mapped to the low end of the colormap
    :param M: value mapped to the high end of the colormap
    :param cmap: matplotlib colormap name
    :param low: fraction of the ``M - m`` range by which to extend the low end
    :param high: fraction of the ``M - m`` range by which to extend the high end
    :return: list of 'background-color: #rrggbb' strings, one per value of ``s``
    """
    span = M - m
    # Named "scale" rather than "norm" so the scipy.stats.norm import is not
    # shadowed inside this function.
    scale = colors.Normalize(m - (span * low),
                             M + (span * high))
    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # matplotlib 3.9.
    shades = plt.get_cmap(cmap)(scale(s.values))
    return ['background-color: %s' % colors.rgb2hex(rgba) for rgba in shades]

temp = df2.assign(poe = statistical_poe(df2),
                    pct_smys=lambda x: (x.MAOP_kPa*x.OD_inch*25.4)/(20*x.WT_mm*x.grade_MPa))#.query("chainage_m.between(57882.022,57900.49) & ILIFSurfaceInd =='I'").drop_duplicates('FeatureID')

display(temp.style.apply(background_gradient, subset=pd.IndexSlice[:,['leak_poe','rupture_poe']]))
# print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")
# del temp

Unnamed: 0,Assessment Row Number,Line,ILIRStartDate,tool,ILIFFeatureNumber,status,type,chainage_m,ILIFSurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,MAOP_kPa,LineName,ectb,ec_cgr,ictb,ic_cgr,vendor_cgr_mmpyr,ILI Age,m_cgr,sd_cgr,depth_run_mm,failure_depth_mm,mean_depth_mm,sd_depth_mm,leak_poe,rupture_poe,poe,pct_smys
0,1899,LS2078,2018-04-03,MFL,DMA 812,Active,General,1148.81304,E,0.61,24.994,22.987,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,2.684024,4.455927,2.713153,0.343203,0.009361,0.0,0.009361,65.772686
1,1872,LS2078,2018-04-03,MFL,CLS 54,Active,General,1116.99603,E,0.37,152.984,110.998,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.628015,2.761171,1.657143,0.343203,0.0,0.000648,0.000648,65.772686
2,2024,LS2078,2018-04-03,MFL,CLS 903,Active,General,5844.80519,E,0.37,134.976,32.995,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.628015,2.818671,1.657143,0.343203,0.0,0.000357,0.000357,65.772686
3,2003,LS2078,2018-04-03,MFL,CLS 882,Active,General,4949.27216,E,0.35,197.993,47.981,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.540014,2.66852,1.569143,0.343203,0.0,0.000679,0.000679,65.772686
4,1973,LS2078,2018-04-03,MFL,CLS 759,Active,General,3424.95411,E,0.34,121.996,22.987,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.496014,2.871817,1.525142,0.343203,0.0,4.4e-05,4.4e-05,65.772686
5,1986,LS2078,2018-04-03,MFL,CLS 790,Active,General,3652.62112,E,0.33,121.996,67.996,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.452013,2.871817,1.481142,0.343203,0.0,2.5e-05,2.5e-05,65.772686
6,1920,LS2078,2018-04-03,MFL,CLS 401,Active,General,1933.83106,E,0.31,127.991,79.985,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.364012,2.845818,1.393141,0.343203,0.0,1.2e-05,1.2e-05,65.772686
7,1938,LS2078,2018-04-03,MFL,CLS 407,Active,General,1936.07206,E,0.29,287.985,59.995,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.276012,2.57859,1.30514,0.343203,0.0,0.000103,0.000103,65.772686
8,2057,LS2078,2018-04-03,MFL,CLS 1047,Active,General,12271.88239,E,0.27,180.975,80.975,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.188011,2.697133,1.217139,0.343203,0.0,8e-06,8e-06,65.772686
9,1934,LS2078,2018-04-03,MFL,CLS 406,Active,General,1935.67906,E,0.25,150.978,54.991,1973-01-01,10.75,4.40004,359,7609.9967,LS2078,CGA_ECGrowthRateMean,0.016702,CGA_ICGrowthRateMean,0.011169,0.016702,1.744011,0.016702,0.0,1.10001,2.76682,1.129139,0.343203,0.0,1e-06,1e-06,65.772686


In [31]:
temp.to_clipboard(index=False)

Repeat the assessment using Ray's spreadsheet export, read from the clipboard.

In [19]:
df3 = pd.read_clipboard()

In [None]:
df2.columns

In [None]:
df3.columns

In [None]:
df3.rename(columns={
#                     'linename':'line',
#                    'ilirstartdate':'ILIRStartDate',
#                    'ilifsurfaceind':'ILIFSurfaceInd',
#                    'ILIFPeakDepthPct':'depth_fraction',
#                    'iliflength':'length_mm',
#                    'PIpeOutsideDiameter':'OD_inch',
#                    'PipeWallThickness':'WT_mm',
#                    'PipeGrade':'grade_MPa',
#                    'maxallowablepressure':'MAOP_kPa',
                   "Applicable CGR mmpyr":"vendor_cgr_mmpyr"},inplace=True)



In [None]:
df3.loc[:,'vendor_cgr_mmpyr'] = np.where(df3.ILIFSurfaceInd == 'E', df3.CGA_ECGrowthRateMean, df3.CGA_ICGrowthRateMean)

In [None]:
temp2 = df3.assign(poe = statistical_poe(df3),
                    pct_smys=lambda x: (x.MAOP_kPa*x.OD_inch*25.4)/(20*x.WT_mm*x.grade_MPa))#.query("chainage_m.between(57882.022,57900.49) & ILIFSurfaceInd =='I'").drop_duplicates('FeatureID')

# display(temp2.style.apply(background_gradient, subset=pd.IndexSlice[:,['leak_poe','rupture_poe']]))
# print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")
# del temp
temp2.to_clipboard()

## QC of POE (SCC/LFERW)

In [13]:
import importlib.util
spec = importlib.util.spec_from_file_location("useful_func", r"C:\Users\armando_borjas\Documents\Python\Calculators\general_POE.py")
general_POE = importlib.util.module_from_spec(spec)
spec.loader.exec_module(general_POE)

### SCC

Loading crack data from the database

In [None]:
# NOTE: only the last expression of a cell is rendered, so the result of this
# call is not displayed here.
get_ili_ranges('LS2195')

# Keep crack-like features only and tag vendor / tool. .assign works on a new
# frame, so the columns are really set (attribute assignment on a filtered
# slice creates no column if the name is absent and triggers SettingWithCopy).
crack_qcdf = (
    get_cracks_for_poe(141)
    .loc[lambda x: x['type'].isin(['Crack-Like','Crack-Cluster'])]
    .assign(vendor='Rosen3', tool='UTCD')
)
# crack_qcdf.toughness_J = 20.0
# crack_qcdf.depth_fraction = crack_qcdf.depth_fraction/100

In [None]:
# Toughness by vintage and diameter: installed before 1970 -> 10.0; from 1970
# on -> 20.0 for OD < 16 in and 27.0 for OD >= 16 in. Rows matching no
# condition (e.g. missing install date) get np.select's default of 0.
install_year = pd.to_datetime(crack_qcdf.install_date).dt.year
conditions = [install_year < 1970,
             (crack_qcdf.OD_inch < 16) & (install_year >= 1970),
             (crack_qcdf.OD_inch >= 16) & (install_year >= 1970)]
choice = [10.0,
          20.0,
          27.0]

# Bracket assignment always writes the column; attribute assignment only
# works when the column already exists.
crack_qcdf['toughness_J'] = np.select(conditions, choice)

Loading crack data from the clipboard as a consequence of a special request

In [None]:
df = pd.read_clipboard()

In [None]:
df2 = get_cracks_by_ID(df.Id)

In [None]:
# Toughness by vintage and diameter: installed before 1970 -> 10.0; from 1970
# on -> 20.0 for OD < 16 in and 27.0 for OD >= 16 in. Rows matching no
# condition (e.g. missing install date) get np.select's default of 0.
install_year = pd.to_datetime(df2.install_date).dt.year
conditions = [install_year < 1970,
             (df2.OD_inch < 16) & (install_year >= 1970),
             (df2.OD_inch >= 16) & (install_year >= 1970)]
choice = [10.0,
          20.0,
          27.0]

# Assign the positional array directly: wrapping it in pd.Series gives it a
# fresh 0..n-1 index, and index alignment on assignment then yields NaN for
# every row of df2 whose label is not in that range.
df2['toughness_J'] = np.select(conditions, choice)
df2['vendor'] = 'Rosen3'
df2

In [None]:
config = dict(run_date='2019-12-31',
             weibull_shape=2.0,
             weibull_scale=0.26,
             rupt_thresh=1.1,
             iterations=1_000_000)

SCC = general_POE.MonteCarlo('SCC', config=config)
SCC.df = df2#.query("chainage_m.between(6201.048,6215.525)")
SCC.process_dates()
SCC.run()
SCC.merge_result('FeatureID').style.apply(background_gradient, subset=pd.IndexSlice[:,['POE_l','POE_r']])


#print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")

In [None]:
SCC.merge_result('FeatureID').to_clipboard()

In [None]:
config = dict(run_date='2019-12-31',
             weibull_shape=2.0,
             weibull_scale=0.26,
             rupt_thresh=1.1,
             iterations=1_000_000)

SCC2 = general_POE.MonteCarlo('SCC', config=config)
SCC2.df = df2#.query("chainage_m.between(6201.048,6215.525)")

qc_cols = ['line',
            'FeatureID',
            'vendor',
            'tool',
            'ILIRStartDate',
            'status',
            'type',
            'ILIFSurfaceInd',
            'chainage_m',
            'depth_fraction',
            'length_mm',
            'width_mm',
            'vendor_cgr_mmpyr',
            'vendor_cgr_sd',
            'OD_inch',
            'WT_mm',
            'grade_MPa',
            'toughness_J',
            'install_date',
            'coating_type',
            'incubation_yrs',
            'MAOP_kPa',
            'PMax_kPa',
            'PMin_kPa',
            'AESC']

missing_cols = pd.Index(np.extract(~pd.Index(qc_cols).isin(SCC2.df.columns),
                                   qc_cols))

SCC2.df = SCC2.df.reindex(columns=[*SCC2.df.columns.append(missing_cols)],
                           fill_value=np.nan)

SCC2.process_dates()
SCC2.special_run()
SCC2.merge_result('FeatureID').style.apply(background_gradient, subset=pd.IndexSlice[:,['POE_l','POE_r']])


#print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")

In [None]:
comparison = pd.concat([SCC2.result[['POE','POE_l','POE_r']],SCC.result[['POE','POE_l','POE_r']]],axis=1)
tolerance = np.isclose(comparison.iloc[:,0],comparison.iloc[:,3],atol=1e-3)
comparison
# tolerance

### LFERW

Loading LFERW crack data from the clipboard as a consequence of a special request

In [14]:
df = pd.read_clipboard()

In [15]:
df.describe(include='all')

Unnamed: 0,OBJECTID,IPL_INLINEINSPECTIONCRACKANOMALY_Id,IPL_INLINEINSPECTIONCRACKANOMALY_EffectiveStartDate,IPL_INLINEINSPECTIONCRACKANOMALY_EffectiveEndDate,IPL_INLINEINSPECTIONCRACKANOMALY_InlineInspectionRangeId,IPL_INLINEINSPECTIONCRACKANOMALY_ILICATypeDomainId,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAStatusDomainId,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAOdometer,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAFeatureNumber,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAPeakDepthPct,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAGirthWeldNum,IPL_INLINEINSPECTIONCRACKANOMALY_ILICADistanceFromGirthWeld,IPL_INLINEINSPECTIONCRACKANOMALY_ILICALength,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAWidth,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAOrientation,IPL_INLINEINSPECTIONCRACKANOMALY_ILICASurfaceInd,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAComment,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAGirthWeldNumDS,IPL_INLINEINSPECTIONCRACKANOMALY_ILICADistanceFromGirthWeldDS,IPL_INLINEINSPECTIONCRACKANOMALY_ILICALatitude,IPL_INLINEINSPECTIONCRACKANOMALY_ILICALongitude,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAX,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAY,IPL_INLINEINSPECTIONCRACKANOMALY_ILICAZ,IPL_INLINEINSPECTIONCRACKANOMALY_StationSeriesId,IPL_INLINEINSPECTIONCRACKANOMALY_StationNum,Unnamed: 26,IPL_INLINEINSPECTIONCRACKANOMALY_LastModByUserId,IPL_INLINEINSPECTIONCRACKANOMALY_LastModDateTime,IPL_INLINEINSPECTIONCRACKANOMALY_RouteID,IPL_STATIONSERIES_OBJECTID,IPL_STATIONSERIES_Id,IPL_STATIONSERIES_EffectiveStartDate,IPL_STATIONSERIES_EffectiveEndDate,IPL_STATIONSERIES_LineLoopId,IPL_STATIONSERIES_SeriesValueNumber,IPL_STATIONSERIES_ValveSectionName,IPL_STATIONSERIES_BeginStationNum,IPL_STATIONSERIES_EndStationNum,IPL_STATIONSERIES_StateProvinceDomainId,IPL_STATIONSERIES_DistrictDivisionDomainId,IPL_STATIONSERIES_AreaDomainId,IPL_STATIONSERIES_LastModByUserId,IPL_STATIONSERIES_LastModDateTime,LOC_ERROR
count,13.0,13.0,13,0.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,0.0,13.0,13,13,13.0,13.0,0.0,0.0,0.0,0.0,0.0,13.0,13.0,13,13,13,0.0,13.0,13.0,13,0.0,13.0,13.0,13,13.0,13.0,13.0,13.0,13.0,13,13,13
unique,,,1,,,,,,,,,,,,,2,1,,,,,,,,,,1,1,3,,,,1,,,,1,,,,,,1,1,1
top,,,2018-10-25 0:00:00,,,,,,,,,,,,,I,Long seam anomaly,,,,,,,,,,N,IPF\mfleming,2020-05-10 11:06:33,,,,2018-10-25 0:00:00,,,,LS2195,,,,,,IPF\mfleming,2020-04-24 14:28:27,NO ERROR
freq,,,13,,,,,,,,,,,,,11,13,,,,,,,,,,13,13,5,,,,13,,,,13,,,,,,13,13,13
mean,7.0,69.923077,,,141.0,608055.0,1069.0,13660.616692,252.923077,0.219231,10180.0,5.589,1246.538462,,170.461538,,,10190.0,8.402769,,,,,,378.0,13669.004692,,,,,310.0,378.0,,,79.0,100.0,,0.0,83490.9,1.0,3.0,10207.0,,,
std,3.89444,51.79199,,,0.0,0.0,0.0,9741.389853,163.831957,0.04591,7154.701252,5.073044,1792.853295,,96.863835,,,7154.701252,5.427829,,,,,,0.0,9741.389853,,,,,0.0,0.0,,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,,,
min,1.0,7.0,,,141.0,608055.0,1069.0,77.431,2.0,0.18,100.0,0.001,325.0,,38.0,,,110.0,1.556,,,,,,378.0,85.819,,,,,310.0,378.0,,,79.0,100.0,,0.0,83490.9,1.0,3.0,10207.0,,,
25%,4.0,25.0,,,141.0,608055.0,1069.0,5807.998,130.0,0.19,4450.0,0.114,394.0,,103.0,,,4460.0,3.204,,,,,,378.0,5816.386,,,,,310.0,378.0,,,79.0,100.0,,0.0,83490.9,1.0,3.0,10207.0,,,
50%,7.0,58.0,,,141.0,608055.0,1069.0,18375.953,351.0,0.2,13720.0,5.48,510.0,,157.0,,,13730.0,8.121,,,,,,378.0,18384.341,,,,,310.0,378.0,,,79.0,100.0,,0.0,83490.9,1.0,3.0,10207.0,,,
75%,10.0,107.0,,,141.0,608055.0,1069.0,20493.891,377.0,0.24,15230.0,10.045,680.0,,232.0,,,15240.0,14.24,,,,,,378.0,20502.279,,,,,310.0,378.0,,,79.0,100.0,,0.0,83490.9,1.0,3.0,10207.0,,,


In [18]:
df2 = get_cracks_by_ID(df.IPL_INLINEINSPECTIONCRACKANOMALY_Id)

In [27]:
df2.describe(include='all')

Unnamed: 0,RN,line,vendor,ILIRStartDate,tool,FeatureID,status,type,chainage_m,ILICASurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,toughness_J,begin_ps_c,end_ps_c,MAOP_kPa,begin_maop_c,end_maop_c
count,13.0,13,0.0,13,13,13.0,13,13,13.0,13,13.0,13.0,0.0,13,13.0,13.0,13.0,0.0,13.0,13.0,13.0,13.0,13.0
unique,,1,0.0,1,1,13.0,1,1,,2,,,0.0,3,,,,0.0,,,,,
top,,LS2195,,2016-04-24,UT,390.0,Active,Long Seam Anomaly,,I,,,,1981-06-12,,,,,,,,,
freq,,13,,13,13,1.0,13,13,,11,,,,9,,,,,,,,,
mean,20.307692,,,,,,,,13669.005129,,0.219231,1246.526538,,,10.75,5.59994,317.0006,,11251.548898,16727.420222,8269.9966,10918.445504,17171.956008
std,14.238356,,,,,,,,9741.390167,,0.04591,1792.853276,,,0.0,1.848891e-15,5.916451e-14,,7898.883225,11491.338751,0.0,8274.69195,11700.274067
min,3.0,,,,,,,,85.819,,0.18,324.993,,,10.75,5.59994,317.0006,,59.349,365.72901,8269.9966,0.0,365.72901
25%,8.0,,,,,,,,5816.38618,,0.19,393.979,,,10.75,5.59994,317.0006,,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222
50%,17.0,,,,,,,,18384.34159,,0.2,509.981,,,10.75,5.59994,317.0006,,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078
75%,31.0,,,,,,,,20502.27965,,0.24,679.983,,,10.75,5.59994,317.0006,,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078


In [28]:
# Toughness by vintage and diameter: installed before 1970 -> 10.0; from 1970
# on -> 20.0 for OD < 16 in and 27.0 for OD >= 16 in. Rows matching no
# condition (e.g. missing install date) get np.select's default of 0.
install_year = pd.to_datetime(df2.install_date).dt.year
conditions = [install_year < 1970,
             (df2.OD_inch < 16) & (install_year >= 1970),
             (df2.OD_inch >= 16) & (install_year >= 1970)]
choice = [10.0,
          20.0,
          27.0]

# Assign the positional array directly: wrapping it in pd.Series gives it a
# fresh 0..n-1 index, and index alignment on assignment then yields NaN for
# every row of df2 whose label is not in that range.
df2['toughness_J'] = np.select(conditions, choice)
df2['vendor'] = 'Rosen3'
df2['tool'] = 'UTCD'
df2

Unnamed: 0,RN,line,vendor,ILIRStartDate,tool,FeatureID,status,type,chainage_m,ILICASurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,toughness_J,begin_ps_c,end_ps_c,MAOP_kPa,begin_maop_c,end_maop_c
0,3,LS2195,Rosen3,2016-04-24,UTCD,2,Active,Long Seam Anomaly,85.819,I,0.2,324.993,,1978-07-31,10.75,5.59994,317.00057,20.0,59.349,365.72901,8269.9966,0.0,365.72901
1,7,LS2195,Rosen3,2016-04-24,UTCD,10,Active,Long Seam Anomaly,430.37001,I,0.24,344.983,,1981-06-12,10.75,5.59994,317.00057,20.0,365.72901,845.30802,8269.9966,365.72901,1703.92805
2,14,LS2195,Rosen3,2016-04-24,UTCD,118,Active,Long Seam Anomaly,5246.59817,I,0.34,509.981,,1997-07-21,10.75,5.59994,317.00057,20.0,3806.23912,5472.50817,8269.9966,3526.36811,6461.0222
3,21,LS2195,Rosen3,2016-04-24,UTCD,130,Active,Long Seam Anomaly,5816.38618,I,0.2,424.993,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222
4,10,LS2195,Rosen3,2016-04-24,UTCD,131,Active,Long Seam Anomaly,5817.91318,I,0.18,452.984,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222
5,34,LS2195,Rosen3,2016-04-24,UTCD,165,Active,Long Seam Anomaly,7748.91225,I,0.18,327.99,,1981-06-12,10.75,5.59994,317.00057,20.0,6630.75821,13274.75842,8269.9966,6461.0222,16439.23953
6,30,LS2195,Rosen3,2016-04-24,UTCD,351,Active,Long Seam Anomaly,18384.34159,I,0.19,572.999,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078
7,17,LS2195,Rosen3,2016-04-24,UTCD,362,Active,Long Seam Anomaly,19592.17963,I,0.24,567.995,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078
8,4,LS2195,Rosen3,2016-04-24,UTCD,367,Active,Long Seam Anomaly,19982.90364,E,0.19,6548.984,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078
9,42,LS2195,Rosen3,2016-04-24,UTCD,377,Active,Long Seam Anomaly,20502.27965,E,0.23,1871.98,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078


In [31]:
def cgr(**kwargs):
    """Constant-zero callback: accepts any keyword arguments, ignores them, and returns 0.0."""
    return 0.0

config = dict(run_date='2019-12-31',
             weibull_shape=2.0,
             weibull_scale=0.26,
             rupt_thresh=1.1,
             iterations=1_000_000,
             cgr=cgr,
             lcgr=cgr)

LFERW = general_POE.MonteCarlo('MD', config=config)
LFERW.df = df2#.query("chainage_m.between(6201.048,6215.525)")

# Columns the QC run expects on the feature frame; any that df2 lacks are
# appended below as all-NaN columns.
qc_cols = ['line',
            'FeatureID',
            'vendor',
            'tool',
            'ILIRStartDate',
            'status',
            'type',
            'ILIFSurfaceInd',  # NOTE(review): the crack frame carries 'ILICASurfaceInd', so this name is appended as an all-NaN column (see the cell output) - TODO confirm which name general_POE reads
            'chainage_m',
            'depth_fraction',
            'length_mm',
            'width_mm',
            'vendor_cgr_mmpyr',
            'vendor_cgr_sd',
            'OD_inch',
            'WT_mm',
            'grade_MPa',
            'toughness_J',
            'install_date',
            'coating_type',
            'incubation_yrs',
            'MAOP_kPa',
            'PMax_kPa',
            'PMin_kPa',
            'AESC']

# Names from qc_cols that are not already columns of the frame.
missing_cols = pd.Index(np.extract(~pd.Index(qc_cols).isin(LFERW.df.columns),
                                   qc_cols))

# Keep the existing columns in place and add the missing ones at the end, filled with NaN.
LFERW.df = LFERW.df.reindex(columns=[*LFERW.df.columns.append(missing_cols)],
                           fill_value=np.nan)

LFERW.process_dates()
LFERW.run()
LFERW.merge_result('FeatureID').style.apply(background_gradient, subset=pd.IndexSlice[:,['POE_l','POE_r']])


#print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")

  failStress = (flowS/Mp)*np.arccos(np.exp(-x))/np.arccos(np.exp(-y))
  ruptures = fail_pressure <= thresh * operating_pressure


Model: MD POE Simulation
Count of anomalies: 13
Iterations: 1,000,000
Date of analysis: 2019-12-31
Weibull Shape: 2.0
Weibull Scale: 0.26
Leak threshold modifier: 0.8
Rupture threshold modifier: 1.1
Aggregated POE for these features is 0.8332593234692081.

Calculation took 13.8583 seconds.


Unnamed: 0,FeatureID,fail_count,iterations,rupture_count,leak_count,nan,PDP_frac,clength,POE,POE_l,POE_r,1-POE,RN,line,vendor,ILIRStartDate,tool,status,type,chainage_m,ILICASurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,toughness_J,begin_ps_c,end_ps_c,MAOP_kPa,begin_maop_c,end_maop_c,ILIFSurfaceInd,vendor_cgr_mmpyr,vendor_cgr_sd,coating_type,incubation_yrs,PMax_kPa,PMin_kPa,AESC
0,2,89056,1000000,89056,8,1,0.2,324.993,0.089056,8e-06,0.089056,0.910944,3,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,85.819,I,0.2,324.993,,1978-07-31,10.75,5.59994,317.00057,20.0,59.349,365.72901,8269.9966,0.0,365.72901,,,,,,,,
1,10,144243,1000000,144243,17,1,0.24,344.983,0.144243,1.7e-05,0.144243,0.855757,7,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,430.37001,I,0.24,344.983,,1981-06-12,10.75,5.59994,317.00057,20.0,365.72901,845.30802,8269.9966,365.72901,1703.92805,,,,,,,,
2,118,364188,1000000,364188,412,1,0.34,509.981,0.364188,0.000412,0.364188,0.635812,14,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5246.59817,I,0.34,509.981,,1997-07-21,10.75,5.59994,317.00057,20.0,3806.23912,5472.50817,8269.9966,3526.36811,6461.0222,,,,,,,,
3,130,88883,1000000,88883,5,1,0.2,424.993,0.088883,5e-06,0.088883,0.911117,21,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5816.38618,I,0.2,424.993,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222,,,,,,,,
4,131,68228,1000000,68228,5,1,0.18,452.984,0.068228,5e-06,0.068228,0.931772,10,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5817.91318,I,0.18,452.984,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222,,,,,,,,
5,165,68202,1000000,68202,3,1,0.18,327.99,0.068202,3e-06,0.068202,0.931798,34,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,7748.91225,I,0.18,327.99,,1981-06-12,10.75,5.59994,317.00057,20.0,6630.75821,13274.75842,8269.9966,6461.0222,16439.23953,,,,,,,,
6,351,78457,1000000,78457,5,1,0.19,572.999,0.078457,5e-06,0.078457,0.921543,30,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,18384.34159,I,0.19,572.999,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
7,362,143801,1000000,143801,22,1,0.24,567.995,0.143801,2.2e-05,0.143801,0.856199,17,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,19592.17963,I,0.24,567.995,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
8,367,78328,1000000,78328,6,1,0.19,6548.984,0.078328,6e-06,0.078328,0.921672,4,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,19982.90364,E,0.19,6548.984,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
9,377,128798,1000000,128798,15,1,0.23,1871.98,0.128798,1.5e-05,0.128798,0.871202,42,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,20502.27965,E,0.23,1871.98,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,


In [34]:
# Copy the table returned by merge_result('FeatureID') to the system clipboard
# (pandas writes it in a form that can be pasted straight into a spreadsheet).
LFERW.merge_result('FeatureID').to_clipboard()

In [32]:
# Same settings as the LFERW run; this instance is executed through
# special_run() so its results can be compared against run().
config = {
    'run_date': '2019-12-31',
    'weibull_shape': 2.0,
    'weibull_scale': 0.26,
    'rupt_thresh': 1.1,
    'iterations': 1_000_000,
    'cgr': cgr,
    'lcgr': cgr,
}

LFERW2 = general_POE.MonteCarlo('MD', config=config)
LFERW2.df = df2  # .query("chainage_m.between(6201.048,6215.525)")

# Columns that are guaranteed to exist on the simulation frame: any that are
# absent from the input are appended below, filled with NaN.
qc_cols = [
    'line', 'FeatureID', 'vendor', 'tool', 'ILIRStartDate',
    'status', 'type', 'ILIFSurfaceInd', 'chainage_m', 'depth_fraction',
    'length_mm', 'width_mm', 'vendor_cgr_mmpyr', 'vendor_cgr_sd', 'OD_inch',
    'WT_mm', 'grade_MPa', 'toughness_J', 'install_date', 'coating_type',
    'incubation_yrs', 'MAOP_kPa', 'PMax_kPa', 'PMin_kPa', 'AESC',
]

# Entries of qc_cols not yet present on the frame, in qc_cols order.
missing_cols = pd.Index(qc_cols)[~pd.Index(qc_cols).isin(LFERW2.df.columns)]

# reindex returns a new frame, so the object bound to df2 is left untouched.
LFERW2.df = LFERW2.df.reindex(
    columns=list(LFERW2.df.columns.append(missing_cols)),
    fill_value=np.nan,
)

LFERW2.process_dates()
LFERW2.special_run()
(
    LFERW2.merge_result('FeatureID')
    .style
    .apply(background_gradient, subset=pd.IndexSlice[:, ['POE_l', 'POE_r']])
)


#print(f"LEAK POE: {1- np.prod(1 - temp.leak_poe):.3e} - RUPTURE POE: {1- np.prod(1 - temp.rupture_poe):.3e}")

  failStress = (flowS/Mp)*np.arccos(np.exp(-x))/np.arccos(np.exp(-y))
  ruptures = fail_pressure <= thresh * operating_pressure


Model: MD POE Simulation
Count of anomalies: 13
Iterations: 1,000,000
Date of analysis: 2019-12-31
Weibull Shape: 2.0
Weibull Scale: 0.26
Leak threshold modifier: 0.8
Rupture threshold modifier: 1.1
Aggregated POE for these features is 0.833484360767919.

Calculation took 14.0560 seconds.


Unnamed: 0,FeatureID,PDP_frac,flength,fwidth,iterations,fail_count,rupture_count,leak_count,nan,POE,POE_l,POE_r,1-POE,RN,line,vendor,ILIRStartDate,tool,status,type,chainage_m,ILICASurfaceInd,depth_fraction,length_mm,width_mm,install_date,OD_inch,WT_mm,grade_MPa,toughness_J,begin_ps_c,end_ps_c,MAOP_kPa,begin_maop_c,end_maop_c,ILIFSurfaceInd,vendor_cgr_mmpyr,vendor_cgr_sd,coating_type,incubation_yrs,PMax_kPa,PMin_kPa,AESC
0,2,0.2,324.993,,1000000,88922,88922,7,0.0,0.088922,7e-06,0.088922,0.911078,3,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,85.819,I,0.2,324.993,,1978-07-31,10.75,5.59994,317.00057,20.0,59.349,365.72901,8269.9966,0.0,365.72901,,,,,,,,
1,10,0.24,344.983,,1000000,143948,143948,28,0.0,0.143948,2.8e-05,0.143948,0.856052,7,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,430.37001,I,0.24,344.983,,1981-06-12,10.75,5.59994,317.00057,20.0,365.72901,845.30802,8269.9966,365.72901,1703.92805,,,,,,,,
2,118,0.34,509.981,,1000000,364040,364039,428,0.0,0.36404,0.000428,0.364039,0.63596,14,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5246.59817,I,0.34,509.981,,1997-07-21,10.75,5.59994,317.00057,20.0,3806.23912,5472.50817,8269.9966,3526.36811,6461.0222,,,,,,,,
3,130,0.2,424.993,,1000000,89395,89395,1,0.0,0.089395,1e-06,0.089395,0.910605,21,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5816.38618,I,0.2,424.993,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222,,,,,,,,
4,131,0.18,452.984,,1000000,68297,68297,3,0.0,0.068297,3e-06,0.068297,0.931703,10,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,5817.91318,I,0.18,452.984,,1997-07-21,10.75,5.59994,317.00057,20.0,5490.37817,6430.2332,8269.9966,3526.36811,6461.0222,,,,,,,,
5,165,0.18,327.99,,1000000,68193,68193,2,0.0,0.068193,2e-06,0.068193,0.931807,34,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,7748.91225,I,0.18,327.99,,1981-06-12,10.75,5.59994,317.00057,20.0,6630.75821,13274.75842,8269.9966,6461.0222,16439.23953,,,,,,,,
6,351,0.19,572.999,,1000000,77995,77995,5,0.0,0.077995,5e-06,0.077995,0.922005,30,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,18384.34159,I,0.19,572.999,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
7,362,0.24,567.995,,1000000,144029,144029,23,0.0,0.144029,2.3e-05,0.144029,0.855971,17,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,19592.17963,I,0.24,567.995,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
8,367,0.19,6548.984,,1000000,78192,78192,3,0.0,0.078192,3e-06,0.078192,0.921808,4,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,19982.90364,E,0.19,6548.984,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,
9,377,0.23,1871.98,,1000000,129383,129383,13,0.0,0.129383,1.3e-05,0.129383,0.870617,42,LS2195,Rosen3,2016-04-24,UTCD,Active,Long Seam Anomaly,20502.27965,E,0.23,1871.98,,1981-06-12,10.75,5.59994,317.00057,20.0,16542.27853,25159.5718,8269.9966,16439.23953,24464.77078,,,,,,,,


In [33]:
# POE columns side by side: positions 0-2 come from LFERW2 (special_run),
# positions 3-5 from LFERW (run). Column labels repeat, hence positional access below.
comparison = pd.concat(
    [sim.result[['POE', 'POE_l', 'POE_r']] for sim in (LFERW2, LFERW)],
    axis=1,
)
# Element-wise agreement of the two total-POE columns (atol=1e-3 on top of numpy's default rtol).
tolerance = np.isclose(
    comparison.iloc[:, 0],
    comparison.iloc[:, 3],
    atol=1e-3,
)
comparison
# tolerance

Unnamed: 0,POE,POE_l,POE_r,POE.1,POE_l.1,POE_r.1
0,0.088922,7e-06,0.088922,0.089056,8e-06,0.089056
1,0.143948,2.8e-05,0.143948,0.144243,1.7e-05,0.144243
2,0.36404,0.000428,0.364039,0.364188,0.000412,0.364188
3,0.089395,1e-06,0.089395,0.088883,5e-06,0.088883
4,0.068297,3e-06,0.068297,0.068228,5e-06,0.068228
5,0.068193,2e-06,0.068193,0.068202,3e-06,0.068202
6,0.077995,5e-06,0.077995,0.078457,5e-06,0.078457
7,0.144029,2.3e-05,0.144029,0.143801,2.2e-05,0.143801
8,0.078192,3e-06,0.078192,0.078328,6e-06,0.078328
9,0.129383,1.3e-05,0.129383,0.128798,1.5e-05,0.128798


## QC of Drain Factor

In [None]:
def drain_factor(emin, emed, emax, eloc):
    """Evaluate a two-segment linear factor at ``eloc``.

    * For ``emin <= eloc <= emed`` (both ends inclusive) the factor is
      ``1 - 0.5 * (eloc - emin) / (emed - emin)``: 1.0 at ``emin`` and 0.5 at ``emed``.
    * For every other ``eloc`` it is ``0.5 * (emax - eloc) / (emax - emed)``:
      0.5 at ``emed`` and 0.0 at ``emax``.

    No clipping is applied, so an ``eloc`` outside ``[emin, emax]`` simply
    extrapolates the second expression (above 1 below ``emin``, negative above
    ``emax``). NaN inputs fail the range test and therefore use the second
    expression as well.

    The range test is written with plain comparisons instead of
    ``Series.between`` so that scalars and numpy arrays are accepted in
    addition to pandas Series; for Series the result is unchanged.

    Args:
        emin, emed, emax, eloc: scalars, numpy arrays or pandas Series that
            broadcast against each other.
            NOTE(review): presumably minimum / median / maximum / local
            elevations -- confirm against the drain-factor specification.

    Returns:
        numpy.ndarray: the factor for each element (0-d array for scalar input).

    Raises:
        ZeroDivisionError: only for plain Python numbers when ``emed == emin``
            or ``emax == emed``; array and Series inputs yield inf/NaN instead.
    """
    # Inclusive on both ends, matching the default behavior of Series.between.
    on_first_segment = (eloc >= emin) & (eloc <= emed)
    first_segment = 1 - 0.5 * ((eloc - emin) / (emed - emin))
    second_segment = 0.5 * ((emax - eloc) / (emax - emed))
    return np.where(on_first_segment, first_segment, second_segment)

In [None]:
# Spot check: eloc (803.43) lies between emin and emed, so the first segment applies.
drain_factor(
    emin=pd.Series(801.7),
    emed=pd.Series(805.69),
    emax=pd.Series(809.28),
    eloc=pd.Series(803.43),
)

## QC of Release Rate Calculations

In [None]:
def damage_area_sqft(product, release_rate_kgps):
    """Recompute the damage area from product type and release rate.

    Each product selects four constants (``i_c``, ``i_p``, ``c_c``, ``c_p``).
    The release rate is converted from kg/s to lb/s (factor 2.20462) and then:

    * rate > 25.20159 kg/s:  ``i_c * (180 * rate_lbps) ** i_p``
    * otherwise:             ``c_c * rate_lbps ** c_p``

    NOTE(review): the ``i_`` / ``c_`` prefixes presumably stand for the
    instantaneous and continuous release cases -- confirm against the
    algorithm specification.

    Args:
        product: pandas Series of product names (``.isin`` is used, so a
            Series is required). 'Natural Gas', 'Ethane Plus', 'Condensate',
            'Diluent' and 'CLPL Diluent' have their own constants; any other
            value uses the fallback set.
        release_rate_kgps: release rate in kg/s, aligned with ``product``.

    Returns:
        numpy.ndarray: damage area per row (square feet, per the function name).
    """
    # Conditions are checked in order and the first match wins, exactly like
    # the nested np.where ladder this replaces; unmatched rows get the fallback.
    product_masks = [
        product == 'Natural Gas',
        product == 'Ethane Plus',
        product == 'Condensate',
        product.isin(['Diluent', 'CLPL Diluent']),
    ]

    def per_product(values, fallback):
        # One constant per mask above, plus the value for every other product.
        return np.select(product_masks, values, default=fallback)

    i_c = per_product([41.0, 28., 4.35, 3.3], 0.03)
    i_p = per_product([0.67, 0.72, 0.78, 0.76], 0.99)
    c_c = per_product([43.0, 49.48, 182.0, 130.], 11.0)
    c_p = per_product([0.98, 1.0, 0.89, 0.90], 0.91)

    release_rate_lbps = release_rate_kgps * 2.20462

    # Both candidate areas are evaluated for every row; the mask picks one.
    above_threshold = release_rate_kgps > 25.20159
    area_above = i_c * np.power(180. * release_rate_lbps, i_p)
    area_below = c_c * np.power(release_rate_lbps, c_p)
    return np.where(above_threshold, area_above, area_below)

def release_rate_kgps(state, cp, density, mw, maop, t, hole_a):
    """Recompute the release rate through a hole for liquid or gas contents.

    * ``state == 'Liquid'``: ``0.62 * hole_a * sqrt(2 * density * maop * 1000)``.
    * otherwise (gas), with ``gamma = cp / (cp - 8.314)`` and threshold pressure
      ``101.325 * ((gamma + 1) / 2) ** (gamma / (gamma - 1))``:
        - ``maop`` above the threshold: choked-flow expression;
        - otherwise: subsonic expression based on the ratio ``101.325 / maop``.
      Both gas expressions use a coefficient of 0.9.

    All three candidate rates are evaluated for every row and then selected,
    so rows may emit numpy warnings from a branch that is not ultimately used.

    Args (units as implied by the constants and by the columns the QC cell in
    this notebook passes in -- confirm against the data dictionary):
        state: 'Liquid' selects the liquid formula; anything else is treated as gas.
        cp: heat capacity, used with 8.314 to form the specific-heat ratio.
        density: density (passed from 'Mixture Density kgperm3').
        mw: molecular weight (passed from 'Mixture Molecular Weight kgperkmol').
        maop: pressure in kPa (passed from 'Licensed Pressure (kPa)').
        t: temperature in deg C; 273.15 is added before use.
        hole_a: hole area (passed from 'Hole Area sqm (m^2)').

    Returns:
        numpy.ndarray: release rate per row (kg/s, per the function name).
    """
    gamma = cp / (cp - 8.314)
    threshold_pressure = 101.325 * np.power((gamma + 1) / 2, gamma / (gamma - 1))
    gas_denominator = 8314.4598 * (t + 273.15)
    pressure_ratio = 101.325 / maop

    liquid_rate = 0.62 * hole_a * np.sqrt(2 * density * maop * 1000.)

    # Shared leading term of both gas expressions.
    gas_prefactor = 0.9 * hole_a * maop * 1000.
    choked_rate = gas_prefactor * np.sqrt(
        ((gamma * mw) / gas_denominator)
        * np.power(2 / (gamma + 1), (gamma + 1) / (gamma - 1))
    )
    subsonic_rate = gas_prefactor * np.sqrt(
        (mw / gas_denominator)
        * (2 * gamma / (gamma - 1))
        * np.power(pressure_ratio, 2 / gamma)
        * (1 - np.power(pressure_ratio, (gamma - 1) / gamma))
    )

    gas_rate = np.where(maop > threshold_pressure, choked_rate, subsonic_rate)
    return np.where(state == 'Liquid', liquid_rate, gas_rate)

In [None]:
# damage_area_sqft(pd.Series(['Ethane Plus']), pd.Series([9.44]))

# results.loc[:,['Product_Type','Release_Rate_kgpersec','Damage_Area_sqft']].assign(qc = lambda x: damage_area_sqft(x.Product_Type, x.Release_Rate_kgpersec),
#                                                                                  pct_error = lambda x: abs(x.qc-x.Damage_Area_sqft)*100/x.Damage_Area_sqft).pct_error.describe()

# QC of release rate and damage area: recompute both from the model inputs in
# `results` and summarise the percentage error against the stored values.
# Missing operating temperatures are treated as 0.0 before the recomputation.
(
    results
    .loc[:, [
        'Product Type',
        'Mixture State',
        'Mixture Heat Capacity',
        'Mixture Density kgperm3',
        'Mixture Molecular Weight kgperkmol',
        'Licensed Pressure (kPa)',
        'Operating Temperature (°C)',
        'Hole Area sqm (m^2)',
        'Release Rate kgpersec',
        'Damage Area sqft (ft^2)',
    ]]
    .fillna({'Operating Temperature (°C)': 0.0})
    .assign(
        # Later entries read the columns created by earlier ones, so order matters.
        qc_rel=lambda x: release_rate_kgps(
            x['Mixture State'],
            x['Mixture Heat Capacity'],
            x['Mixture Density kgperm3'],
            x['Mixture Molecular Weight kgperkmol'],
            x['Licensed Pressure (kPa)'],
            x['Operating Temperature (°C)'],
            x['Hole Area sqm (m^2)'],
        ),
        pct_error_rel=lambda x: (
            (x.qc_rel - x['Release Rate kgpersec']).abs() * 100 / x['Release Rate kgpersec']
        ),
        qc_dam=lambda x: damage_area_sqft(x['Product Type'], x.qc_rel),
        pct_error_dam=lambda x: (
            (x.qc_dam - x['Damage Area sqft (ft^2)']).abs() * 100 / x['Damage Area sqft (ft^2)']
        ),
    )
    [['pct_error_rel', 'pct_error_dam']]
    .describe()
)

In [None]:
# QC of watercourse repair costs: recompute the cost with water_repair() for
# rows whose land use is 'WATER COURSE' and list those where the percentage
# difference from Repair_Costs_CDN exceeds 1.0.
(
    results
    .loc[
        lambda x: x.Land_Use == 'WATER COURSE',
        ['Bank_Full_Width_of_Watercourse_m', 'Outside_Diameter_Millimeter', 'Repair_Costs_CDN'],
    ]
    .assign(
        repair=lambda x: water_repair(
            x.Bank_Full_Width_of_Watercourse_m,
            x.Outside_Diameter_Millimeter,
        ),
        qc=lambda x: abs(x.repair - x.Repair_Costs_CDN) * 100. / x.Repair_Costs_CDN,
    )
    .query("qc > 1.0")
)