In [1]:
#Permanent cell 1
import arcpy
import pandas as pd
import sqlite3
import math
import numpy as np
import os

In [2]:
#Permanent cell 2
def sql_to_df(sql,model):
    con = sqlite3.connect(model)
    df = pd.read_sql(sql, con)
    con.close()
    return df

def execute_sql(sqls,model):
    con = sqlite3.connect(model)
    cur = con.cursor()
    if type(sqls) == list:
        for sql in sqls:
            cur.execute(sql)
    else:         
        cur.execute(sqls)
    cur.close()
    con.commit()
    con.close()

In [3]:
#Permanent cell 3
# User Input, to move to separate sheet so no permanent cell
#stop trace if more pipes than max_steps traced from catchment, must be an endless loop. 
max_steps = 1000 

update_field_in_model = True
update_field = 'Description'

output_folder = r"J:\SEWER_AREA_MODELS\FSA\04_ANALYSIS_WORK\Model_Result_To_GIS\Automation\Rawn_Tool\Output"
model_path = r"J:\SEWER_AREA_MODELS\FSA\04_ANALYSIS_WORK\Model_Result_To_GIS\Automation\NSSA_Base_2018pop.sqlite"
sewer_area = 'NSSA'
pop_book = r"\\prdsynfile01\LWS_Modelling\SEWER_AREA_MODELS\NSSA\02_MODEL_COMPONENTS\04_DATA\01. POPULATION\MPF4_Temp_Hold\NSSA_Master_Population_File_4_No_2237_ResArea.xlsx"
pop_sheet = 'MPF Update 4'
model = 'NSSA'
gdb_name = 'RAWN.gdb'
gdb_name_dissolve = 'RAWN_Dissolve.gdb' #To keep clutter out of main database

#Options to skip time consuming steps during debug, must be True during production runs
run_dissolve = False
run_dissolve_append = False
run_jpg = True
run_import = False
run_html = False


In [4]:
#Permanent cell 4
#Set up column names

years = [2060,2070,2080,2090,2100]
categories = ['res','com','ind','inst','infl','infi']

mpf_col_dict = {}

area_col_dict = {}
area_col_dict['res'] = 'Area_Res'
area_col_dict['com'] = 'Area_Com'
area_col_dict['ind'] = 'Area_Ind'
area_col_dict['inst'] = 'Area_Inst'
area_col_dict['ini'] = 'Area_Total'

per_unit_dict = {}
per_unit_dict['res'] = 320
per_unit_dict['com'] = 33700 
per_unit_dict['ind'] = 56200
per_unit_dict['inst'] = 33700
per_unit_dict['infl'] = 5600
per_unit_dict['infi'] = 5600

header_dict = {}
# header_dict['gen'] = ['GENERAL INFO',['TYPE','MODELID','CATCHMENT','ID','YEAR','LOCATION']]
header_dict['gen'] = ['GENERAL INFO',['TYPE','CATCHMENT','YEAR','LOCATION']]
header_dict['res'] = ['RESIDENTIAL',['AREA (Ha)','POPULATION','AVG. FLOW (L/s)','PEAK FLOW (L/s)']]
header_dict['com'] = ['COMMERCIAL',['AREA (Ha)','AVG. FLOW (L/s)','PEAK FLOW (L/s)']]
header_dict['ind'] = ['INDUSTRIAL',['AREA (Ha)','AVG. FLOW (L/s)','PEAK FLOW (L/s)']]
header_dict['inst'] = ['INSTITUTIONAL',['AREA (Ha)','AVG. FLOW (L/s)','PEAK FLOW (L/s)']]
header_dict['ini'] = ['INFLOW / INFILTRATION',['AREA (Ha)','INFLOW (L/s)','INFILTRATION (L/s)']]
header_dict['flow'] = ['FLOWS',['AVG. SAN. FLOW (L/s)','ADWF (L/s)','PWWF (L/s)']]

avg_calc_dict = {}
avg_calc_dict['res'] = ['RESIDENTIAL','POPULATION','AVG. FLOW (L/s)']
avg_calc_dict['com'] = ['COMMERCIAL','AREA (Ha)','AVG. FLOW (L/s)']
avg_calc_dict['ind'] = ['INDUSTRIAL','AREA (Ha)','AVG. FLOW (L/s)']
avg_calc_dict['inst'] = ['INSTITUTIONAL','AREA (Ha)','AVG. FLOW (L/s)']
avg_calc_dict['infl'] = ['INFLOW / INFILTRATION','AREA (Ha)','INFLOW (L/s)']
avg_calc_dict['infi'] = ['INFLOW / INFILTRATION','AREA (Ha)','INFILTRATION (L/s)']

header_tuples = []
for header in header_dict:
    for sub_header in (header_dict[header][1]):
        header_tuples.append((header_dict[header][0],sub_header))
header_tuples

# columns_multiindex = pd.MultiIndex.from_tuples(header_tuples,names=['Category', 'Subcategory'])
columns_multiindex = pd.MultiIndex.from_tuples(header_tuples)
df_template = pd.DataFrame(columns=columns_multiindex)





In [5]:
#Permanent cell 5
#Import population
pop_df = pd.read_excel(pop_book,sheet_name=pop_sheet,dtype={'Catchment': str})#[['Catchment','Year','Pop_Total']]
pop_df.rename(columns={"Pop_Total": "Population"},inplace=True)
pop_df = pop_df[['Catchment','Year','Pop_ResLD','Pop_ResHD','Pop_Mixed','Population','Area_ResLD','Area_ResHD','Area_Mixed','Area_Com','Area_Ind','Area_Inst']]
pop_df['Area_Res'] = pop_df.Area_ResLD + pop_df.Area_ResHD + pop_df.Area_Mixed
pop_df['Area_Total'] = pop_df.Area_ResLD + pop_df.Area_ResHD + pop_df.Area_Mixed + pop_df.Area_Com + pop_df.Area_Ind + pop_df.Area_Inst
pop_df['Population_Sum_Check'] = pop_df.Pop_ResLD + pop_df.Pop_ResHD + pop_df.Pop_Mixed
pop_sum_total_col = int(pop_df.Population.sum())
pop_sum_sub_cols = int(pop_df.Pop_ResLD.sum() + pop_df.Pop_ResHD.sum() + pop_df.Pop_Mixed.sum())
pop_df['Key'] = sewer_area + '@' + pop_df.Catchment + '@' + pop_df['Year'].astype(str)
pop_df.set_index('Key',inplace=True)

if pop_sum_total_col != pop_sum_sub_cols:
      raise ValueError("Error. The sum of 'Population' (" + str(pop_sum_total_col) + ") is different than the sum of 'Pop_ResLD' + 'Pop_ResHD' + 'Pop_Mixed' (" + str(pop_sum_sub_cols) + ")") 


In [6]:
#Permanent cell 6
#Import model data

node_types = {}
node_types[1] = 'Manhole'
node_types[2] = 'Basin'
node_types[3] = 'Outlet'
node_types[4] = 'Junction'
node_types[5] = 'Soakaway'
node_types[6] = 'River Junction'

sql = "SELECT catchid AS Catchment, nodeid AS Connected_Node FROM msm_Catchcon WHERE Active = 1"
catchments = sql_to_df(sql,model_path)

sql = "SELECT muid AS MUID, fromnodeid AS [From], tonodeid as [To], uplevel AS Outlet_Level FROM msm_Link WHERE Active = 1"
lines = sql_to_df(sql,model_path)

sql = "SELECT muid AS MUID, fromnodeid AS [From], tonodeid as [To], invertlevel AS Outlet_Level FROM msm_Orifice WHERE Active = 1"
orifices = sql_to_df(sql,model_path)
lines = pd.concat([lines,orifices])

sql = "SELECT muid AS MUID, fromnodeid AS [From], tonodeid as [To], invertlevel AS Outlet_Level FROM msm_Valve WHERE Active = 1"
valves = sql_to_df(sql,model_path)
lines = pd.concat([lines,valves])

sql = "SELECT muid AS MUID, fromnodeid AS [From], tonodeid as [To], crestlevel AS Outlet_Level FROM msm_Weir WHERE Active = 1"
weirs = sql_to_df(sql,model_path)
lines = pd.concat([lines,weirs])

sql = "SELECT muid AS MUID, fromnodeid AS [From], tonodeid as [To], startlevel AS Outlet_Level FROM msm_Pump WHERE Active = 1"
pumps = sql_to_df(sql,model_path)
lines = pd.concat([lines,pumps])

lines['Outlet_Level'].fillna(-9999, inplace=True)

sql = "SELECT muid, acronym, assetname FROM msm_Node WHERE active = 1"
node_id_df = sql_to_df(sql,model_path)
node_id_df = node_id_df[(node_id_df.assetname.str[:2]=='MH') & (node_id_df.assetname.str.len() > 2) & (node_id_df.acronym.notna())]
node_id_df.rename(columns={'muid':'Node'},inplace=True)
node_id_df['ID'] = node_id_df.acronym + '_' + node_id_df.assetname
node_id_df = node_id_df[['Node','ID']]

In [7]:
#Permanent cell 7
#Trace the model

accumulated_catchment_set = set()
accumulated_node_set = set()

for index1, row1 in catchments.iterrows():
    catchment = row1['Catchment']
    nodes = [row1['Connected_Node']]
    start_node = row1['Connected_Node']
    steps = 0
    
    accumulated_catchment_set.add((start_node,catchment))
        
    while steps <= max_steps:
        steps += 1
        downstream_df = lines[lines['From'].isin(nodes)]

        if len(downstream_df) > 0:
            nodes = list(downstream_df.To.unique())

            nodes = [node for node in nodes if len(node)>0]
            for node in nodes:
                accumulated_catchment_set.add((node,catchment))       
        else:
            break
        if steps == max_steps:
            raise ValueError("Maximum steps were reached, indicating a loop. Start catchment is '" + catchment + "'")
           
        accumulated_catchment_set.add((node,catchment))
        
accumulation_df = pd.DataFrame(accumulated_catchment_set,columns=['Node','Catchment'])
accumulation_df = pd.merge(accumulation_df,node_id_df,how='inner',on=['Node'])
data = {
    ('GENERAL INFO', 'CATCHMENT'): accumulation_df.Catchment,
    ('GENERAL INFO', 'NODE'): accumulation_df.Node,
    ('GENERAL INFO', 'ID'): accumulation_df.ID,
}

# Create a DataFrame with MultiIndex columns
accumulation_df = pd.DataFrame(data)



In [8]:
#Permanent cell 8
#Calculate RAWN

catchments = list(pop_df.Catchment.unique())

catchment_df = df_template.copy()
for catchment in catchments:
    for year in years:
        key = model + '@' + catchment + '@' + str(year)
        catchment_df.loc[key,('GENERAL INFO','TYPE')] = 'UNKNOWN'
        catchment_df.loc[key,('GENERAL INFO','CATCHMENT')] = catchment
        catchment_df.loc[key,('GENERAL INFO','YEAR')] = year
        catchment_df.loc[key,('GENERAL INFO','LOCATION')] = model
        for area_col_dict_key in area_col_dict:
            catchment_df.loc[key,(header_dict[area_col_dict_key][0],'AREA (Ha)')] = pop_df.loc[key,area_col_dict[area_col_dict_key]]
        catchment_df.loc[key,('RESIDENTIAL','POPULATION')] = pop_df.loc[key,'Population']
        san_flow = 0
        adwf = 0
        for avg_calc_dict_key in avg_calc_dict:
            input1 = catchment_df.loc[key,(avg_calc_dict[avg_calc_dict_key][0],avg_calc_dict[avg_calc_dict_key][1])]
            input2 = per_unit_dict[avg_calc_dict_key]
            avg_flow = input1 * input2 / 86400
            adwf += avg_flow
            if avg_calc_dict_key not in ['infl','infi']:
                san_flow += avg_flow
            catchment_df.loc[key,(avg_calc_dict[avg_calc_dict_key][0],avg_calc_dict[avg_calc_dict_key][2])] = avg_flow
        catchment_df.loc[key,('FLOWS','AVG. SAN. FLOW (L/s)')] = san_flow
        catchment_df.loc[key,('FLOWS','ADWF (L/s)')] = adwf

        
catchment_node_df = accumulation_df.merge(catchment_df,on=[('GENERAL INFO','CATCHMENT')],how='inner')
node_df = catchment_node_df.copy()
node_df.drop(columns=[('GENERAL INFO','CATCHMENT')],inplace=True)
node_df = node_df.groupby([('GENERAL INFO','NODE'),('GENERAL INFO','TYPE'),('GENERAL INFO','YEAR'),('GENERAL INFO','LOCATION'),('GENERAL INFO','ID')]).sum()
node_df.reset_index(inplace=True)
node_df[('RESIDENTIAL','PEAK FLOW (L/s)')] = (1 + 14 / (4 + (node_df[('RESIDENTIAL','POPULATION')] / 1000) ** 0.5)) * node_df[('RESIDENTIAL','AVG. FLOW (L/s)')]
node_df[('COMMERCIAL','PEAK FLOW (L/s)')] = (1 + 14 / (4 + (per_unit_dict['com'] * node_df[('COMMERCIAL','AREA (Ha)')]/(per_unit_dict['res'] * 1000)) ** 0.5))*node_df[('COMMERCIAL','AVG. FLOW (L/s)')]*0.8
node_df[('INSTITUTIONAL','PEAK FLOW (L/s)')] = (1 + 14 / (4 + (per_unit_dict['inst'] * node_df[('INSTITUTIONAL','AREA (Ha)')] / (per_unit_dict['res'] * 1000)) ** 0.5)) * node_df[('INSTITUTIONAL','AVG. FLOW (L/s)')]

mask = node_df[('INDUSTRIAL', 'AREA (Ha)')] != 0 #Avoid error from log(0)
node_df.loc[mask, ('INDUSTRIAL', 'PEAK FLOW (L/s)')] = (
    0.8 * (1 + 14 / (4 + (node_df[('INDUSTRIAL', 'AREA (Ha)')][mask] * per_unit_dict['ind'] / (per_unit_dict['res'] * 1000)) ** 0.5)) *
    np.where(
        node_df[('INDUSTRIAL', 'AREA (Ha)')][mask] < 121,
        1.7,
        2.505 - 0.1673 * np.log(node_df[('INDUSTRIAL', 'AREA (Ha)')][mask])
    ) * node_df[('INDUSTRIAL', 'AVG. FLOW (L/s)')][mask]
)

node_df[('FLOWS','PWWF (L/s)')] = (
    node_df[('RESIDENTIAL','PEAK FLOW (L/s)')] +
    node_df[('COMMERCIAL','PEAK FLOW (L/s)')] +
    node_df[('INDUSTRIAL','PEAK FLOW (L/s)')] +
    node_df[('INSTITUTIONAL','PEAK FLOW (L/s)')] +
    node_df[('INFLOW / INFILTRATION','INFLOW (L/s)')] +
    node_df[('INFLOW / INFILTRATION','INFILTRATION (L/s)')]
)

excel_folder = output_folder + '\\Excel'
if not os.path.isdir(excel_folder): os.makedirs(excel_folder) 
for id in node_df[('GENERAL INFO','ID')].unique():    
    node_single_df = node_df[node_df[('GENERAL INFO','ID')]==id]
    id = id.replace('/','-') if '/' in id else id
    node_single_df.to_excel(excel_folder + '\\' + id + '.xlsx')


In [9]:
# #Test cell
# aprx = arcpy.mp.ArcGISProject("CURRENT")
# m = aprx.listMaps('Map')[0]
# for lyr in m.listLayers():
#   if lyr.isFeatureLayer:
#     print (lyr.name)
#     sym = lyr.symbology
#     if lyr.name == 'msm_CatchCon':
#         sym.renderer.symbol.color = {'RGB': [255, 0, 0]}  # Red color
#         lyr.symbology = sym
# aprx.save()
        

In [10]:
# aprx = arcpy.mp.ArcGISProject("CURRENT")
# out_path = r'J:\SEWER_AREA_MODELS\FSA\04_ANALYSIS_WORK\Model_Result_To_GIS\Automation\Rawn_Tool\Output' + '\\' + gdb_name
# arcpy.env.workspace = out_path
# sr = arcpy.SpatialReference(26910)
# layers = ['msm_CatchCon']
# arcpy.env.overwriteOutput = True
# for layer in layers:
#     arcpy.conversion.FeatureClassToFeatureClass(model_path + '\\' + layer, out_path, layer)
#     arcpy.DefineProjection_management(layer, sr)


In [11]:
# out_path = r'J:\SEWER_AREA_MODELS\FSA\04_ANALYSIS_WORK\Model_Result_To_GIS\Automation\Rawn_Tool\Output' + '\\' + gdb_name
# arcpy.env.workspace = out_path

In [12]:
#Permanent cell 9
#Import GIS from the model
if run_import:

    out_path = output_folder + '\\' + gdb_name

    if not os.path.isdir(out_path):
        arcpy.management.CreateFileGDB(output_folder, gdb_name)

    arcpy.env.workspace = out_path
    sr = arcpy.SpatialReference(26910)

    layers = ['msm_CatchCon','msm_Catchment','msm_Link','msm_Node','msm_Pump','msm_Weir','msm_Orifice','msm_Valve']

    for layer in layers:
        print(layer)
    #     arcpy.conversion.FeatureClassToFeatureClass(model_path + '\\' + layer, out_path, layer)

        arcpy.management.MakeFeatureLayer(model_path + '\\' + layer, "temp_layer", "Active = 1")

        if arcpy.Exists(layer):
            arcpy.management.DeleteFeatures(layer)
            arcpy.management.Append("temp_layer", layer, "NO_TEST")
        else:    
            arcpy.conversion.FeatureClassToFeatureClass("temp_layer", out_path, layer)
            if layer == 'msm_Catchment':
                arcpy.management.AddField('msm_catchment', "Drains_To", "TEXT")

        arcpy.management.Delete("temp_layer")
        arcpy.DefineProjection_management(layer, sr)






In [13]:
#Algorithm to speed things up
#Add a level to the trace, 1 being most upstream
#Op

In [14]:
#Permanent cell 10
#Dissolve catchments
arcpy.env.addOutputsToMap = False
if run_dissolve:
    arcpy.management.CreateFileGDB(output_folder, gdb_name_dissolve)
    dissolve_path = output_folder + '\\' + gdb_name_dissolve
    arcpy.conversion.FeatureClassToFeatureClass('msm_Catchment', dissolve_path, 'msm_Catchment')
    nodes = list(accumulation_df[('GENERAL INFO','NODE')].unique())
    for i, node in enumerate(nodes):
        print('Dissolving for node ' + str(i) + ' of ' + str(len(nodes)) + ' at time ' + str(datetime.datetime.now()))
        catchment_df = accumulation_df[accumulation_df[('GENERAL INFO','NODE')]==node]
        catchments = list(catchment_df[('GENERAL INFO','CATCHMENT')].unique())
        arcpy.management.CalculateField(dissolve_path + '\\msm_Catchment', "Drains_To", "''", "PYTHON3")
        with arcpy.da.UpdateCursor(dissolve_path + '\\msm_catchment', ['muid', 'Drains_To']) as cursor:
            for row in cursor:
                if row[0] in catchments:
                    row[1] = node
                    cursor.updateRow(row)

        query = "Drains_To = 'Test'"
        arcpy.management.MakeFeatureLayer(dissolve_path + '\\msm_catchment', "temp_layer", "Drains_To = '" + node + "'")
        dissolve_output = dissolve_path + '\\msm_Catchment_Dissolve_Single'
        arcpy.management.Dissolve("temp_layer", dissolve_output, "Drains_To", "", "MULTI_PART")
        arcpy.management.Delete("temp_layer")

        arcpy.conversion.FeatureClassToFeatureClass(dissolve_path + '\\msm_Catchment_Dissolve_Single', dissolve_path, 'Node_Catchment_' + node)



In [15]:
# import sys

# def get_memory_usage(var):
#     """Returns the memory usage of a variable in bytes."""
#     return sys.getsizeof(var)

# def list_variables_memory():
#     # Get the local and global variables
#     variables = {**globals(), **locals()}
#     total_memory = 0
    
#     # Print the variables and their memory consumption
#     for var_name, var_value in variables.items():
#         # Filter out the built-in variables and functions
#         if not var_name.startswith('__') and not callable(var_value):
#             memory = get_memory_usage(var_value)
#             total_memory += memory
#             print(f"Variable: {var_name}, Type: {type(var_value)}, Memory: {memory} bytes")

#     total_memory_mb = total_memory / (1024 * 1024)  # Convert bytes to megabytes
#     print(f"Total Memory Usage: {total_memory_mb:.2f} MB")
#     return total_memory_mb


# # Call the function to list variables and their memory consumption
# total_memory_usage_mb = list_variables_memory()


In [16]:
#Permanent cell 11
#Append individual dissolved catchments to one layer.

if run_dissolve_append:
    nodes = list(accumulation_df[('GENERAL INFO','NODE')].unique())
    for i, node in enumerate(nodes):    
        print('Appending for node ' + str(i) + ' of ' + str(len(nodes)) + ' at time ' + str(datetime.datetime.now()))
        if i == 0:
            arcpy.conversion.FeatureClassToFeatureClass(dissolve_path + '\\Node_Catchment_' + node, out_path, 'Node_Catchment')
        else:
            arcpy.management.Append(dissolve_path + '\\Node_Catchment_' + node, "Node_Catchment", "NO_TEST")


In [21]:
#Permanent cell 12
#Export jpgs
if run_jpg:
    aprx = arcpy.mp.ArcGISProject("CURRENT")
    project_path = aprx.filePath

    jpg_folder = output_folder + r'\HTML\Maps_And_CSS'
    if not os.path.isdir(jpg_folder): os.makedirs(jpg_folder) 


    # project_directory = os.path.dirname(project_path)

    layouts = aprx.listLayouts()
    export_fails = []

    for layout in layouts:

        if layout.mapSeries is not None:
            map_series = layout.mapSeries
            # Loop through all pages in the map series
            for page_number in range(1, map_series.pageCount + 1):
                map_series.currentPageNumber = page_number
                output_filename = os.path.join(jpg_folder, f"{map_series.pageRow.Drains_To}.jpg")
                try:
                    layout.exportToJPEG(output_filename, resolution=300)
                except:
                    print(f'WARNING! {map_series.pageRow.Drains_To} could not be made')
                    export_fails.append(map_series.pageRow.Drains_To)
                print (f'Printing jpg {page_number} of {map_series.pageCount} at time {datetime.datetime.now()}')

    print(f'the following pages failed: {export_fails}')            
    print("Export complete.")




Printing jpg 1 of 469 at time 2024-05-27 09:26:01.093705
Printing jpg 2 of 469 at time 2024-05-27 09:26:02.720209
Printing jpg 3 of 469 at time 2024-05-27 09:26:04.314186
Printing jpg 4 of 469 at time 2024-05-27 09:26:06.033776
Printing jpg 5 of 469 at time 2024-05-27 09:26:07.877481
Printing jpg 6 of 469 at time 2024-05-27 09:26:09.813775
Printing jpg 7 of 469 at time 2024-05-27 09:26:11.690510
Printing jpg 8 of 469 at time 2024-05-27 09:26:13.316014
Printing jpg 9 of 469 at time 2024-05-27 09:26:15.095665
Printing jpg 10 of 469 at time 2024-05-27 09:26:17.001428
Printing jpg 11 of 469 at time 2024-05-27 09:26:18.690990
Printing jpg 12 of 469 at time 2024-05-27 09:26:20.533694
Printing jpg 13 of 469 at time 2024-05-27 09:26:22.346874
Printing jpg 14 of 469 at time 2024-05-27 09:26:24.095491
Printing jpg 15 of 469 at time 2024-05-27 09:26:25.878140
Printing jpg 16 of 469 at time 2024-05-27 09:26:27.846464
Printing jpg 17 of 469 at time 2024-05-27 09:26:29.598084
Printing jpg 18 of 469 

Printing jpg 141 of 469 at time 2024-05-27 09:29:59.007444
Printing jpg 142 of 469 at time 2024-05-27 09:30:00.975263
Printing jpg 143 of 469 at time 2024-05-27 09:30:02.755414
Printing jpg 144 of 469 at time 2024-05-27 09:30:04.599621
Printing jpg 145 of 469 at time 2024-05-27 09:30:06.539414
Printing jpg 146 of 469 at time 2024-05-27 09:30:08.317561
Printing jpg 147 of 469 at time 2024-05-27 09:30:10.224323
Printing jpg 148 of 469 at time 2024-05-27 09:30:12.130085
Printing jpg 149 of 469 at time 2024-05-27 09:30:13.913236
Printing jpg 150 of 469 at time 2024-05-27 09:30:15.692881
Printing jpg 151 of 469 at time 2024-05-27 09:30:17.539588
Printing jpg 152 of 469 at time 2024-05-27 09:30:19.412325
Printing jpg 153 of 469 at time 2024-05-27 09:30:21.227003
Printing jpg 154 of 469 at time 2024-05-27 09:30:23.194822
Printing jpg 155 of 469 at time 2024-05-27 09:30:25.224698
Printing jpg 156 of 469 at time 2024-05-27 09:30:27.166996
Printing jpg 157 of 469 at time 2024-05-27 09:30:29.3515

Printing jpg 279 of 469 at time 2024-05-27 09:34:08.479235
Printing jpg 280 of 469 at time 2024-05-27 09:34:10.447053
Printing jpg 281 of 469 at time 2024-05-27 09:34:12.259230
Printing jpg 282 of 469 at time 2024-05-27 09:34:14.292109
Printing jpg 283 of 469 at time 2024-05-27 09:34:15.760466
Printing jpg 284 of 469 at time 2024-05-27 09:34:17.259354
Printing jpg 285 of 469 at time 2024-05-27 09:34:18.853330
Printing jpg 286 of 469 at time 2024-05-27 09:34:20.509861
Printing jpg 287 of 469 at time 2024-05-27 09:34:22.167393
Printing jpg 288 of 469 at time 2024-05-27 09:34:23.916008
Printing jpg 289 of 469 at time 2024-05-27 09:34:25.603073
Printing jpg 290 of 469 at time 2024-05-27 09:34:27.259604
Printing jpg 291 of 469 at time 2024-05-27 09:34:29.011223
Printing jpg 292 of 469 at time 2024-05-27 09:34:30.696780
Printing jpg 293 of 469 at time 2024-05-27 09:34:32.292758
Printing jpg 294 of 469 at time 2024-05-27 09:34:33.884229
Printing jpg 295 of 469 at time 2024-05-27 09:34:35.4797

Printing jpg 417 of 469 at time 2024-05-27 09:38:01.294001
Printing jpg 418 of 469 at time 2024-05-27 09:38:02.951034
Printing jpg 419 of 469 at time 2024-05-27 09:38:04.575535
Printing jpg 420 of 469 at time 2024-05-27 09:38:06.199034
Printing jpg 421 of 469 at time 2024-05-27 09:38:07.795509
Printing jpg 422 of 469 at time 2024-05-27 09:38:09.450553
Printing jpg 423 of 469 at time 2024-05-27 09:38:11.200169
Printing jpg 424 of 469 at time 2024-05-27 09:38:12.920758
Printing jpg 425 of 469 at time 2024-05-27 09:38:14.670878
Printing jpg 426 of 469 at time 2024-05-27 09:38:16.231319
Printing jpg 427 of 469 at time 2024-05-27 09:38:17.795764
Printing jpg 428 of 469 at time 2024-05-27 09:38:19.981295
Printing jpg 429 of 469 at time 2024-05-27 09:38:21.888057
Printing jpg 430 of 469 at time 2024-05-27 09:38:23.545091
Printing jpg 431 of 469 at time 2024-05-27 09:38:25.106533
Printing jpg 432 of 469 at time 2024-05-27 09:38:26.637948
Printing jpg 433 of 469 at time 2024-05-27 09:38:28.1703

In [18]:
#Permanent cell 13
#Create HTMLs

if run_html:
    shutil.copy2('style.css', html_folder + '\\style.css')
    shutil.copy2('script.js', html_folder + '\\script.js')

    for category in categories:
        area_type = category[0]
        area_names = category[1]
        header_start = category[2]

        f = open(html_folder + '\\Population_By_' + area_type + '_' + model_area + '.html', "w")
        f.write('<link rel="stylesheet" href="style.css">\n')
        f.write('<script src="script.js"></script>\n')
        f.write('<link rel="stylesheet" href="style.css">\n')
        f.write('<!DOCTYPE html>\n')
        f.write('<html>\n')
        f.write('<head>\n')
        f.write('<meta charset="utf-8">\n')
        f.write('</head>\n')
        f.write('<body>\n\n')

        f.write('<div class="tab">\n')
        for area_name in area_names:
            tab = area_name

        #     color = ps_dict[first_year]
        #     bg_color = color_dict[color][0]
        #     text_color = color_dict[color][1]

            f.write('  <button class="tablinks" onclick="openTab(event, ' + "'" + tab + "'"  + ')">' + tab + '</button>\n')
        f.write('</div>\n')

        pop_df = pop_dfss[0][2]

        for area_name in area_names:

            area_df = pop_df[pop_df[area_type]==area_name]
            area_df = area_df[['Year','Population']].groupby(['Year']).sum()

            f.write('<div id="' + area_name + '" class="tabcontent">\n') 
            f.write('<h1>' + area_name + '</h1>\n')

            f.write('<div class="sidenav">\n')

            f.write('<table style=\'width: 90%;\'>\n')
            f.write('<tr>\n')
            f.write('<th>Year</th>\n')
            f.write('<th>Population</th>\n')
            f.write('</tr>\n')

            for index, row in area_df.iterrows():
                f.write('<tr>\n')
                f.write('<td>'+ str(index) + '</td>\n')
                population_with_separator = f"{int(row['Population']):,}"
                f.write('<td>'+ population_with_separator + '</td>\n')

                f.write('</tr>\n')
            f.write('</table>\n')

            for i in range(4):
                f.write('<h1 style="color: white">End of tables</h1>\n')#Invisible, just to enable scroll to table bottoms

            f.write('</div>\n') #end sidenav


            f.write('<div class="main">\n')

            fig = go.Figure()


            fig.add_trace(go.Scatter(x=area_df.index, 
                                         y = area_df.Population, 
                                         mode='lines',name=pop_dfss[0][0],line=dict(width=5)))

            for pop_dfs in pop_dfss[1:]:
                pop_df_past = pop_dfs[2]
                area_df = pop_df_past[pop_df_past[area_type]==area_name]
                area_df = area_df[['Year','Population']].groupby(['Year']).sum()
                fig.add_trace(go.Scatter(x=area_df.index, 
                                         y = area_df.Population, 
                                         mode='lines',name=pop_dfs[0],line=dict(width=2)))

            fig.update_layout(
                title=header_start + area_name,
                autosize=False,
                width = 1500,
                height=850,
                margin=dict(
                    l=50,
                    r=50,
                    b=50,
                    t=50,
                    pad=4
                    ),
                    yaxis_title = 'Population'
                )

            f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))

            f.write('</div>\n') #end div main  

            f.write('</div>\n')  #end div tab   

            f.write('</body>\n')
        f.write('</html>\n')
        f.close()



In [19]:
# import pandas as pd

# # Sample DataFrame
# data = {
#     'Catchment': [1, 2, 3],
#     'Node': [4, 5, 6]

# }
# accumulation_df = pd.DataFrame(data)

# # Create a MultiIndex with the existing columns
# existing_columns_multiindex = pd.MultiIndex.from_tuples([
#     ('GENERAL INFO', 'Catchment'),  # Header with no subheaders
#     ('GENERAL INFO', 'Node'),  # Header with no subheaders
# ])

# # Create a MultiIndex with the upper level 'GENERAL INFO'
# upper_level = [('GENERAL INFO', '')] * len(existing_columns_multiindex)

# # Concatenate the upper level and the existing columns MultiIndex
# new_columns_multiindex = pd.MultiIndex.from_tuples(list(zip(upper_level, existing_columns_multiindex)))

# # Assign the new MultiIndex to the DataFrame columns
# accumulation_df.columns = new_columns_multiindex

# accumulation_df


In [24]:
accumulation_df

''''''''''
rank manholes as per how many catchments they have
first 'merge' those ith just 1
then merge those with just 2. Register which ones they replace. Do not repeat. If the merge happened already, copy instead.
then merge those with just 3. For each, check if they are in previous merges. 
''''''''''

accumulation_df['Node']



SyntaxError: EOL while scanning string literal (<string>, line 8)

In [27]:
accumulation_df

Unnamed: 0_level_0,GENERAL INFO,GENERAL INFO,GENERAL INFO
Unnamed: 0_level_1,CATCHMENT,NODE,ID
0,2016,7494,HLR_MH10A
1,2011,7494,HLR_MH10A
2,2020,7494,HLR_MH10A
3,2047,7494,HLR_MH10A
4,2054,7494,HLR_MH10A
...,...,...,...
14088,2035,7140,GNK_MH41
14089,2033,7140,GNK_MH41
14090,2122,6790,SEBD_MH6
14091,2052,9670,HLY_MH61


In [47]:
[2,3] in [2,3,4]

False

In [50]:
sub_list = [2, 3]
main_list = [2, 3, 4]

# Check if all elements in sub_list are present in main_list
result = all(item in main_list for item in sub_list)

print(result)  # This will print True



True


NameError: name 'sort' is not defined

In [53]:
all([True,False])

False

In [79]:
merge_set = set()
for node in accumulation_df[('GENERAL INFO','NODE')].unique():
    catchments = list(accumulation_df[accumulation_df[('GENERAL INFO','NODE')]==node][('GENERAL INFO','CATCHMENT')].unique())
    catchments = tuple(sorted(catchments))
    merge_set.add(catchments)
print(len(merge_set))

103


In [80]:
len(accumulation_df[('GENERAL INFO','NODE')].unique())

469

In [76]:
sorted(['a','c','b'])

['a', 'b', 'c']

In [104]:
accumulation_df

Unnamed: 0_level_0,GENERAL INFO,GENERAL INFO,GENERAL INFO
Unnamed: 0_level_1,CATCHMENT,NODE,ID
0,2016,7494,HLR_MH10A
1,2011,7494,HLR_MH10A
2,2020,7494,HLR_MH10A
3,2047,7494,HLR_MH10A
4,2054,7494,HLR_MH10A
...,...,...,...
14088,2035,7140,GNK_MH41
14089,2033,7140,GNK_MH41
14090,2122,6790,SEBD_MH6
14091,2052,9670,HLY_MH61


In [99]:
accumulation_df[accumulation_df[('GENERAL INFO','NODE')]=='7494']

Unnamed: 0_level_0,GENERAL INFO,GENERAL INFO,GENERAL INFO
Unnamed: 0_level_1,CATCHMENT,NODE,ID
0,2016,7494,HLR_MH10A
1,2011,7494,HLR_MH10A
2,2020,7494,HLR_MH10A
3,2047,7494,HLR_MH10A
4,2054,7494,HLR_MH10A
5,2053,7494,HLR_MH10A
6,2023,7494,HLR_MH10A
7,2039,7494,HLR_MH10A
8,2044,7494,HLR_MH10A
9,2032,7494,HLR_MH10A


In [101]:
list(accumulation_df[accumulation_df[('GENERAL INFO','NODE')]==index][('GENERAL INFO','CATCHMENT')].unique())

['2122', '2119']

In [105]:
rank_df

Unnamed: 0_level_0,Catchment_Count,Catchments
"(GENERAL INFO, NODE)",Unnamed: 1_level_1,Unnamed: 2_level_1
7311,1,2169
6786,1,"(2122,)"
6787,1,"(2122,)"
6788,1,"(2122,)"
6789,1,"(2122,)"
...,...,...
9766,104,
9764,104,
9763,104,
9765,104,


In [124]:
merge_set = set()

rank_df = accumulation_df[[('GENERAL INFO','NODE'),('GENERAL INFO','CATCHMENT')]].groupby([('GENERAL INFO','NODE')]).count()

# rank_df.reset_index(inplace=True)
rank_df.columns = ['Catchment_Count']
max_catchments = max(rank_df.Catchment_Count)
rank_df.sort_values(by=['Catchment_Count'],inplace=True)
# rank_df.reset_index(inplace=True)

catchment_list = []
merge_set = set()
for index, row in rank_df.iterrows():

    catchments = list(accumulation_df[accumulation_df[('GENERAL INFO','NODE')]==index][('GENERAL INFO','CATCHMENT')].unique())
    catchments = tuple(sorted(catchments))
#     rank_df.loc[index,'Catchments'] = catchments
    catchment_list.append(catchments)
    merge_set.add(catchments)
    
    
rank_df['Catchments'] = catchment_list
rank_df['Node'] = rank_df.index
print(len(merge_set))

merge_list = []
for i, catchments in enumerate(merge_set):
    merge_id = 'Merge_ID_' + str(i)
    merge_list.append([merge_id,catchments])
    
merge_df = pd.DataFrame(merge_list,columns=['Merge_ID','Catchments'])
merge_df['Catchment_Count'] = merge_df['Catchments'].apply(len)
merge_df.sort_values(by=['Catchment_Count'],ascending=False,inplace=True)
merge_df.reset_index(inplace=True,drop=True)

# rank_df = pd.merge(rank_df,merge_df, on=['Catchments'],how='inner')
# rank_df.set_index('Node',inplace=True)
    
print(max_catchments)

# for index1, row1 in merge_df.iterrows():
#     print

merge_df

103
105


Unnamed: 0,Merge_ID,Catchments,Catchment_Count
0,Merge_ID_82,"(10108, 10109, 10110, 2059, 2060, 2061, 2062, ...",105
1,Merge_ID_96,"(10108, 10110, 2059, 2060, 2061, 2062, 2063, 2...",104
2,Merge_ID_57,"(2059, 2060, 2061, 2062, 2063, 2064, 2065, 206...",99
3,Merge_ID_52,"(2059, 2061, 2062, 2063, 2064, 2065, 2066, 206...",98
4,Merge_ID_16,"(2059, 2062, 2063, 2064, 2066, 2068, 2069, 207...",95
...,...,...,...
98,Merge_ID_43,"(2050, 2052)",2
99,Merge_ID_38,"(2128, 2136)",2
100,Merge_ID_79,"(2052,)",1
101,Merge_ID_55,"(2122,)",1


In [134]:
cou = 0
for index1, row1 in merge_df.iterrows():
    catchments1 = list(row1['Catchments']_
    print(f'Index1: {index1}')
    for index2, row2 in merge_df[index1+1:].iterrows():
        catchments2 = row2['Catchments']
        if len(catchments1) >= len(catchments2):
            if all(item in catchments1 for item in catchments1):
                       
            

print(cou)        

Index1: 0
Index1: 1
Index1: 2
Index1: 3
Index1: 4
Index1: 5
Index1: 6
Index1: 7
Index1: 8
Index1: 9
Index1: 10
Index1: 11
Index1: 12
Index1: 13
Index1: 14
Index1: 15
Index1: 16
Index1: 17
Index1: 18
Index1: 19
Index1: 20
Index1: 21
Index1: 22
Index1: 23
Index1: 24
Index1: 25
Index1: 26
Index1: 27
Index1: 28
Index1: 29
Index1: 30
Index1: 31
Index1: 32
Index1: 33
Index1: 34
Index1: 35
Index1: 36
Index1: 37
Index1: 38
Index1: 39
Index1: 40
Index1: 41
Index1: 42
Index1: 43
Index1: 44
Index1: 45
Index1: 46
Index1: 47
Index1: 48
Index1: 49
Index1: 50
Index1: 51
Index1: 52
Index1: 53
Index1: 54
Index1: 55
Index1: 56
Index1: 57
Index1: 58
Index1: 59
Index1: 60
Index1: 61
Index1: 62
Index1: 63
Index1: 64
Index1: 65
Index1: 66
Index1: 67
Index1: 68
Index1: 69
Index1: 70
Index1: 71
Index1: 72
Index1: 73
Index1: 74
Index1: 75
Index1: 76
Index1: 77
Index1: 78
Index1: 79
Index1: 80
Index1: 81
Index1: 82
Index1: 83
Index1: 84
Index1: 85
Index1: 86
Index1: 87
Index1: 88
Index1: 89
Index1: 90
Index1: 9

In [133]:
index1

102

In [57]:
help(rank_df.sort_values)

Help on method sort_values in module pandas.core.frame:

sort_values(by, axis: 'Axis' = 0, ascending=True, inplace: 'bool' = False, kind: 'str' = 'quicksort', na_position: 'str' = 'last', ignore_index: 'bool' = False, key: 'ValueKeyFunc' = None) method of pandas.core.frame.DataFrame instance
    Sort by the values along either axis.
    
    Parameters
    ----------
            by : str or list of str
                Name or list of names to sort by.
    
                - if `axis` is 0 or `'index'` then `by` may contain index
                  levels and/or column labels.
                - if `axis` is 1 or `'columns'` then `by` may contain column
                  levels and/or index labels.
    axis : {0 or 'index', 1 or 'columns'}, default 0
         Axis to be sorted.
    ascending : bool or list of bool, default True
         Sort ascending vs. descending. Specify list for multiple sort
         orders.  If this is a list of bools, must match the length of
         the by.
    

In [20]:
# import pandas as pd

# # Sample data for the DataFrame
# data = {
#     ('GENERAL INFO', 'CATCHMENT'): accumulation_df.Catchment,
#     ('GENERAL INFO', 'NODE'): accumulation_df.Node,
# }

# # Create a DataFrame with MultiIndex columns
# df = pd.DataFrame(data)

# # Set names for the levels of the MultiIndex
# # df.columns.names = ['Header', 'Subheader']

# df