In [1]:
import pandas as pd
import numpy as np
import shutil
import os
import datetime

# ---------------------------------------------------------------------------
# Configuration for the synthesised-totals run.
# Everything below is read as a module-level global by the later cells.
# ---------------------------------------------------------------------------

# CSV loaded into `allElementsTable` by the data-loading cell.
elementMapperCSV = r'P:\projects\eReefsOperational\CSIRO_Locs\CSIRO_GBR_ElementMapper.csv'
# Column names from the element mapper.
# NOTE(review): none of these three names is read by the active code visible in this notebook.
regID = 'RegID'
riverCol = 'Major River'
elementCol = 'Network Element'

# CSV loaded into `simDetailsDF` by the data-loading cell.
simMapperCSV = r'P:\projects\eReefs\GBRF_SpeedScenarios\GBRF_ScenarioExtension_ResultsMapperFiltered.csv'

# Column names from the sim mapper (the region column is `regID` above).
# NOTE(review): not read by the active code visible in this notebook.
basinFolderName = 'BasinFolder'
existSimFolderName = 'SIMCODE'
# Alternative column to use for the TS Dependent / EMCMonthly results:
#existSimFolderName = 'TSDependentSubDir'

# Input root and output root. Inputs are read from
#   <existSummariesFolderName>\<basin folder>\<baseline or scenario folder>\<river> Totals...
# and results are written to
#   <outPath>\<basin folder>\<river> Totals.csv
existSummariesFolderName = r'E:\GBRFScenarios\CSIROAggregatedOutlets\EmpiricalEMC'
outPath = r'E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC'
# Alternative input/output pair (swap BOTH lines together):
#existSummariesFolderName = r'E:\GBRFScenarios\CSIROAggregatedOutlets\GBRDynSedNet'
#outPath = r'E:\GBRFScenarios\SynthesisedTotalsGBRDynSedNet'

# NOTE(review): this list is not read by the visible code; createSynthesisedTotalsTS takes its
# constituent columns from the header of the baseline Totals files instead.
Constituents = ['Sediment - Fine', 'N_Particulate', 'N_DIN', 'N_DON', 'P_Particulate', 'P_DOP', 'P_FRP']

# Region code -> name of that region's baseline folder. The keys also select which regions
# the driver cell processes, so every key needs an entry in regionBasins and scenariosForReg.
# Single-region variant for a quick run:
#regionIDs = {'BU':'BUBASE'}
regionIDs = {'BU':'BUBASE','MW':'MWBASE', 'WT':'WTBASE'}

# Region code -> basin folder name, used under both the input root and the output root.
regionBasins = {'BM':'BurnettMary', 'BU':'Burdekin', 'FI':'Fitzroy', 'MW':'MackayWhitsundays', 'WT':'WetTropics'}


# Region code -> scenario folder names whose totals are combined into the synthesised series.
scenariosForReg = {'BU':['LOWBFERTDB','LOWBIRRICB'], 'WT':['WETFERTDB','WETFERTCB','WETCANERET'], 'MW':['PLANFERTDB','PLANFERTCB']}


In [2]:
# Create the output root (including any missing parents) the first time the notebook runs.
# The message is printed only when the directory was actually created.
if not os.path.exists(outPath):
    os.makedirs(outPath)
    print("Made dir: " + outPath)


Made dir: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC


In [3]:
# Load the element mapper and the simulation mapper from the paths set in the config cell.
# NOTE(review): neither frame is read by any active code in the cells visible here, and both
# live on a network drive — confirm they are still needed before keeping these reads.
allElementsTable = pd.read_csv(elementMapperCSV)
simDetailsDF = pd.read_csv(simMapperCSV)

In [4]:
def getResultsPathInfo(regionIDString, scenarioString):
    """Return the aggregated-results folder for one scenario of one region.

    The result is the global ``existSummariesFolderName``, the basin folder that
    ``regionBasins`` maps ``regionIDString`` to, and ``scenarioString``, joined with a
    literal backslash (Windows-style, whatever the host OS).

    Raises ``KeyError`` when ``regionIDString`` is not a key of ``regionBasins``.
    """
    pathParts = [existSummariesFolderName, regionBasins[regionIDString], scenarioString]
    return '\\'.join(pathParts)

In [5]:
def _readTotalsCSV(csvPath):
    """Read one '<river> Totals' CSV, casting every column except 'Date' to float."""
    totals = pd.read_csv(csvPath)
    return totals.astype({col: float for col in totals.columns if col != 'Date'})


def createSynthesisedTotalsTS(thisRegID, theseSims):
    """Combine several scenario runs into one synthesised totals series per river.

    Every file whose name contains ' Totals' in the region's baseline folder
    (``existSummariesFolderName/<basin>/<regionIDs[thisRegID]>``) defines a river. The
    matching file of each scenario in ``theseSims`` is left-merged onto it by 'Date'.
    For each constituent column (every baseline column except 'Date' and 'Flow_cumecs'):

        reduction   = max(base * nSims - sum(scenario values), 0)
        synthesised = max(base - reduction, 0)

    'Date' and 'Flow_cumecs' are carried over from the baseline unchanged. One CSV per
    river, with the baseline's column names and order, is written to
    ``outPath/<basin>/<river> Totals.csv``.

    Parameters
    ----------
    thisRegID : str
        Region code; must be a key of both ``regionBasins`` and ``regionIDs``.
    theseSims : list of str
        Scenario folder names located next to the baseline folder.

    Raises
    ------
    ValueError
        If the baseline folder holds no ' Totals' file, or the baseline files lack the
        'Date' / 'Flow_cumecs' columns.
    KeyError
        If a scenario contains a river that has no baseline, or a scenario is missing a
        constituent column for a baseline river (both would make the sums meaningless).
    FileNotFoundError
        Propagated from ``os.listdir`` when a baseline or scenario folder does not exist.
    """
    basinDIR = os.path.join(existSummariesFolderName, regionBasins[thisRegID])
    theBaseDIR = os.path.join(basinDIR, regionIDs[thisRegID])

    # River name -> baseline frame; scenario columns are merged onto it below.
    # Dict insertion order gives the order in which rivers are processed and written.
    riverBasedMerged = {}
    baseCols = []
    for fname in os.listdir(theBaseDIR):
        if ' Totals' not in fname:
            continue
        riverSystemName = fname.split(' Totals')[0]
        baseData = _readTotalsCSV(os.path.join(theBaseDIR, fname))
        # The column layout of the last baseline file read is applied to every river.
        baseCols = baseData.columns.tolist()
        riverBasedMerged[riverSystemName] = baseData

    if not riverBasedMerged:
        raise ValueError("No ' Totals' files found in baseline folder: " + theBaseDIR)

    passThroughCols = ('Date', 'Flow_cumecs')
    absentCols = [col for col in passThroughCols if col not in baseCols]
    if absentCols:
        raise ValueError("Baseline totals in " + theBaseDIR + " are missing column(s): " + str(absentCols))
    constitCols = [col for col in baseCols if col not in passThroughCols]

    # Merge each scenario's constituent columns in as '<sim>_<constituent>'.
    for sim in theseSims:
        theSimDIR = os.path.join(basinDIR, sim)
        simColNames = {col: sim + '_' + col for col in constitCols}
        for fname in os.listdir(theSimDIR):
            if ' Totals' not in fname:
                continue
            riverSystemName = fname.split(' Totals')[0]
            if riverSystemName not in riverBasedMerged:
                raise KeyError("Scenario " + sim + " has river '" + riverSystemName
                               + "' with no baseline totals in " + theBaseDIR)

            simData = _readTotalsCSV(os.path.join(theSimDIR, fname))
            # Flow is taken from the baseline only, so the scenario's copy is dropped.
            simData = simData.drop('Flow_cumecs', axis=1).rename(columns=simColNames)
            riverBasedMerged[riverSystemName] = pd.merge(
                riverBasedMerged[riverSystemName], simData, how='left', on='Date')

    simCount = len(theseSims)
    outPathFileDir = os.path.join(outPath, regionBasins[thisRegID])

    for theRiver, merged in riverBasedMerged.items():
        print("Assessing River name: " + theRiver)

        # Start from the baseline columns, then overwrite each constituent.
        synthesised = merged[baseCols].copy()
        for col in constitCols:
            simCols = [sim + '_' + col for sim in theseSims]
            absentSimCols = [name for name in simCols if name not in merged.columns]
            if absentSimCols:
                raise KeyError("River '" + theRiver + "' has no scenario data for column(s): "
                               + str(absentSimCols))

            sumSims = merged[simCols].sum(axis=1)
            # Net reduction of all scenarios relative to the baseline; a net increase counts as 0.
            reduction = (merged[col] * simCount - sumSims).clip(lower=0)
            # Stacked reductions can exceed the baseline; a load cannot be negative, so floor at 0.
            synthesised[col] = (merged[col] - reduction).clip(lower=0)

        # Created lazily so the "Made dir" message keeps its place after the first river's header.
        if not os.path.exists(outPathFileDir):
            os.makedirs(outPathFileDir)
            print("Made dir: " + outPathFileDir)

        outPathFile = os.path.join(outPathFileDir, theRiver + ' Totals.csv')
        synthesised.to_csv(outPathFile, index=False)
        print("Written file: " + outPathFile)

    print("Done " + regionBasins[thisRegID] + " at " + str(datetime.datetime.now()))

In [6]:
# Driver: synthesise totals for every region configured in regionIDs, one basin at a time.
# Each region's scenario list is looked up first, so a region without an entry in
# scenariosForReg fails before any work is started for it.
for theReg in regionIDs:
    simList = scenariosForReg[theReg]
    basinLabel = regionBasins[theReg]
    print("Doing this basin: " + basinLabel + " at " + str(datetime.datetime.now()))
    createSynthesisedTotalsTS(theReg, simList)

print("All finished at " + str(datetime.datetime.now()))

Doing this basin: Burdekin at 2021-11-11 12:16:45.471775
Assessing River name: Alligator Creek
Made dir: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Alligator Creek Totals.csv
Assessing River name: Barrata Creek
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Barrata Creek Totals.csv
Assessing River name: Black River
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Black River Totals.csv
Assessing River name: Bluewater Creek
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Bluewater Creek Totals.csv
Assessing River name: Bohle River
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Bohle River Totals.csv
Assessing River name: Burdekin River
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpiricalEMC\Burdekin\Burdekin River Totals.csv
Assessing River name: Don River
Written file: E:\GBRFScenarios\SynthesisedTotalsEmpirical