## Airborne Tool
Author: Harrison Baker


In [183]:
# package imports
import pandas as pd
import re
import numpy as np


Setting up adjustment information

In [198]:
# Configuration shared by the cells below

# Period labels; they double as the per-period column names of the scenario table.
periods = ["MS", "Day", "OOH", "Eve", "ES", "Night", "SS", "SA"]

# Floor used by rmValsBelowFloor: float values below it are replaced with "-".
min_value = 25

# Band edges per period, used to bin (level - NML) differences into counts.
# Most periods share the same edges; the exceptions are overridden below.
_standardBands = [0, 5, 15, 25]
countDict = {period: list(_standardBands) for period in periods}
countDict["Day"] = [0, 10, 20, 30]
countDict["SS"] = [0]
countDict["SA"] = [0]

# Column layout of the scenario table: three descriptive columns, then one per period.
scenarioSettings = ["Scenario Name", "Scenario Description", "Variant Number", *periods]


### Adjustment for Scenario DataFrame

For each new scenario, add a new row (follow the existing pattern). Print the existing DataFrame if you are unsure. The numerical value for a scenario represents the Lw of the activity, assuming that it was modelled in CadnaA as Lw = 100. If modelling was done with real Lw values, set all 'adjustment' values to 100 so there is no change to the Lw. The format is:
| Scenario Name | Scenario Description | Variant Number | MS Adj | Day Adj | Day OOH Adj | Eve Adj | ES Adj | Night Adj | SS Adj | SA Adj |
| :------- | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | :------: | -------: |
| ABC | Rockhammering shaft | 1 | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 |

In [202]:
# Scenario adjustment table (a DataFrame, not a dictionary).
# One row per scenario; columns follow scenarioSettings: name, description,
# variant number, then one adjustment value per period.
# np.nan entries are the period values that calcPeriodLevels checks for in order to skip them.

adjustmentDf = pd.DataFrame(columns=scenarioSettings)
adjustmentDf.loc[0] = ["SCEN1", "a brief description", 1, np.nan, 105, 99, np.nan,  90, 110, 120, 80]
adjustmentDf.loc[1] = ["SCEN2", "a brief description", 5, np.nan, 105, 99, np.nan,  90, 110, 120, 80]
adjustmentDf.loc[2] = ["SCEN3", "a brief description", 4, np.nan, 105, 99, np.nan,  90, 110, 120, 80]

# Show the table so the row format can be checked before adding scenarios.
print(adjustmentDf)

  Scenario Name Scenario Description  Variant Number  MS  Day  OOH  Eve  ES  \
0         SCEN1  a brief description               1 NaN  105   99  NaN  90   
1         SCEN2  a brief description               5 NaN  105   99  NaN  90   
2         SCEN3  a brief description               4 NaN  105   99  NaN  90   

   Night   SS  SA  
0    110  120  80  
1    110  120  80  
2    110  120  80  


Open the CadnaA Excel file; for each unique NCA_ID take the maximum value for each variant, adjust for each scenario, and write the results DataFrame grouped by period.

In [226]:
# Load the raw CadnaA export and split its columns into two groups.

cadnaA = pd.read_excel("RawCadnaA.xlsx")

allColumns = cadnaA.columns
# Result columns: from the column labelled 1 to the end (doesn't include NMLs).
dataColumns = allColumns[allColumns.get_loc(1):]
# Info columns: everything before the "Day_NML" column (info up to NMLs).
infoColumns = allColumns[:allColumns.get_loc("Day_NML")]

def reduceToMax(df: pd.DataFrame, nca_id = "NCA_ID", nca_tool = "NCA_tool"):
    """Collapse ``df`` to one row per ``nca_id`` holding the column-wise maxima.

    The result is sorted by ``nca_tool`` and indexed by ``nca_id`` (the id is
    also kept as a regular column). Rows whose tool is exactly "OSR" get their
    tool replaced by the part of the id before the first '-'.
    """
    per_id_max = df.groupby(nca_id, as_index = False).max()
    ordered = per_id_max.sort_values(by=nca_tool)
    max_df = ordered.set_index(nca_id, drop = False)

    # note that MXU are assumed to have OSR tool with NCAXX
    is_osr = max_df[nca_tool] == "OSR"
    id_prefix = max_df[nca_id].str.split('-').str[0]
    max_df.loc[is_osr, nca_tool] = id_prefix
    return max_df

# One row per NCA_ID holding the column-wise maxima of the raw CadnaA table.
reduced = reduceToMax(cadnaA)

def calcPeriodLevels(row, corrections: pd.Series, scenarioName, variantId):
    """Add one adjusted level per period to a single row of modelled levels.

    For every period in ``corrections`` that has a value, the adjusted level is
    ``row[variantId] + correction - 100`` and is stored under the label
    ``f"{scenarioName}_{periodName}"``.

    Parameters
    ----------
    row : pd.Series
        One row of levels, labelled by variant id.
    corrections : pd.Series
        Adjustment value per period name. Missing values (NaN/None) are
        skipped, so no entry is created for that period.
    scenarioName
        Prefix used for the labels of the new entries.
    variantId
        Label of the entry in ``row`` that the scenario is based on.

    Returns
    -------
    pd.Series
        ``row`` itself, untouched, when ``variantId`` is not one of its labels;
        otherwise a copy of ``row`` with the new entries appended.
    """
    # shout out AMo for this function
    if variantId not in row.index:
        return row

    # Read the base level once, before new labels are appended to the index.
    baseLevel = row[variantId]
    result = row.copy()  # leave the caller's row unmodified

    for periodName, periodValue in corrections.items():
        # NaN never compares equal to anything (including itself), so an
        # `== np.nan` test can never detect it; pd.isna does.
        if pd.isna(periodValue):
            continue
        result[f"{scenarioName}_{periodName}"] = baseLevel + periodValue - 100
    return result

def rmValsBelowFloor(x, floor=None):
    """Replace a float below the floor with "-"; return anything else unchanged.

    Parameters
    ----------
    x
        Value to check. Only ``float`` instances are compared; other types
        (ints, strings, ...) are returned as they are.
    floor : number, optional
        Threshold below which a float is replaced. Defaults to the notebook-level
        ``min_value`` so existing single-argument calls (e.g. via ``.map``)
        behave as before.

    Returns
    -------
    "-" when ``x`` is a float strictly below ``floor``, otherwise ``x``.
    NaN compares False against any number, so it is returned unchanged.
    """
    if floor is None:
        floor = min_value
    if isinstance(x, float) and x < floor:
        return "-"
    return x

def applyAdjustment(df: pd.DataFrame, periods, infoColumns, dataColumns, adjustmentDf, nca_tool = "NCA_tool"):
    """Build the per-period results table for every scenario in ``adjustmentDf``.

    Steps:
      1. Split ``df`` into info columns and result columns (results are rounded
         to 0 decimals and cast to float).
      2. Derive three NML columns. For rows whose ``nca_tool`` starts with "NCA":
         ``SS_NML = max(Night_NML + 10, 52)``, ``SA_NML = max(SS_NML, 65)`` and
         ``OOH_NML = Day_NML - 5``. For all other rows ``SS_NML`` and ``SA_NML``
         are 999 and ``OOH_NML`` equals ``Day_NML``.
      3. For each scenario row, apply ``calcPeriodLevels`` to every result row
         and drop columns that are entirely NaN.
      4. For each period (in ``periods`` order) that has at least one scenario
         column, append ``<period>_NML`` followed by the scenario columns for
         that period.
      5. Sort by ``nca_tool`` then "Address".

    Parameters
    ----------
    df : pd.DataFrame
        Table containing ``infoColumns``, ``dataColumns``, "Day_NML",
        "Night_NML", "Address" and an NML column for every period that ends up
        with scenario columns.
    periods : list
        Period names; also the adjustment column names in ``adjustmentDf``.
    infoColumns, dataColumns
        Column labels selecting the descriptive and the result columns of ``df``.
    adjustmentDf : pd.DataFrame
        One row per scenario with "Scenario Name", "Variant Number" and one
        column per period.
    nca_tool : str
        Name of the column holding the tool label.

    Returns
    -------
    (adjusted, data, scenarioDict)
        ``adjusted``: the sorted results table; ``data``: the rounded result
        columns; ``scenarioDict``: scenario name -> DataFrame of ``data`` plus
        that scenario's period columns.
    """
    base = df.copy()

    adjusted = base[infoColumns].copy()
    data = base[dataColumns].copy().round(0).astype(float)

    # na=False: a missing tool label must count as "not NCA". Without it the
    # mask holds NaN for such rows, and np.where treats NaN as True.
    mask = adjusted[nca_tool].str.startswith("NCA", na=False)
    base["SS_NML"] = np.where(mask, np.maximum(base["Night_NML"] + 10, 52), 999) # discuss with MT
    base["SA_NML"] = np.where(mask, np.maximum(base["SS_NML"], 65), 999)
    base["OOH_NML"] = np.where(mask, base["Day_NML"] - 5, base["Day_NML"])

    # Scenario name -> levels table including that scenario's period columns.
    scenarioDict = {}
    for _, scenario in adjustmentDf.iterrows():
        scenarioName = scenario["Scenario Name"]
        scenarioDf = data.copy().apply(
            calcPeriodLevels,
            axis = 1,
            scenarioName = scenarioName,
            variantId = scenario["Variant Number"],
            corrections = scenario[periods],
        )
        # Periods without a usable correction end up all-NaN (or absent) and are dropped.
        # TODO: values below min_value are not masked yet; rmValsBelowFloor is the intended hook.
        scenarioDict[scenarioName] = scenarioDf.dropna(axis=1, how='all')

    for p in periods:
        # Collect every scenario's columns for this period first so the NML
        # column is written once, directly in front of them.
        periodFrames = []
        for subDf in scenarioDict.values():
            cols = [c for c in subDf.columns if isinstance(c, str) and '_' in c and c.split('_')[-1] == p]
            if cols:
                periodFrames.append(subDf[cols])
        if not periodFrames:
            continue

        adjusted[f"{p}_NML"] = base[f"{p}_NML"]
        for frame in periodFrames:
            adjusted = adjusted.join(frame)

    adjusted = adjusted.sort_values(by=[nca_tool, "Address"])

    return adjusted, data, scenarioDict

# Apply every scenario in adjustmentDf to the per-NCA_ID maxima.
# adjusted: sorted results table; data: rounded result columns; scenarioDict: one DataFrame per scenario name.
adjusted, data, scenarioDict = applyAdjustment(reduced, periods, infoColumns, dataColumns, adjustmentDf)

# Optional export of the full table (disabled).
# adjusted.to_csv('testFullTable.csv', index=False)

# Preview the first rows of the results table.
print(adjusted.head())

                          NCA_tool                     NCA_ID  \
NCA_ID                                                          
RES-NCA06-00190002497        NCA06      RES-NCA06-00190002497   
OSR_MXU-NCA06-00190007454    NCA06  OSR_MXU-NCA06-00190007454   
OSR_MXU-NCA07-00190007465    NCA07  OSR_MXU-NCA07-00190007465   
OSR_MXU-NCA07-00190007424    NCA07  OSR_MXU-NCA07-00190007424   
OSR_COM-NCA07-00190002328  OSR_COM  OSR_COM-NCA07-00190002328   

                                                           Address  Num_Units  \
NCA_ID                                                                          
RES-NCA06-00190002497            168-170 KENT STREET MILLERS POINT          1   
OSR_MXU-NCA06-00190007454  200  CUMBERLAND STREET, THE ROCKS (SYDN          1   
OSR_MXU-NCA07-00190007465         18A PITT STREET, SYDNEY (SYDNEY)          1   
OSR_MXU-NCA07-00190007424       38  BRIDGE STREET, SYDNEY (SYDNEY)          1   
OSR_COM-NCA07-00190002328                    1 BLIGH STREE

In [231]:
# Exceedance counts: for every "<scenario>_<period>" column of `adjusted`, count how
# many rows have (level - period NML) inside each band of countDict[period].
# countDict and periods are the ones defined in the configuration cell; they are not
# re-declared here so there is a single place to edit the band edges.

testCount = np.asarray([3, 0, -1, 10])  # scratch sample for checking the binning by hand

scenarioNames = set(adjustmentDf["Scenario Name"])


def isScenarioColumn(name) -> bool:
    """Return True for columns labelled "<scenario>_<period>".

    The label is split on its LAST underscore (matching how applyAdjustment picks
    period columns), the scenario part must equal a scenario name exactly, and the
    period part must be one of `periods`. Exact matching avoids treating the
    scenario name as a regex or substring, and keeps scenario names that contain
    underscores working.
    """
    if not isinstance(name, str) or '_' not in name:
        return False
    scenario, period = name.rsplit('_', 1)
    return scenario in scenarioNames and period in periods


filteredColumns = [name for name in adjusted.columns if isScenarioColumn(name)]

countDictionary = {}

for name in filteredColumns:
    period = name.rsplit('_', 1)[1]
    # Append an open-ended top band, then shift every edge by 0.1: np.histogram bins
    # are [low, high), so the shift effectively turns them into (low, high].
    bins = [edge + 0.1 for edge in countDict[period] + [np.inf]]
    periodNml = adjusted[f"{period}_NML"]
    diffs = np.asarray(adjusted[name] - periodNml)
    hist, _ = np.histogram(diffs, bins = bins)
    countDictionary[name] = hist.tolist()  # counts per band, keyed by column name

print(countDictionary)

# TODO: turn this into a function to get counts,
#       then use it to get the counts per unique value of osr_tool.
# TODO: once the dictionary exists, make DataFrames from it, then a dict of DataFrames, then join -
#       figure out how to keep the join tidy, i.e. the same number of rows for each period
#       even if periods have a different number of bins.


    

{'SCEN1_Day': [2, 1, 0, 0], 'SCEN2_Day': [3, 2, 0, 0], 'SCEN3_Day': [1, 2, 0, 0], 'SCEN1_OOH': [0, 1, 0, 0], 'SCEN2_OOH': [0, 2, 0, 0], 'SCEN3_OOH': [0, 2, 0, 0], 'SCEN1_ES': [0, 0, 0, 0], 'SCEN2_ES': [0, 0, 0, 0], 'SCEN3_ES': [0, 0, 0, 0], 'SCEN1_Night': [1, 2, 1, 0], 'SCEN2_Night': [5, 3, 2, 0], 'SCEN3_Night': [4, 1, 2, 0], 'SCEN1_SS': [0], 'SCEN2_SS': [0], 'SCEN3_SS': [0], 'SCEN1_SA': [0], 'SCEN2_SA': [0], 'SCEN3_SA': [0]}
