In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# TAA Post Processing

## Output Checking

### Standard Capacity Analysis Run with Default Initial Conditions

In [None]:
#demandtrends folder
# Defaults to the original run location; set the TAA_RUN_ROOT environment
# variable to point the notebook at another run without editing this cell.
# os.path.join(..., "") guarantees a trailing separator, which later cells
# rely on when they build paths with root + "...".
root = os.path.join(
    os.environ.get("TAA_RUN_ROOT", "/home/craig/runs/big_test/base-testdata-v7/"),
    "",
)
# DemandTrends.txt is read from the run folder as a tab-separated table.
dtrends = os.path.join(root, "DemandTrends.txt")
df = pd.read_csv(dtrends, sep='\t')
df.head()

To plot a line chart of TotalRequired and Deployed, we group by time (`t`) and sum the values so that we have the total TotalRequired and the total Deployed for each day.  Functions called on a groupby (like `sum()` here) are applied to each group, so `sum()` adds up the values within each `t`.  The grouping key becomes the index of the result, so if you don't call `reset_index()`, `t` is no longer an ordinary column and the plotting cell below, which refers to `t` by column name, won't find it.

In [None]:
# Total every numeric column per time step t.
# numeric_only=True keeps any text columns out of the aggregation; with the
# default in recent pandas they would be "summed" (concatenated) as well.
# reset_index() turns t back into an ordinary column so it can be plotted by name.
group_df = df.groupby(['t']).sum(numeric_only=True).reset_index()
group_df.head()

In [None]:
# Draw both per-t totals on one axes and label them, so the two lines can be
# told apart and the figure still makes sense when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot('t', 'TotalRequired', data=group_df, label='TotalRequired')
ax.plot('t', 'Deployed', data=group_df, label='Deployed')
ax.set_xlabel('t')
ax.set_ylabel('Total over all records at t')
ax.set_title('TotalRequired vs. Deployed')
ax.legend();

### Random Initial Conditions Output Checks

We've been storing the results in the parent directory alongside the MARATHON workbook.  results.txt is the output of the random initial condition runs from marathon.analysis.random.

In [None]:
# results.txt sits one level above the run folder. os.path.join supplies the
# separator itself, so the path is correct whether or not root ends with a
# slash (plain string concatenation silently produced "...v7../results.txt"
# when it did not).
results = os.path.join(root, "..", "results.txt")
results

In [None]:
# Load the tab-delimited results file; from here on df holds the results
# table instead of the DemandTrends table loaded earlier.
df = pd.read_csv(
    results,
    sep='\t',
)
df.head()

Here we count the number of records for each \[SRC, AC\] group.  For x initial condition reps and y phases, we should have x*y records per group.  This is essentially pivoting in Python by count.

In [None]:
# Number of non-null entries in every column for each [SRC, AC] pair;
# reset_index() brings SRC and AC back as ordinary columns.
group_df = (
    df
    .groupby(by=['SRC', 'AC'])
    .count()
    .reset_index()
)
group_df.head()

Check for any \[SRC, AC\] tuple that doesn't have x*y records.

In [None]:
# Show every [SRC, AC] pair whose rep-seed count differs from 12.
# NOTE(review): 12 is presumably reps x phases for this particular run --
# confirm and update it when the number of reps or phases changes.
group_df.loc[group_df['rep-seed'].ne(12)]

## Post Processing

We'd like to compute Score and Excess for each \[SRC, AC\] tuple.  

First, average NG fill, then average AC fill, then average RC fill, then sum them and divide by demand for Score (note that fill here is the fill from demandtrends and NOT just deployed, as the field was renamed in 2327).
Excess is the sum of the available (deployable) quantity for each component divided by demand.

In [None]:
#compute % demand met (dmet) and % excess over the demand (emet)
#first by phase (use average group by with src, ac, phase)
def by_phase_percentages(results_df):
    """Average the results per [SRC, AC, phase] and add the dmet/emet ratios.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Must contain the columns SRC, AC, phase, NG-fill, AC-fill, RC-fill,
        NG-deployable, AC-deployable, RC-deployable and total-quantity.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per [SRC, AC, phase] holding the mean of every numeric
        column, plus two derived columns:

        dmet : (NG-fill + AC-fill + RC-fill) / total-quantity
        emet : (NG-deployable + AC-deployable + RC-deployable) / total-quantity
    """
    # numeric_only=True: without it, recent pandas raises a TypeError as soon
    # as the frame carries any text column besides the grouping keys.
    group_df = (
        results_df
        .groupby(by=['SRC', 'AC', 'phase'])
        .mean(numeric_only=True)
        .reset_index()
    )
    demand = group_df['total-quantity']
    filled = group_df['NG-fill'] + group_df['AC-fill'] + group_df['RC-fill']
    deployable = (group_df['NG-deployable'] +
                  group_df['AC-deployable'] +
                  group_df['RC-deployable'])
    group_df['dmet'] = filled / demand
    group_df['emet'] = deployable / demand
    return group_df

In [None]:
#Weights used for a weighted score, keyed by phase name (they add up to 1.0).
phase_weights = {
    "comp1": 0.125,
    "comp2": 0.125,
    "phase1": 0.0625,
    "phase2": 0.0625,
    "phase3": 0.5,
    "phase4": 0.125,
}

#look up the weight that goes with each row
def row_weight(row):
    """Return the phase_weights entry for the value stored under row['phase'].

    Raises KeyError when the phase is not a key of phase_weights.
    """
    phase = row['phase']
    return phase_weights[phase]

#then group by src, ac, using custom function for weighted phases
def weighted_average(df, data_col, weight_col, by_col):
    """Weighted average of data_col within each by_col group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'phase' column (read by row_weight), data_col and the
        by_col key(s). A 'weight' column holding row_weight(row) for every
        row is added to it, or overwritten if already present.
    data_col : str
        Column whose values are averaged.
    weight_col : str
        Column whose values are used as the weights.
    by_col : str or list of str
        Grouping key(s).

    Returns
    -------
    pandas.Series
        Indexed by by_col; each value is
        sum(data_col * weight_col) / sum(weight_col) over the group's rows.
    """
    # NOTE(review): the weights are always written to the 'weight' column,
    # whatever weight_col is; every caller in this notebook passes 'weight'.
    # Confirm whether weight_col was meant to be the target column here.
    df['weight'] = df.apply(row_weight, axis=1)
    # Build the product on a temporary copy instead of adding and deleting a
    # scratch column on the caller's frame: nothing is left behind if the
    # groupby fails, and an existing '_data_times_weight' column is untouched.
    weighted = df.assign(_data_times_weight=df[data_col] * df[weight_col])
    g = weighted.groupby(by=by_col)
    #note that if we're missing a phase, the weight is adjusted accordingly
    return g['_data_times_weight'].sum() / g[weight_col].sum()

def by_src_inventory_scores(percentages_df):
    """Collapse per-phase dmet/emet into one Score and Excess per [SRC, AC].

    Parameters
    ----------
    percentages_df : pandas.DataFrame
        Must contain SRC, AC, phase, dmet and emet columns. It is passed to
        weighted_average, which writes a 'weight' column into it.

    Returns
    -------
    pandas.DataFrame
        Columns SRC, AC, Score (weighted average of dmet) and Excess
        (weighted average of emet), one row per [SRC, AC].
    """
    group_keys = ['SRC', 'AC']
    inventory_score = weighted_average(percentages_df, 'dmet', 'weight', group_keys)
    inventory_excess = weighted_average(percentages_df, 'emet', 'weight', group_keys)
    # Name the value column while resetting the index rather than renaming
    # the implicit 0 label, which only exists when the Series has no name.
    score_df = inventory_score.reset_index(name='Score')
    excess_df = inventory_excess.reset_index(name='Excess')
    #join both Score and Excess results.
    return score_df.merge(excess_df, how='inner', on=group_keys)

def by_src_ac_scores(results_path):
    """Read the tab-separated file at results_path and score it.

    Returns whatever by_src_inventory_scores produces for the per-phase
    percentages computed by by_phase_percentages.
    """
    results_df = pd.read_csv(results_path, sep='\t')
    phase_percentages = by_phase_percentages(results_df)
    return by_src_inventory_scores(phase_percentages)

# Display the scores computed from the results file.
by_src_ac_scores(results_path=results)

In [None]:
#repeat this for the other demand
#join tables so that you have two score columns
#add column called min_score
#add another column called min_score_demand
#could turn this into a map to concat both demand tables then, but not necessary