In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# TAA Post Processing

## Output Checking

### Standard Capacity Analysis Run with Default Initial Conditions

In [None]:
# Run folder holding the DemandTrends output (note the trailing slash on root).
root = "/home/craig/runs/big_test/base-testdata-v7/"
dtrends = os.path.join(root, "DemandTrends.txt")
# DemandTrends.txt is read as a tab-separated table.
df = pd.read_csv(dtrends, sep='\t')
df.head()

In order to plot a line chart of TotalRequired and Deployed, we group by time and sum the values so that we have the total TotalRequired and Deployed for each day.  Functions called on a groupby (like sum() here) aggregate the values within each group.  If you don't call reset_index, the grouping key `t` becomes the index of the result rather than a regular column, so it can't be passed by name to plt.plot.

In [None]:
# Total every numeric column per time step t.
# numeric_only=True keeps any text columns out of the aggregation: on pandas >= 2
# a bare .sum() would concatenate the strings of each group instead of dropping
# them, which is slow and meaningless for the plot that follows.
group_df = df.groupby(['t']).sum(numeric_only=True).reset_index()
group_df.head()

In [None]:
# Draw both series on one explicit Axes and label them so the figure can be
# read on its own (the original had no legend to tell the two lines apart).
fig, ax = plt.subplots()
ax.plot('t', 'TotalRequired', data=group_df, label='TotalRequired')
ax.plot('t', 'Deployed', data=group_df, label='Deployed')
ax.set_xlabel('t (day)')
ax.set_ylabel('Quantity (summed over all records)')
ax.set_title('TotalRequired vs. Deployed over time')
ax.legend();

### Random Initial Conditions Output Checks

We've been storing the results in the parent directory alongside the MARATHON workbook.  results.txt is from random initial condition runs from marathon.analysis.random.

In [None]:
# results.txt sits one directory above the run folder.
results = os.path.join(root, "../results.txt")
results

In [None]:
# Load the random-initial-condition results (tab-delimited text).
df = pd.read_csv(results, delimiter='\t')
df.head()

Here we count the number of records for each \[SRC, AC\] group.  For x initial condition reps and y phases, we should have x*y records.  This is essentially pivoting in Python by count.

In [None]:
# Non-null record count per (SRC, AC); every remaining column holds that count.
counts_by_src_ac = df.groupby(by=['SRC', 'AC']).count()
group_df = counts_by_src_ac.reset_index()
group_df.head()

Check for any \[SRC, AC\] tuple that doesn't have x*y records.

In [None]:
# Show every (SRC, AC) row whose 'rep-seed' count is not the expected 12.
group_df.loc[group_df['rep-seed'].ne(12)]

## Post Processing

We'd like to compute Score and Excess for each \[SRC, AC\] tuple.  

First, average NG fill, then average RC fill, then average AC fill, then sum and divide by demand for Score (note that fill is the fill from demandtrends and NOT just deployed, as the field was renamed in 2327).
Excess is the sum of available (deployable) units for each component divided by demand.

In [None]:
import copy
# Names of the derived "fraction of demand met" / "excess over demand" columns.
dmet = 'demand_met'
emet = 'excess_met'


def compute_excess(in_df):
    """Add the excess-met column to ``in_df`` in place.

    The new column (named by ``emet``) is the sum of the NG, AC and RC
    deployable columns divided by ``total-quantity``.  It is used once to
    find the maximum excess and once for the actual dataframe computation.

    Parameters
    ----------
    in_df : DataFrame with 'NG-deployable', 'AC-deployable',
        'RC-deployable' and 'total-quantity' columns.

    Returns
    -------
    None.  ``in_df`` itself is modified.
    """
    deployable_total = (in_df['NG-deployable']
                        + in_df['AC-deployable']
                        + in_df['RC-deployable'])
    in_df[emet] = deployable_total / in_df['total-quantity']
    
import numpy as np
def by_phase_percentages(results_df):
    """Average results per (SRC, AC, phase) and add demand-met / excess-met.

    All records sharing the same SRC, AC and phase are averaged, then two
    columns are added to the averaged frame:

    * ``dmet``: (NG-fill + AC-fill + RC-fill) / total-quantity, or 1 when the
      phase has no demand (total-quantity == 0).
    * ``emet``: (NG-deployable + AC-deployable + RC-deployable) /
      total-quantity, or, when the phase has no demand, one more than the
      largest excess found among the rows that do have demand.

    Parameters
    ----------
    results_df : DataFrame with 'SRC', 'AC', 'phase', 'total-quantity' and
        the '*-fill' and '*-deployable' columns for NG, AC and RC.

    Returns
    -------
    DataFrame with one row per (SRC, AC, phase), the averaged numeric columns
    and the two added columns.  As a sanity check, the number of null
    'total-quantity' values is printed.
    """
    # numeric_only=True: pandas >= 2 raises TypeError when asked for the mean
    # of a non-numeric column, and older versions silently dropped such
    # columns, so this keeps the old result on every version.
    group_df = (results_df
                .groupby(by=['SRC', 'AC', 'phase'])
                .mean(numeric_only=True)
                .reset_index())

    demand = group_df['total-quantity']
    no_demand = demand == 0

    #when there is no demand in a phase, dmet is 100%
    total_fill = group_df['NG-fill'] + group_df['AC-fill'] + group_df['RC-fill']
    group_df[dmet] = np.where(no_demand, 1, total_fill / demand)

    #When there is no demand in a phase, emet is the max emet across all SRCs and phases.
    # Work on an explicit copy so compute_excess does not write into group_df.
    excess_df = group_df[~no_demand].copy()
    compute_excess(excess_df)
    # NOTE(review): if every row has zero demand, excess_df is empty and
    # max_excess is NaN - confirm whether that case can occur.
    max_excess = excess_df[emet].max() + 1

    total_deployable = (group_df['NG-deployable']
                        + group_df['AC-deployable']
                        + group_df['RC-deployable'])
    group_df[emet] = np.where(no_demand, max_excess, total_deployable / demand)

    print(group_df['total-quantity'].isnull().sum())
    #this will be 0 because if there is no demand, we don't have a record.

    return group_df

Do first: 1 workbook
	(need to groupby.mean.unstack phase, but what do I expect?)
	Tab 1: src, ac, results by phase for demand 1, add score, excess
	Tab 2: src, ac, results by phase in columns for demand 2, add score excess
	Tab 3: src, ac, score-demand1, excess-demand1, score-dmd2, excess-dmd2, min-demand, min score.

In [None]:
from functools import reduce 

def results_by_phase(results_df):
    """Average per (SRC, AC, phase), then pivot the phases into columns.

    Returns a frame indexed by (SRC, AC) whose columns are a two-level
    index of (original column, phase).
    """
    group_keys = ['SRC', 'AC', 'phase']
    return (results_df
            .groupby(by=group_keys)
            .mean()
            .unstack(level=['phase']))

# Per-phase weights used for the weighted score; the six weights sum to 1.0.
phase_weights = {
    "comp1": 0.125,
    "comp2": 0.125,
    "phase1": 0.0625,
    "phase2": 0.0625,
    "phase3": 0.5,
    "phase4": 0.125,
}

# Column names: per-phase weighted values ...
d_weighted = 'dmet_times_weight'
e_weighted = 'emet_times_weight'
# ... and their sums across phases.
dmet_sum = 'weighted_dmet_sum'
emet_sum = 'weighted_emet_sum'
def reorder_columns(order, df):
    """Return ``df`` with the columns named in ``order`` moved to the front.

    Entries of ``order`` that are not columns of ``df`` are ignored; all
    other columns follow in their existing order.
    """
    leading = []
    for name in order:
        if name in df:
            leading.append(name)

    trailing = []
    for name in df:
        if name not in order:
            trailing.append(name)

    return df[leading + trailing]

#compute score and excess from a path to results.txt
def compute_scores(results_path):
    """Build the per-(SRC, AC) score table for one results file.

    Reads the tab-separated file at ``results_path``, computes demand-met and
    excess-met per phase with ``by_phase_percentages``, weights them by
    ``phase_weights``, pivots the phases into columns and adds the weighted
    sums as ('Score', dmet_sum) and ('Excess', emet_sum).

    Relies on the module-level ``title_strength`` frame (columns SRC, TITLE,
    STR), which must exist by the time this function is called.

    Returns a DataFrame indexed by (SRC, AC) with two-level columns.
    """
    df=pd.read_csv(results_path, sep='\t')
    #sometimes all inventory was equal to 0, but we shouldn't have that. 
    #We should have all phases if all inventory ==0
    # Keep only rows where AC, NG and RC are not all zero.
    df= df[(df[['AC', 'NG', 'RC']] == 0).all(axis=1)==False]
    scores = by_phase_percentages(df)
    # Phases missing from phase_weights map to NaN weight.
    scores['weight']=scores['phase'].map(phase_weights)
    scores[d_weighted]=scores[dmet]*scores['weight']
    scores[e_weighted]=scores[emet]*scores['weight']
    # Pivot so each phase becomes a second-level column under every measure.
    res = results_by_phase(scores[['SRC', 'AC', 'NG', 'RC', 
                                   'phase', dmet, emet, 
                                   'weight', d_weighted,
                                  e_weighted]])
    # Sum the weighted per-phase values across phases to get Score and Excess.
    res[('Score', dmet_sum)]=res.iloc[:, res.columns.get_level_values(0)==d_weighted].sum(axis=1)
    res[('Excess', emet_sum)]=res.iloc[:, res.columns.get_level_values(0)==e_weighted].sum(axis=1)
    # Collapse the per-phase NG / RC columns to one value each (max over phases).
    res[('NG_inv', '')]=res.iloc[:, res.columns.get_level_values(0)=='NG'].max(axis=1)
    res[('RC_inv', '')]=res.iloc[:, res.columns.get_level_values(0)=='RC'].max(axis=1)
    #need to join multindex columns to single index columns in title_strength, so this the merge process
    tuples = [('SRC', ''), ('TITLE', ''), ('STR', '')]
    # Work on a copy so the shared title_strength frame keeps its flat columns.
    titles=copy.deepcopy(title_strength)
    titles.columns=pd.MultiIndex.from_tuples(tuples, names=(None, 'phase'))
    # Inner join: SRCs with no row in title_strength are dropped from the result.
    res = pd.merge(res.reset_index(),
          titles,
          on=[('SRC', '')],
          how='inner'
         ).set_index(['SRC', 'AC'])
    # The per-phase NG / RC columns are no longer needed once NG_inv / RC_inv exist.
    res.drop(['NG', 'RC'], axis=1, level=0, inplace=True)
    return res

Read in the SRC baseline for strength and OI title.

In [None]:
# SRC baseline workbook; only SRC, TITLE and STR are carried forward.
baseline = pd.read_excel(
    '/home/craig/runs/big_test/TAA24-28_SRC_BASELINE_201130_DRAFTv6.xlsx',
    sheet_name='SRC_Baseline TAA 24-28',
)
title_strength = baseline.loc[:, ['SRC', 'TITLE', 'STR']]

In [None]:
import openpyxl

# Candidate results files (not referenced by the loop below, which reads results_map).
results_list = ["/home/craig/runs/big_test/results (copy 1).txt", "/home/craig/runs/big_test/results.txt"]
# Demand name -> results file.  The demand name becomes the worksheet name and
# the suffix of that demand's Score_/Excess_ columns.
results_map = {'2.1' : "/home/craig/runs/big_test/results_no_truncate_and_1_supply.txt", 
               '7.1' : "/home/craig/runs/big_test/results.txt",
              '10.1' : "/home/craig/runs/big_test/results_no_truncation.txt"}

writer = pd.ExcelWriter('TAA24-28_Modeling_Results.xlsx', engine='xlsxwriter')
# Accumulates one Score_/Excess_ column pair per demand, joined on (SRC, AC).
left=pd.DataFrame()

for demand_name in results_map:
    scored_results = compute_scores(results_map[demand_name])
    # The per-SRC maximum AC is taken while `left` is still empty (i.e. from the
    # first demand processed) and reused for every later demand.
    if left.empty:
        max_df=scored_results.reset_index().groupby('SRC')['AC'].apply(max)
        maxes=max_df.to_dict()
    #just to repeat the SRC in the output. Also will add an index on the left.
    scored_results.reset_index(inplace=True)
    #add max ac inventory
    scored_results['max_AC_inv']=scored_results['SRC'].map(maxes)
    #filter out the base inventories
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write to a slice (avoids SettingWithCopyWarning).
    scored_results=scored_results[scored_results['AC']!=scored_results['max_AC_inv']].copy()
    #add one to the remaining inventory records
    scored_results['AC']=scored_results['AC']+1
    #indicate those records that are the base supply
    scored_results['base_supply']=np.where((scored_results['AC']==scored_results['max_AC_inv']), 'X', 'Down')
    #remove maxes
    scored_results.drop(columns=['max_AC_inv'], level=0, inplace=True)
    #add scores to all_scores
    scored_results=scored_results.set_index(['SRC', 'AC'])
    score_columns=[('Score', dmet_sum), ('Excess', emet_sum)]
    score_col_names=['Score_'+demand_name, 'Excess_'+demand_name]
    scores = scored_results[score_columns]
    scores.columns=score_col_names
    if left.empty:
        left=scores
    else:
        right=scores
        # Inner join: only (SRC, AC) pairs present for every demand survive.
        left = pd.merge(left,right,on=['SRC', 'AC'], how='inner')
    #write to excel file here
    scored_results.reset_index(inplace=True)
    scored_results.rename(columns={'NG_inv':'NG', 'RC_inv':'AR', 'AC':'RA'}, inplace=True, level=0)
    initial_cols = [('SRC', ''), ('TITLE', ''), ('RA', ''), ('NG', ''), 
                    ('AR', ''),
                   ]
    reordered=reorder_columns(initial_cols, scored_results)
    # One worksheet per demand, named after the demand.
    reordered.to_excel(writer, sheet_name=demand_name) 
left.reset_index(inplace=True)

# Lowest score across all demands, and the name of the score column it came from.
score_cols = ['Score_'+demand_name for demand_name in results_map]
left['min_score']=left[score_cols].min(axis=1)
left['min_score_demand'] = left[score_cols].idxmin(axis=1)
#write third worksheet with all scores here
left = pd.merge(left, title_strength, on='SRC')
left = reorder_columns(['SRC', 'TITLE', 'AC'], left)
left.to_excel(writer, sheet_name='all_scores')
# ExcelWriter.save() was removed in pandas 2.0; close() writes the file on both
# old and new pandas versions.
writer.close()


def clear_column(row_start, column, sh):
    """Blank out a run of cells going down one column of a worksheet.

    Starting at ``row_start`` in ``column`` (both 1-based), each cell's value
    is set to None until the first already-empty cell or the sheet's last
    row, whichever comes first.

    Parameters
    ----------
    row_start : first row to clear.
    column : column index to clear.
    sh : worksheet-like object exposing ``max_row`` and ``cell(row, column)``.
    """
    # max_row is the index of the last row, so the range must include it;
    # stopping at max_row (exclusive) left the final row's cell uncleared.
    for row in range(row_start, sh.max_row + 1):
        cell = sh.cell(row=row, column=column)
        if cell.value is None:
            break
        cell.value = None

# Re-open the workbook written above so it can be tidied with openpyxl.
wb = openpyxl.load_workbook('TAA24-28_Modeling_Results.xlsx')
ws = wb['all_scores']
# Remove the first column of the all_scores sheet.
ws.delete_cols(idx=1, amount=1)

When writing the multi-index dataframes to Excel, pandas put an extra blank row below the column names, which messes up the filter in LibreOffice, but not Excel.  In Excel, you could turn the filter on the blank row.  In LibreOffice, that didn't work.  Although, in LibreOffice, you can turn it on the first row and it captures the first value.  Excel does not.  So those are the filter workarounds, but it looks cleaner to just remove that blank row.

In [None]:
for demand_name in results_map:
    sheet = wb[demand_name]
    # Row 3 is the blank row pandas writes beneath the multi-index header.
    sheet.delete_rows(idx=3, amount=1)
    # to_excel cannot hide the index for multi-index columns yet, so blank
    # the index values in column 1 by hand.
    clear_column(row_start=3, column=1, sh=sheet)
wb.save('TAA24-28_Modeling_Results.xlsx')

In [None]:

#join tables so that you have two score columns
#add column called min_score
#add another column called min_score_demand
#could turn this into a map to concat both demand tables then, but not necessary