In [19]:
#%run resource_utils.ipynb


In [20]:
"""
Usage: ./cpu.py config1.json,config2.json,...,configN.json

Determine the CPU model by running under various configuration changes. BaseModel.json and RealisticModel.json
provide defaults and configN.json overrides values in those configs or earlier ones in the list
"""

from __future__ import division
from __future__ import print_function

import sys
import collections
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
#from configure import configure, run_model, mc_event_model, in_shutdown
#from performance import performance_by_year
#from utils import time_dependent_value
from resource_utils import performance_by_year,time_dependent_value,configure, run_model, mc_event_model, in_shutdown
from resource_utils import seconds_per_year,seconds_per_month,mega,kilo,tera
# Basic parameters

# Time window in seconds.  compute_cpu_needs divides the yearly prompt
# reconstruction CPU time (HS06 * s) by this value to obtain the HS06 needed.
# NOTE(review): presumably the live data-taking time per year -- confirm.
running_time = 7.8E06

In [21]:
def create_model(list_of_json):
    """Build the resource model from command-line style configuration arguments.

    Args:
        list_of_json: sequence of strings, each holding one or more
            comma-separated JSON configuration file names.

    Returns:
        Whatever ``configure`` returns for the flattened list of file names.
        ``configure`` is called with ``None`` when ``list_of_json`` is empty.
    """
    config_names = None
    if len(list_of_json) > 0:
        # Flatten "a.json,b.json" style arguments into one ordered list.
        config_names = [name for entry in list_of_json for name in entry.split(',')]
    return configure(config_names)

In [22]:
from __future__ import print_function
# Get the performance year by year which includes the software improvement factor
def software_performance(model):
    """Tabulate the per-event processing time for every year of the model.

    Args:
        model: configuration dict providing 'start_year' and 'end_year'
            (both inclusive); it is also passed on to ``performance_by_year``.

    Returns:
        dict with the keys 'reco', 'lhc_mc' and 'hllhc_mc', each mapping
        year -> processing time.  'reco' is the data RECO step alone; the two
        MC entries add up the GENSIM, DIGI and RECO steps for the '2017' and
        '2026' MC kinds respectively.  Only the first element of each
        ``performance_by_year`` result is used.
    """
    years = list(range(model['start_year'], model['end_year'] + 1))

    def mc_chain_time(year, kind):
        # Time for the full MC chain of one year: GENSIM + DIGI + RECO.
        gensim = performance_by_year(model, year, 'GENSIM', data_type='mc', kind=kind)[0]
        digi = performance_by_year(model, year, 'DIGI', data_type='mc', kind=kind)[0]
        reco = performance_by_year(model, year, 'RECO', data_type='mc', kind=kind)[0]
        return gensim + digi + reco

    reco_times = {}
    for year in years:
        reco_times[year] = performance_by_year(model, year, 'RECO', data_type='data')[0]

    lhc_times = {}
    for year in years:
        lhc_times[year] = mc_chain_time(year, '2017')

    hllhc_times = {}
    for year in years:
        hllhc_times[year] = mc_chain_time(year, '2026')

    return {'reco': reco_times, 'lhc_mc': lhc_times, 'hllhc_mc': hllhc_times}


In [23]:
# general pattern:
# _required: HS06
# _time: HS06s

# CPU time requirement calculations, in HS06 * s
# Take the running time and event rate from the model
def compute_cpu_needs(model,processing_times):
    """Compute yearly event counts, CPU time (HS06 * s) and CPU power (HS06).

    Naming pattern: ``cpu_required`` holds HS06, ``cpu_time`` holds HS06 * s.

    Args:
        model: configuration dict; reads 'cpu_efficiency', 'start_year',
            'end_year' and 'new_detector_years'.  It is also passed to
            ``run_model`` and ``mc_event_model``.
        processing_times: dict with 'reco', 'lhc_mc' and 'hllhc_mc' entries,
            each mapping year -> per-event processing time.

    Returns:
        Tuple ``(events, cpu_time, cpu_required)``.  ``events`` has the keys
        'data', 'lhc_mc' and 'hllhc_mc'; ``cpu_time`` and ``cpu_required``
        additionally have 'rereco'.  Every entry maps year -> value.
    """
    cpu_efficiency = model['cpu_efficiency']
    YEARS = list(range(model['start_year'], model['end_year']+1))

    events={}
    cpu_time={}
    cpu_required={}
    events['data'] = {i: run_model(model, i, data_type='data').events for i in YEARS}
    # Query the MC event model once per year, then split it by MC kind.
    mc_events = {i: mc_event_model(model, i) for i in YEARS}
    events['lhc_mc'] = {i: mc_events[i]['2017'] for i in YEARS}
    events['hllhc_mc'] = {i: mc_events[i]['2026'] for i in YEARS}

    # Note the 'data' quantity below is for prompt reco only.
    cpu_time['data'] = {i : events['data'][i] * processing_times['reco'][i] / cpu_efficiency for i in YEARS}
    cpu_time['lhc_mc'] = {i : events['lhc_mc'][i] * processing_times['lhc_mc'][i] / cpu_efficiency for i in YEARS}
    cpu_time['hllhc_mc'] = {i : events['hllhc_mc'][i] * processing_times['hllhc_mc'][i] / cpu_efficiency for i in YEARS}

    # How much of the data needs to be prompt reconstructed.  Currently fixed
    # at 100%; the model's 'prompt_fraction' entry is not consulted.
    prompt_fraction = {i : 1.0 for i in YEARS}

    # The data need to be reconstructed about as quickly as we record them.  In
    # addition, we need to factor in express, repacking, AlCa, CAF
    # functionality and skimming.  Presumably these all scale like the data.
    # Per the latest CRSG document, these total to 123 kHS06 compared to 240
    # kHS06 for the prompt reconstruction, which we can round to 50%, so
    # multiply by 50%.  (Ignoring the 10 kHS06 needed for VO boxes, which
    # won't scale up and is also pretty small.)
    cpu_required['data'] = {i : (1.5 * cpu_time['data'][i] / running_time)
                            for i in YEARS}

    # Also keep using the _time variables to sum up the total HS06 * s needed,
    # which frees us from assumptions on time needed to complete the work.
    cpu_time['data'] = {i : 1.5 * prompt_fraction[i] * cpu_time['data'][i] for i in YEARS}

    # In-year reprocessing model: assume we will re-reco 25% of the data each
    # year, but we want to complete it in one month.  We also re-reco 25% of
    # the previous year's data (assumed to be the same number of events as this
    # year) but we want to do that in three months.
    # NOTE(review): the second max() argument is not scaled by 0.25, so for
    # positive inputs it always exceeds the first.  Left unchanged pending
    # confirmation of the intended model.
    cpu_required['rereco'] = {i : (1.0/ (cpu_efficiency*seconds_per_month)*
                                  max(0.25 * events['data'][i] * processing_times['reco'][i],
                                  events['data'][i] * processing_times['reco'][i] / (3.)))
                                  for i in YEARS}

    # But the total time needed is the sum of both activities.  The CPU
    # efficiency is applied here as well, matching every other cpu_time entry
    # and cpu_required['rereco'] above (it was previously left out).
    cpu_time['rereco'] = {i : (1.25 * events['data'][i] * processing_times['reco'][i] / cpu_efficiency)
                          for i in YEARS}

    # The corresponding MC, on the other hand, can be reconstructed over an
    # entire year.  We can use this to calculate the HS06 needed to do those
    # tasks.
    cpu_required['lhc_mc'] = {i : cpu_time['lhc_mc'][i] / seconds_per_year for i in YEARS}
    cpu_required['hllhc_mc'] = {i : cpu_time['hllhc_mc'][i] / seconds_per_year for i in YEARS}

    # Unless it is a year with new detectors in, in which case we will have
    # less time to make MC (say half as much).  Only applies to the current
    # era, i.e. no need to compress HL-LHC MC when we are still in LHC era.
    for i in YEARS:
        if i not in model['new_detector_years']:
            continue
        era = 'lhc_mc' if i < 2026 else 'hllhc_mc'
        cpu_required[era][i] = cpu_time[era][i]/ (seconds_per_year / 2)

    return events,cpu_time,cpu_required

In [24]:
def analysis_model_analysis_sets(model,events,cpu_time,cpu_required):
    """Fill the 'analysis' entries from the JSON-configured analysis sets.

    For each year the analysis CPU time is the per-event analysis cost times
    the number of reads per year, summed over the years listed in
    ``model['AnalysisSet'][str(year)]``.  An optional component proportional to
    the reconstruction and MC CPU time is added on top.

    Args:
        model: configuration dict; reads 'cpu_efficiency', 'start_year',
            'end_year', 'AnalysisReadsPerYearData', 'AnalysisReadsPerYearMC',
            'AnalysisSet', 'AnalysisCPUPerEvent' and 'AnalysisCPUScaledByReco'.
        events: dict with 'data', 'lhc_mc' and 'hllhc_mc' year -> event counts.
        cpu_time: dict of year -> HS06 * s tables; gains an 'analysis' entry.
        cpu_required: dict of year -> HS06 tables; gains an 'analysis' entry.

    Returns:
        None.  ``cpu_time`` and ``cpu_required`` are modified in place.
    """
    efficiency = model['cpu_efficiency']
    years = list(range(model['start_year'], model['end_year'] + 1))

    analysis_time = {}
    cpu_time['analysis'] = analysis_time
    for year in years:
        data_reads, _ = time_dependent_value(year=year, values=model['AnalysisReadsPerYearData'])
        mc_reads, _ = time_dependent_value(year=year, values=model['AnalysisReadsPerYearMC'])
        members = model['AnalysisSet'][str(year)]

        # 2.25 is 1 for prompt + 1.25 of rereco
        data_weight = model['AnalysisCPUPerEvent'] * data_reads * 2.25
        mc_weight = model['AnalysisCPUPerEvent'] * mc_reads

        accumulated = 0.
        for member in members:
            accumulated += data_weight * events['data'][member]
            accumulated += mc_weight * events['lhc_mc'][member]
        if year <= 2025:
            # Up to 2025 only the HL-LHC MC of the year itself is read.
            accumulated += mc_weight * events['hllhc_mc'][year]
        else:
            for member in members:
                accumulated += mc_weight * events['hllhc_mc'][member]
        analysis_time[year] = accumulated / efficiency

    cpu_required['analysis'] = {}

    # allow a component that scales with reconstruction
    reco_scale = model['AnalysisCPUScaledByReco']
    if reco_scale > 0:
        for year in years:
            analysis_time[year] += reco_scale * (cpu_time['lhc_mc'][year] + cpu_time['hllhc_mc'][year] +
                                                 cpu_time['data'][year] + cpu_time['rereco'][year])

    # The analysis is assumed to run all year long.
    for year in years:
        cpu_required['analysis'][year] = analysis_time[year]/seconds_per_year
    

In [25]:
def analysis_model_initial(model,events,cpu_time,cpu_required):
    """Legacy analysis model: analysis is 75% of everything else, plus a ramp.

    Args:
        model: configuration dict; reads 'start_year' and 'end_year'
            (both inclusive).
        events: unused; kept so the signature matches the other analysis model.
        cpu_time: dict of year -> HS06 * s tables with 'data', 'rereco',
            'lhc_mc' and 'hllhc_mc'; gains an 'analysis' entry.
        cpu_required: dict of year -> HS06 tables with the same keys; gains
            an 'analysis' entry.

    Returns:
        None.  ``cpu_time`` and ``cpu_required`` are modified in place.
    """
    print("Using old analysis method")

    # YEARS is derived from the model here, as in every other function of this
    # notebook; it was previously an undefined name in this scope.
    YEARS = list(range(model['start_year'], model['end_year']+1))

    cpu_required['analysis'] = {i : 0.75 *
                             (cpu_required['lhc_mc'][i] + cpu_required['hllhc_mc'][i] +
                              cpu_required['data'][i] + cpu_required['rereco'][i])
                             for i in YEARS}

    cpu_time['analysis'] = {i : 0.75* (cpu_time['data'][i] + cpu_time['rereco'][i] +
                         cpu_time['lhc_mc'][i] + cpu_time['hllhc_mc'][i])
                         for i in YEARS}

    # But do something a little funkier for the time up to HL-LHC.  We are
    # accumulating data, so analysis should keep taking longer.  Assume 2018 is
    # "right".  In 2019 we will analyze 2018 data in addition to 2016 and 2017,
    # so make 2019 1/3 bigger.  Keep the same amount through the shutdown when
    # we don't accumulate data.  Then after the shutdown we keep adding in data
    # years that are the same size as the previous ones, and then keep that
    # flat until we ramp up HL-LHC studies in 2025 and we revert back to the
    # 75% model.  Implemented here as a complete kludge.  Note that by kludging
    # this way we don't absorb the software improvement factors...but that's
    # OK, the analysis is I/O bound anyway and doesn't benefit from such
    # improvements.
    #
    # Each entry is (year, reference year, growth factor).  Float literals keep
    # the factors correct whether or not true division is in effect.
    ramp = ((2019, 2018, 4.0/3.0),
            (2020, 2019, 1.0),
            (2021, 2019, 1.0),
            (2022, 2021, 5.0/4.0),
            (2023, 2022, 6.0/5.0),
            (2024, 2023, 7.0/6.0))

    analysis_time = cpu_time['analysis']
    ramped_years = []
    for year, reference, growth in ramp:
        # Only touch years the model covers; otherwise this would raise
        # KeyError or add years outside the model range.
        if year in analysis_time and reference in analysis_time:
            analysis_time[year] = growth * analysis_time[reference]
            ramped_years.append(year)

    # More kludging: assume analysis takes place all year to calculate the HS06
    # required for the above analysis CPU time.  Eric will hate this, I do too,
    # we should fix it up later.
    for year in ramped_years:
        cpu_required['analysis'][year] = analysis_time[year]/seconds_per_year
                                                          

SyntaxError: EOL while scanning string literal (<ipython-input-25-f7b39bed7050>, line 10)

In [None]:
# Analysis!  Following something like the 2018 resource request, we make this
# 75% of everything else (for a moment).

# new json driven model
# constant time to read - just driven by analysis sets

def add_analysis_model(model,events,cpu_time,cpu_required):
    """Add the 'analysis' entries to cpu_time and cpu_required.

    Uses the JSON-driven analysis-set model when the configuration has an
    'AnalysisSet' entry, and the older 75%-of-everything model otherwise.

    Args:
        model: configuration dict.
        events, cpu_time, cpu_required: the tables handed on unchanged to the
            selected analysis model, which updates them in place.

    Returns:
        None.
    """
    if 'AnalysisSet' not in model:
        analysis_model_initial(model,events,cpu_time,cpu_required)
    else:
        analysis_model_analysis_sets(model,events,cpu_time,cpu_required)
  

In [None]:
# Shutdown year model:

# If in the first year of a shutdown, need to reconstruct the previous
# three years of data, but you have all year to do it.  No need for all the
# ancillary stuff.  We need to do the MC also...assume similarly that we
# have three times as many events as we had the previous year.

def shutdown_year_model(model,processing_times,events,cpu_time,cpu_required):
    """Rewrite the tables in place for the first year of every shutdown.

    A year is treated as "first year of a shutdown" when ``in_shutdown``
    reports it as in shutdown and the previous year as not.  For such a year:

    * events['data'] becomes three times the previous year's value, and the
      'rereco' CPU time and power are recomputed from it, spread over a year;
    * the same is done for one MC key: 'lhc_mc' before 2025, 'hllhc_mc' from
      2025 on;
    * from ``model['first_year_to_spread_rereco_over_two_years']`` onwards the
      work is halved and the same amount is also added to the following year.

    cpu_time['data'] and cpu_required['data'] are not modified here.

    Args:
        model: configuration dict; reads 'cpu_efficiency', 'start_year',
            'end_year' and 'first_year_to_spread_rereco_over_two_years'.
        processing_times: dict of year -> per-event time tables; reads 'reco'
            and the selected MC key.
        events, cpu_time, cpu_required: year-indexed tables, updated in place.

    Returns:
        None.
    """
    cpu_efficiency = model['cpu_efficiency']
    date_rereco_two_years=model['first_year_to_spread_rereco_over_two_years']
    YEARS = list(range(model['start_year'], model['end_year']+1))

    for i in YEARS:
        # in_shutdown returns a pair; only its first element is used here.
        shutdown_this_year, dummy = in_shutdown(model,i)
        shutdown_last_year, dummy = in_shutdown(model,i-1)
        shutdown_next_year, dummy = in_shutdown(model,i+1)
        
        # Only the first year of a shutdown is handled; skip everything else.
        if not (shutdown_this_year and not(shutdown_last_year)):
            continue
        
        # NOTE(review): reads year i-1, so this raises KeyError if the first
        # model year is itself the first year of a shutdown -- confirm that
        # cannot happen with the supported configurations.
        events['data'][i] = 3 * events['data'][i-1]
        if i >=date_rereco_two_years:
            # Only half of the work is done this year; the rest goes to i+1 below.
            events['data'][i]=0.5*events['data'][i]
        cpu_time['rereco'][i] = events['data'][i] * processing_times['reco'][i] / cpu_efficiency
        #print("ok",i,cpu_time['rereco'][i])
        cpu_required['rereco'][i] = cpu_time['rereco'][i] / seconds_per_year

        if i >=date_rereco_two_years:
            if i+1 in events['data']:
                # If next year is also a shutdown year its own values are
                # discarded first; otherwise this year's half is added on top
                # of what is already there.
                if shutdown_next_year:
                    events['data'][i+1]=0
                    cpu_time['rereco'][i+1]=0
                    cpu_required['rereco'][i+1]=0
                events['data'][i+1]+=events['data'][i]
                cpu_time['rereco'][i+1] += cpu_time['rereco'][i]
                cpu_required['rereco'][i+1] += cpu_required['rereco'][i]

        # Same treatment for the MC of the current era.
        # NOTE(review): the era boundary here is 2025, whereas
        # compute_cpu_needs switches at 2026 -- confirm which is intended.
        keyToUpdate='hllhc_mc'
        if i < 2025:
            keyToUpdate='lhc_mc'
        events[keyToUpdate][i] = 3 * events[keyToUpdate][i-1]
        if i >=date_rereco_two_years:
            events[keyToUpdate][i]= 0.5*events[keyToUpdate][i]
        cpu_time[keyToUpdate][i] = events[keyToUpdate][i] * processing_times[keyToUpdate][i] / cpu_efficiency
        cpu_required[keyToUpdate][i] = cpu_time[keyToUpdate][i] / seconds_per_year
        if i >=date_rereco_two_years:
            if i+1 in events[keyToUpdate]:
                if shutdown_next_year:
                    events[keyToUpdate][i+1]=0
                    cpu_time[keyToUpdate][i+1]=0
                    cpu_required[keyToUpdate][i+1]=0
                events[keyToUpdate][i+1] += events[keyToUpdate][i]
                cpu_time[keyToUpdate][i+1] += cpu_time[keyToUpdate][i]
                cpu_required[keyToUpdate][i+1] += cpu_required[keyToUpdate][i] #this looks like a bug i+1. Fix later


In [None]:
# Sum up everything
def compute_totals(model,cpu_time,cpu_required):
    """Add the yearly 'total' and 'hpc' sums to cpu_time and cpu_required.

    'total' sums the 'data', 'rereco', 'lhc_mc', 'hllhc_mc' and 'analysis'
    entries; 'hpc' sums 'rereco', 'lhc_mc' and 'hllhc_mc'.  Entries missing
    from a table are skipped.

    Args:
        model: configuration dict; reads 'start_year' and 'end_year'
            (both inclusive).
        cpu_time: dict of year -> HS06 * s tables, updated in place.
        cpu_required: dict of year -> HS06 tables, updated in place.

    Returns:
        None.
    """
    YEARS = list(range(model['start_year'], model['end_year']+1))
    all_keys=['data','rereco','lhc_mc','hllhc_mc','analysis']
    hpc_keys=['rereco','lhc_mc','hllhc_mc']

    def yearly_sum(table, keys):
        # Walk the key list rather than the dict: this works on Python 2 and 3
        # (no dict.iteritems) and sums in a fixed order.
        present = [k for k in keys if k in table]
        return {i : sum(table[k][i] for k in present) for i in YEARS}

    cpu_required['total'] = yearly_sum(cpu_required, all_keys)
    cpu_time['total'] = yearly_sum(cpu_time, all_keys)

    cpu_required['hpc'] = yearly_sum(cpu_required, hpc_keys)
    cpu_time['hpc'] = yearly_sum(cpu_time, hpc_keys)

    return

In [None]:
from __future__ import print_function
def print_cpu_results(model,cpu_time,cpu_required,
                      cpu_capacity,cpu_time_capacity,cpuCapacity,cpuTimeCapacity):
    """Print the yearly CPU needs next to both capacity projections.

    Two tables are written to stdout: CPU power in MHS06 and CPU time in
    THS06 * s.  Each row lists the per-category needs, their total, the two
    capacity values, the total / second-capacity ratio, 40% of the total
    (the "USCMS" column) and the share of the total in the 'hpc' entry.

    Args:
        model: configuration dict; reads 'start_year' and 'end_year'.
        cpu_time: dict of year -> HS06 * s tables, including 'total' and 'hpc'.
        cpu_required: dict of year -> HS06 tables, including 'total' and 'hpc'.
        cpu_capacity, cpu_time_capacity: capacities keyed by int year.
        cpuCapacity, cpuTimeCapacity: capacities keyed by str year.

    Returns:
        None.
    """
    years = list(range(model['start_year'], model['end_year']+1))
    header = "Year Prompt NonPrompt LHCMC HLLHCMC Ana Total Cap1 Cap2 Ratio USCMS HPC"
    categories = ('data', 'rereco', 'lhc_mc', 'hllhc_mc', 'analysis', 'total')

    def emit_table(title, needs, flat_capacity, lifetime_capacity, scale, template, unit):
        # One table: title, header, then one space-separated row per year.
        show = template.format
        print(title)
        print(header)
        for year in years:
            cells = []
            for name in categories:
                cells.append(show(needs[name][year] / scale))
            cells.append(show(flat_capacity[year] / scale))
            cells.append(show(lifetime_capacity[str(year)] / scale))
            cells.append(unit)
            cells.append(show(needs['total'][year]/lifetime_capacity[str(year)]))
            cells.append(show(0.4* (needs['total'][year]) / scale))
            cells.append(show(needs['hpc'][year]/needs['total'][year]))
            print(year, *cells)

    emit_table("CPU requirements in HS06", cpu_required,
               cpu_capacity, cpuCapacity, mega, '{:04.3f}', 'MHS06')
    emit_table("CPU requirements in HS06 * s", cpu_time,
               cpu_time_capacity, cpuTimeCapacity, tera, '{:03.2f}', 'THS06 * s')
    return

In [None]:
# Then, CPU availability calculations.  This follows the "Available CPU
# power" spreadsheet.  Take a baseline value of 1.4 MHS06 in 2016, in
# future years subtract 5% of the previous for retirements, and add 300
# kHS06 which gets improved by the cpu_improvement in each year, until
# 2020, during LS2, when we shift the computing model to start buying an
# improved 600 kHS06 per year.

# This is kludgey -- need to establish the baseline to make the
# calculation work, but once the calculation is there, delete the baseline
# for the histogram to work.  Not to mention that I couldn't get the
# dictionary comprehension to work here.

def compute_cpu_capacity(model):
    """Project the available CPU capacity per year under two retirement models.

    Model 1 ("5% retirement"): start from 1.4 MHS06 in the year before the
    first model year; each year retire 5% and add 300 kHS06 (600 kHS06 from
    2020 on) scaled by the hardware improvement factor relative to 2017.

    Model 2 (lifetime retirement, driven by ``model['capacity_model']``): start
    from 'cpu_start' in 'cpu_year', assumed to have been bought in equal chunks
    over the preceding 'cpu_lifetime' years.  Each later year adds the
    applicable 'cpu_delta' scaled by the hardware improvement factor and
    retires whatever was added 'cpu_lifetime' years earlier.

    Args:
        model: configuration dict; reads 'start_year', 'end_year',
            'improvement_factors'['hardware'] and 'capacity_model'.

    Returns:
        Tuple ``(cpu_capacity, cpu_time_capacity, cpuCapacity,
        cpuTimeCapacity)``.  The first two are keyed by int year (model 1),
        the last two by str year (model 2).  The "time" variants are the
        capacity multiplied by ``seconds_per_year``.
    """
    YEARS = list(range(model['start_year'], model['end_year']+1))
    cpu_improvement_factor = model['improvement_factors']['hardware']
    cpu_improvement = {i : cpu_improvement_factor ** (i-2017) for i in YEARS}

    # --- Model 1: fixed yearly retirement rate -------------------------------
    retirement_rate = 0.05
    cpu_capacity = {}
    # This variable assumes that you can have the cpu_capacity for an entire
    # year and thus calculates the HS06 * s available (in principle).
    cpu_time_capacity = {}

    # Hard-wired baseline for the year before the first model year.  It is
    # carried in a local so it never shows up in the returned dictionaries.
    installed = 1.4 * mega
    for i in YEARS:
        installed = installed * (1 - retirement_rate) + (300 if i < 2020 else 600) * kilo * cpu_improvement[i]
        cpu_capacity[i] = installed
        cpu_time_capacity[i] = installed * seconds_per_year

    # --- Model 2: CPU capacity model ala data.py -----------------------------
    capacity_model = model['capacity_model']
    first_year = capacity_model['cpu_year']
    lifetime = capacity_model['cpu_lifetime']

    # Set the initial points
    cpuCapacity = {str(first_year): capacity_model['cpu_start']}
    cpuTimeCapacity = {str(first_year): capacity_model['cpu_start'] * seconds_per_year}

    # A bit of a kludge. Assume what we have now was bought and will be retired
    # in equal chunks over its lifetime
    cpuAdded = {}
    for year in range(first_year - lifetime + 1, first_year + 1):
        cpuAdded[str(year)] = capacity_model['cpu_start'] / lifetime

    cpuDeltas = capacity_model['cpu_delta']
    for year in YEARS:
        if str(year) in cpuCapacity:
            continue

        # Find the delta, which can be time dependent: the entry with the
        # latest start year not after this year wins.  With no applicable
        # entry nothing is added; lastCpuYear defaults to this year so the
        # exponent below is always defined (it used to be an unbound name).
        cpuDelta = 0
        lastCpuYear = year
        for deltaYear in sorted(cpuDeltas, key=int):
            if year >= int(deltaYear):
                lastCpuYear = int(deltaYear)
                cpuDelta = cpuDeltas[deltaYear]

        cpuAdded[str(year)] = cpuDelta * cpu_improvement_factor ** (year - lastCpuYear)

        # Retire cpu added N years ago or retire 0
        cpuRetired = cpuAdded.get(str(year - lifetime), 0)
        cpuCapacity[str(year)] = cpuCapacity[str(year - 1)] + cpuAdded[str(year)] - cpuRetired
        cpuTimeCapacity[str(year)] = cpuCapacity[str(year)] * seconds_per_year
    return cpu_capacity,cpu_time_capacity,cpuCapacity,cpuTimeCapacity

In [None]:
from __future__ import print_function
def print_cpu_info(model,cpu_time):
    """Print, per year, how the T1/T2 CPU time splits between activities.

    The T1/T2 total is the overall CPU time minus the 'data' entry, increased
    by a fixed 3% for event generation.  MC time is split into simulation and
    digi+reco according to the per-event step times from
    ``performance_by_year``, weighted by the LHC / HL-LHC share of the MC CPU
    time.  The prompt column is always zero.  The last column is the T1/T2
    total times ``model['us_fraction_T1T2']``, divided by ``tera``.

    Args:
        model: configuration dict; reads 'start_year', 'end_year' and
            'us_fraction_T1T2'.
        cpu_time: dict of year -> HS06 * s tables with 'total', 'data',
            'rereco', 'lhc_mc', 'hllhc_mc' and 'analysis'.

    Returns:
        None.
    """
    years = list(range(model['start_year'], model['end_year']+1))
    print("Fraction of CPU required for T1/T2 activities")
    print("Year\t Prmpt\t Rreco\tGen\tSim\tSimReco\t Anal\t USCPU")

    gen_share = 0.03
    us_fraction = model['us_fraction_T1T2']
    three_places = '{:04.3f}'.format

    def step_shares(year, kind):
        # Shares of GENSIM, DIGI and RECO in the per-event time of one MC kind.
        sim = performance_by_year(model, year, 'GENSIM', data_type='mc', kind=kind)[0]
        digi = performance_by_year(model, year, 'DIGI', data_type='mc', kind=kind)[0]
        reco = performance_by_year(model, year, 'RECO', data_type='mc', kind=kind)[0]
        return sim/(sim+digi+reco), digi/(sim+digi+reco), reco/(sim+digi+reco)

    def blended(lhc_share, hllhc_share, lhc_weight, mc_time, t1t2_time):
        # Era-weighted step share, expressed as a fraction of the T1/T2 total.
        return (lhc_share*lhc_weight +
                hllhc_share*(1.0-lhc_weight))*mc_time / t1t2_time

    for year in years:
        # first some calculations we didn't do before
        lhc_sim, lhc_digi, lhc_reco = step_shares(year, '2017')
        hllhc_sim, hllhc_digi, hllhc_reco = step_shares(year, '2026')

        lhc_weight = cpu_time['lhc_mc'][year] / (cpu_time['lhc_mc'][year] + cpu_time['hllhc_mc'][year])
        mc_time = cpu_time['lhc_mc'][year] + cpu_time['hllhc_mc'][year]

        t1t2_time = (cpu_time['total'][year] - cpu_time['data'][year])*(1.0+gen_share)

        sim_share = blended(lhc_sim, hllhc_sim, lhc_weight, mc_time, t1t2_time)
        digi_share = blended(lhc_digi, hllhc_digi, lhc_weight, mc_time, t1t2_time)
        reco_share = blended(lhc_reco, hllhc_reco, lhc_weight, mc_time, t1t2_time)

        rereco_share = cpu_time['rereco'][year] / t1t2_time
        analysis_share = cpu_time['analysis'][year] / t1t2_time
        prompt_share = 0.
        us_cpu = t1t2_time*us_fraction/tera

        columns = [three_places(prompt_share),
                   three_places(rereco_share),
                   three_places(gen_share),
                   three_places(sim_share),
                   three_places(digi_share+reco_share),
                   three_places(analysis_share),
                   '{:04.2f}'.format(us_cpu)]
        # Every field, including the year, is followed by a tab argument.
        pieces = [year, '\t']
        for text in columns:
            pieces.append(text)
            pieces.append('\t')
        print(*pieces)

In [None]:
def plot_hs06(model,cpu_required,cpu_capacity,cpuCapacity):
    """Plot the yearly CPU power needs (MHS06), alone and against capacity.

    Writes two PNG files to the current directory: 'CPUByType<suffix>.png'
    (stacked bars per category) and 'CPUByTypeAndCapacity<suffix>.png' (the
    same bars with both capacity projections drawn as lines).  The suffix is
    built from the base names of the entries in ``model['names']``.

    Args:
        model: configuration dict; reads 'start_year', 'end_year', 'names',
            'plotMaximums' and 'minYearToPlot'.
        cpu_required: dict of year -> HS06 tables with 'data', 'rereco',
            'lhc_mc', 'hllhc_mc' and 'analysis'.
        cpu_capacity: capacity in HS06 keyed by int year.
        cpuCapacity: capacity in HS06 keyed by str year; only years that are
            also present in ``cpu_capacity`` are plotted.

    Returns:
        None.
    """
    YEARS = list(range(model['start_year'], model['end_year']+1))

    def in_mhs06(by_year):
        # Values in ascending key order, converted to MHS06.
        return [value/mega for _key, value in sorted(by_year.items())]

    def flip_legend(axes):
        # Reverse the legend so it reads top-down like the stacked bars.
        handles, labels = axes.get_legend_handles_labels()
        axes.legend(handles[::-1],labels[::-1],loc='best', markerscale=0.25, fontsize=11)

    def save_png(axes, stem):
        figure = axes.get_figure()
        figure.tight_layout()
        figure.savefig(stem+pngKeyName+'.png')

    # Squirt the dictionary entries into lists:
    cpuDataList = in_mhs06(cpu_required['data'])
    cpuRerecoList = in_mhs06(cpu_required['rereco'])
    cpuLHCMCList = in_mhs06(cpu_required['lhc_mc'])
    cpuHLLHCMCList = in_mhs06(cpu_required['hllhc_mc'])
    cpuAnaList = in_mhs06(cpu_required['analysis'])
    cpuCapacityList = in_mhs06(cpu_capacity)
    altCapacityList = [value/mega for key, value in sorted(cpuCapacity.items())
                       if int(key) in cpu_capacity]

    pngKeyName = ''
    if model['names'] is not None:
        pngKeyName = ''.join('_'+m.split('/')[-1].split('.')[0] for m in model['names'])

    plotMaxs = model['plotMaximums']

    # Build a data frame from lists:
    stack_columns = ['Prompt Data', 'Non-Prompt Data', 'LHC MC', 'HL-LHC MC', 'Analysis']
    columns = {'Year': [str(year) for year in YEARS]}
    columns.update(zip(stack_columns,
                       [cpuDataList, cpuRerecoList, cpuLHCMCList, cpuHLLHCMCList, cpuAnaList]))
    cpuFrame = pd.DataFrame(columns)

    ax = cpuFrame[['Year'] + stack_columns].plot(x='Year',kind='bar',stacked=True,colormap='Paired')
    ax.set(ylabel='MHS06')
    ax.set(title='CPU by Type')
    flip_legend(ax)
    ax.set_ylim(ymax=plotMaxs['CPUByType'])
    # Bars sit at integer positions starting from 0, hence the index offset.
    minYearVal = max(0,model['minYearToPlot']-YEARS[0])-0.5
    ax.set_xlim(xmin=minYearVal)
    save_png(ax, 'CPUByType')

    capacity_columns = dict(columns)
    capacity_columns['Capacity, 5% retirement'] = cpuCapacityList
    capacity_columns['Capacity, 5 year retirement'] = altCapacityList
    cpuCapacityFrame = pd.DataFrame(capacity_columns)

    ax = cpuCapacityFrame[['Year','Capacity, 5% retirement']].plot(x='Year',linestyle='-',marker='o', color='Red')
    cpuCapacityFrame[['Year','Capacity, 5 year retirement']].plot(x='Year',linestyle='-',marker='o', color='Blue',ax=ax)
    cpuCapacityFrame[['Year'] + stack_columns].plot(x='Year',kind='bar',stacked=True,ax=ax,colormap='Paired')
    ax.set(ylabel='MHS06')
    ax.set(title='CPU by Type and Capacity')
    ax.set_ylim(ymax=plotMaxs['CPUByTypeAndCapacity'])
    ax.set_xlim(xmin=minYearVal)
    flip_legend(ax)
    save_png(ax, 'CPUByTypeAndCapacity')

In [None]:
def plot_cpu_hs06s(model,cpu_time,cpu_time_capacity,cpuTimeCapacity):
    """Plot the yearly CPU time needs (THS06 * s), alone and against capacity.

    Writes two PNG files to the current directory:
    'CPUSecondsByType<suffix>.png' (stacked bars per category) and
    'CPUSecondsByTypeAndCapacity<suffix>.png' (the same bars with both
    capacity projections drawn as lines).  The suffix is built from the base
    names of the entries in ``model['names']``.

    Args:
        model: configuration dict; reads 'start_year', 'end_year', 'names',
            'plotMaximums' and 'minYearToPlot'.
        cpu_time: dict of year -> HS06 * s tables with 'data', 'rereco',
            'lhc_mc', 'hllhc_mc' and 'analysis'.
        cpu_time_capacity: capacity in HS06 * s keyed by int year.
        cpuTimeCapacity: capacity in HS06 * s keyed by str year; only years
            that are also present in ``cpu_time_capacity`` are plotted.

    Returns:
        None.
    """
    YEARS = list(range(model['start_year'], model['end_year']+1))

    def in_ths06s(by_year):
        # Values in ascending key order, converted to THS06 * s.
        return [value/tera for _key, value in sorted(by_year.items())]

    def flip_legend(axes):
        # Reverse the legend so it reads top-down like the stacked bars.
        handles, labels = axes.get_legend_handles_labels()
        axes.legend(handles[::-1],labels[::-1],loc='best', markerscale=0.25, fontsize=11)

    def save_png(axes, stem):
        figure = axes.get_figure()
        figure.tight_layout()
        figure.savefig(stem+pngKeyName+'.png')

    # Squirt the dictionary entries into lists:
    cpuDataTimeList = in_ths06s(cpu_time['data'])
    cpuRerecoTimeList = in_ths06s(cpu_time['rereco'])
    cpuLHCMCTimeList = in_ths06s(cpu_time['lhc_mc'])
    cpuHLLHCMCTimeList = in_ths06s(cpu_time['hllhc_mc'])
    cpuAnaTimeList = in_ths06s(cpu_time['analysis'])
    cpuCapacityTimeList = in_ths06s(cpu_time_capacity)
    altCapacityTimeList = [value/tera for key, value in sorted(cpuTimeCapacity.items())
                           if int(key) in cpu_time_capacity]

    pngKeyName = ''
    if model['names'] is not None:
        pngKeyName = ''.join('_'+m.split('/')[-1].split('.')[0] for m in model['names'])
    plotMaxs = model['plotMaximums']

    # Build a data frame from lists:
    stack_columns = ['Prompt Data', 'Non-Prompt Data', 'LHC MC', 'HL-LHC MC', 'Analysis']
    columns = {'Year': [str(year) for year in YEARS]}
    columns.update(zip(stack_columns,
                       [cpuDataTimeList, cpuRerecoTimeList, cpuLHCMCTimeList,
                        cpuHLLHCMCTimeList, cpuAnaTimeList]))
    cpuTimeFrame = pd.DataFrame(columns)

    ax = cpuTimeFrame[['Year'] + stack_columns].plot(x='Year',kind='bar',stacked=True,colormap='Paired')
    ax.set(ylabel='THS06 * s')
    ax.set(title='CPU seconds by Type')
    ax.set_ylim(ymax=plotMaxs['CPUSecondsByType'])
    # Bars sit at integer positions starting from 0, hence the index offset.
    minYearVal = max(0,model['minYearToPlot']-YEARS[0])-0.5
    ax.set_xlim(xmin=minYearVal)
    flip_legend(ax)
    save_png(ax, 'CPUSecondsByType')

    capacity_columns = dict(columns)
    capacity_columns['Capacity, 5% retirement'] = cpuCapacityTimeList
    capacity_columns['Capacity, 5 year retirement'] = altCapacityTimeList
    cpuTimeCapacityFrame = pd.DataFrame(capacity_columns)

    ax = cpuTimeCapacityFrame[['Year','Capacity, 5% retirement']].plot(x='Year',linestyle='-',marker='o', color='Red')
    cpuTimeCapacityFrame[['Year','Capacity, 5 year retirement']].plot(x='Year',linestyle='-',marker='o', color='Blue',ax=ax)
    cpuTimeCapacityFrame[['Year'] + stack_columns].plot(x='Year',kind='bar',stacked=True,ax=ax,colormap='Paired')
    ax.set(ylabel='THS06 * s')
    ax.set(title='CPU seconds by Type and Capacity')
    ax.set_ylim(ymax=plotMaxs['CPUSecondsByTypeAndCapacity'])
    ax.set_xlim(xmin=minYearVal)
    flip_legend(ax)
    save_png(ax, 'CPUSecondsByTypeAndCapacity')