Cost model notebook - Goals are to implement
2) A suite of models for the cost evolution of a resource, driven by a JSON-like interface for passing parameters.
2) Cost models per unit of resource for CMS CPU, disk, tape
3) Annual cost for CMS CPU, disk, tape assuming resources are purchased "just in time", i.e., in the year in which they are first needed. This cost also assumes a lifetime for each resource (a JSON-driven parameter).
4) Visualize results as table and figure showing annual costs.

In [1]:
# A generic cost evolution model. Currently supported evolutions are:
# 1) 'Constant' : the cost per unit never changes
# 2) 'ConstantEvolution' : the cost per unit (HS06, PB) falls by a constant factor every year
import numpy
def costPerResourceUnit(datesToEstimate,modelParameters):
    """Return the cost of one unit of a resource for each requested year.

    Parameters
    ----------
    datesToEstimate : array-like of years (numpy array, list, tuple or scalar).
    modelParameters : dict with a 'model' key selecting the evolution:
        'Constant'          -- needs 'cost0'; the cost never changes.
        'ConstantEvolution' -- needs 't0', 'cost0' and 'annualCostReduction';
                               the cost is
                               cost0 * (1 + annualCostReduction) ** -(year - t0).

    Returns
    -------
    For 'Constant' the bare 'cost0' value (not expanded to one entry per
    year); for 'ConstantEvolution' one cost per entry of datesToEstimate;
    None when the model name is not recognised.
    """
    model=modelParameters['model']
    if model == 'Constant':
        return modelParameters['cost0']
    if model == 'ConstantEvolution':
        t0=modelParameters['t0']
        baselineCost=modelParameters['cost0']
        reductionPerYear=modelParameters['annualCostReduction']
        # numpy.subtract (rather than the '-' operator) also accepts plain
        # lists/tuples of years; for numpy arrays the result is the same.
        yearsSinceBaseline=numpy.subtract(datesToEstimate,t0)
        return baselineCost*numpy.power(1.+reductionPerYear,-1*yearsSinceBaseline)
    # Unknown model name: callers get None back and have to check for it.
    return None

def computeCost(datesToEstimate,neededResources,costs,modelParameters):
    """Return the yearly cost of buying resources "just in time".

    Each year's purchase is the growth in need relative to the previous year
    plus the replacement of resources that reach the end of their lifetime
    that year; the purchase is never allowed to go below zero.

    Parameters
    ----------
    datesToEstimate : sequence of years. The replacement lookup indexes by
        position (i - lifetime), so it assumes consecutive years.
    neededResources : numpy array with the resources needed in each of those
        years.
    costs : cost per unit of resource, either a scalar or one value per year.
    modelParameters : dict providing 'lifetime' (years a resource stays in
        service; used as an index offset, so it must be an integer) and
        'programStart' (first year in which resources were bought).

    Returns
    -------
    numpy array with the cost for each year of datesToEstimate.

    NOTE(review): the first entry is computed against an implicit previous
    need of zero, so it contains all of neededResources[0]; that value is
    also what gets replaced one lifetime later. Confirm whether this is the
    intended model.
    """
    resourceLifetime=modelParameters['lifetime']
    # assume linear buying between the start of the program and datesToEstimate[0]
    startOfLife=modelParameters['programStart']

    # purely new stuff: year-on-year change in need - can be negative...
    extraResourcesToPurchase = neededResources - numpy.append([0.],neededResources)[:-1]

    # now add in resources that must be replaced
    resourcesToPurchase = numpy.copy(extraResourcesToPurchase)
    for i,y in enumerate(datesToEstimate):
        # resources retiring in year y were bought one lifetime earlier
        purchaseYear = y - resourceLifetime
        if purchaseYear < startOfLife:
            # no retirements yet, the project is too new: nothing was bought
            # before the program started
            lostResources = 0.
        elif purchaseYear < datesToEstimate[0]:
            # bought before the first estimated year: assume the initial
            # capacity was purchased in equal yearly shares since programStart
            lostResources = neededResources[0] / float((datesToEstimate[0]-startOfLife)+1.)
        else:
            # bought inside the estimated range: replace what was purchased then
            lostResources = resourcesToPurchase[i-resourceLifetime]
        resourcesToPurchase[i] += lostResources
        # a shrinking need cannot turn into a negative purchase
        if resourcesToPurchase[i] < 0.:
            resourcesToPurchase[i] = 0.

    costPerYear = resourcesToPurchase * costs
    return costPerYear

In [2]:
import numpy
def computeCosts(years,cost_models, needed_resources):
    """Return a dict of yearly costs keyed by 'cpu', 'disk' and 'tape'.

    For each of the three resources the unit cost is first obtained from
    costPerResourceUnit using that resource's entry in cost_models, then
    computeCost turns the entry of needed_resources for that resource into a
    cost per year.
    """
    resourceNames = ('cpu', 'disk', 'tape')

    # first pass: cost per unit of each resource
    unitCosts = dict((name, costPerResourceUnit(years,cost_models[name]))
                     for name in resourceNames)

    # second pass: yearly spend for each resource
    return dict((name, computeCost(years,needed_resources[name],unitCosts[name],
                                   cost_models[name]))
                for name in resourceNames)

def printCosts(years,needed_resources,costsPerYear):
    """Print a table with one row per year of needed resources and costs.

    Columns are: the year, needed_resources['cpu'] divided by 1000 (headed
    'kHS06'), needed_resources['disk'], needed_resources['tape'], and then
    costsPerYear for 'cpu', 'disk' and 'tape'. All three dict arguments are
    indexed by position in `years`.

    The single-argument print(...) form is used so the function runs and
    prints the same text under both Python 2 and Python 3.
    """
    print('Year    kHS06  PBDisk  PBTape   Cost   Cost   Cost')
    for i,y in enumerate(years):
        print('%4d  %7.1f %7.1f %7.1f %6.1f %6.1f %6.1f' %(y,
                                                           needed_resources['cpu'][i]/1000.,
                                                           needed_resources['disk'][i],
                                                           needed_resources['tape'][i],
                                                           costsPerYear['cpu'][i],
                                                           costsPerYear['disk'][i],
                                                           costsPerYear['tape'][i]))


In [3]:
%matplotlib inline
import pylab
pylab.rcParams['figure.figsize'] = 10, 6
import numpy

def plotCosts(fignum,years,costList,costLabels):
    """Draw a stacked bar chart of annual costs in figure `fignum`.

    Parameters
    ----------
    fignum : figure number handed to pylab.figure.
    years : numpy array of years; the first one is not plotted.
    costList : list of per-year cost arrays, each the same length as `years`;
        they are stacked in list order.
    costLabels : one legend label per entry of costList.
    """
    colList=['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    pylab.figure(fignum)
    #skip the first year as the cost for that year does not make sense
    plottedYears=years[1:]
    pList=[]
    prevSum=numpy.zeros(len(costList[0])-1)
    for i,costs in enumerate(costList):
        layer=costs[1:]
        # The -0.4 offset with a 0.8 width centres each bar on its year only
        # when x is the bar's left edge, so request that alignment explicitly
        # instead of relying on the library default.
        # Colours wrap around so more than len(colList) series cannot raise
        # IndexError.
        p=pylab.bar(plottedYears-0.4,layer,0.8,bottom=prevSum,
                    color=colList[i % len(colList)],align='edge')
        prevSum=prevSum + layer
        pList.append(p)
    pylab.xlabel('Year',fontsize=15)
    pylab.ylabel('Annual Cost ($M)',fontsize=15)
    pylab.xlim(xmin=years[1]-0.5,xmax=years[-1]+0.5)
    # reversed so the legend reads top-to-bottom in the same order as the
    # stack; real lists are passed rather than one-shot iterators
    pylab.legend(pList[::-1],list(reversed(costLabels)),loc='best')
    



In [4]:
import numpy

def define_cost_model():
    """Return the cost-model parameters for CPU, disk and tape.

    The result is a dict keyed by 'cpu', 'disk' and 'tape'. Each value is its
    own dict with the keys 'model', 't0', 'cost0', 'annualCostReduction',
    'lifetime' and 'programStart'. The three resources share every parameter
    except 'cost0'.

    'cost0' carries a 1e-6 factor on top of the dollar figure, presumably so
    that costs come out in $M - NOTE(review): confirm the unit.
    """
    # parameters common to all three resources
    sharedParameters = { 'model': 'ConstantEvolution',
                         't0': 2017,
                         'annualCostReduction': 0.15,
                         'lifetime' : 6,
                         'programStart' : 2008}

    cost_models={}
    # $11/HS06
    cost_models['cpu'] = dict(sharedParameters, cost0=11.0*1e-6)

    # $110/TB of installed space, translated into PB. Frank/Brian agree that
    # 110/TB is about right for US currently.
    # NOTE(review): an older note here quoted $40/TB plus a factor for keeping
    # two copies of data at each site (and said this was the wrong spot for
    # that factor); no such factor is applied in this value.
    cost_models['disk'] = dict(sharedParameters, cost0=110.0*1000.0*1e-6)

    # $10/TB, translated into PB.
    # NOTE(review): the accompanying note said "$3/TB - I made this up - need
    # feedback from Dave", which does not match the coded 10.0; confirm which
    # figure is intended.
    cost_models['tape'] = dict(sharedParameters, cost0=10.0*1000.0*1e-6)
    return cost_models

