In [1]:
import xlwings as xw
import pandas as pd
from ortools.linear_solver import pywraplp
import numpy as np

In [2]:
# settings
## to do: YAML these

sample_size = 100 # initial sample to optimize on

In [3]:
# wb = xw.Book()  # this will create a new workbook
wb = xw.Book(r'myproject/myproject.xlsm')  # connect to an existing file in the current working directory
# wb = xw.Book(r'C:\path\to\file.xlsx')  # on Windows: use raw strings to escape backslashes

Instantiate a sheet object.

In [4]:
sht = wb.sheets['temp_sheet']


Reading and writing is easy:

In [5]:
sht.range('A1').value = 'Foo 1'
sht.range('A1').value

'Foo 1'

Read in product and channel information from Excel.

In [6]:
customer_sheet = wb.sheets['customer_data']
product_sheet = wb.sheets['products']
channel_sheet = wb.sheets['channels']
scenario_sheet = wb.sheets['Scenario']

In [7]:
product_probs_all = customer_sheet.range('A1').options(pd.DataFrame, expand='table').value
products_df = product_sheet.range('A1').options(pd.DataFrame, expand='table').value

products = products_df.index
productValue = products_df.iloc[:,0]

channels_df = channel_sheet.range('A1').options(pd.DataFrame, expand='table').value
channels = channels_df.index
cost = channels_df['cost']
factor = channels_df['factor']

Get the available marketing budget from the `Scenario` sheet.

In [8]:
budget_range = scenario_sheet.range('budgetConstraints').value
availableBudget_total = budget_range[1]
availableBudget = availableBudget_total/sample_size # scale to sample_size for initial optimization
print("Sampled avilable budget: %d" % availableBudget)

Sampled avilable budget: 1851


Create a sample of size `sample_size` for the initial optimization.

In [9]:
product_probs = product_probs_all.sample(n=sample_size, random_state=2058)

Instantiate the solver as an MIP problem.

In [10]:
solver = pywraplp.Solver('SolveCampaignProblem', pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)

Define the number of customers, the number of offers and the number of channels as $x_{ijk}$.

In [11]:
num_customers = product_probs.shape[0]
num_products = len(products)
num_channels = len(channels)

x = {}

for i in range(num_customers):
    for j in range(num_products):
        for k in range(num_channels):
            x[i, j, k] = solver.IntVar(0, 1, 'x[%i,%i,%i]' % (i, j, k))

In [12]:
print('Number of customers: %d' % num_customers)
print('Number of products: %d' % num_products)
print('Number of channels: %d' % num_channels)

Number of customers: 100
Number of products: 4
Number of channels: 3


## Set up the constraints

  1. Offer only one product per customer. _(TO DO: update this.)_
  2. Adhere to budget, channel and product constraints from the Excel spreadsheet.
  3. Adhere to number of offer constraints
  


In [13]:
    ## offer only one product per customer
    for i in range(num_customers):
        solver.Add(solver.Sum([x[i, j, k] 
                               for j in range(num_products)
                               for k in range(num_channels)
                              ]) <= 1) # *** MAGIC NUMBER ALERT!!! ***

In [14]:
    ## Do not exceed the budget
    solver.Add(solver.Sum([x[i, j, k]*cost[k]
                           for i in range(num_customers)
                           for j in range(num_products)
                           for k in range(num_channels)
                          ]) <= availableBudget)

<ortools.linear_solver.pywraplp.Constraint; proxy of <Swig Object of type 'operations_research::MPConstraint *' at 0x1188356c0> >

In [25]:
cost

channel
gift          20.0
newsletter    15.0
seminar       23.0
Name: cost, dtype: float64

### Get the channel constraints

In [15]:
# channel minima
channelConstraints_n_range = scenario_sheet.range('channelConstraints_n').options(numbers=int).value
channelConstraints_df = pd.DataFrame(channelConstraints_n_range, index=channels, columns=['n_min', 'n_max'])

Adjust the constraints for the sample size.

In [16]:
channelConstraints_df['n_min_adjusted'] = channelConstraints_df['n_min']/sample_size

### Set the channel constraints

In [17]:
# minimums for channel
for k in range(num_channels):
    solver.Add(solver.Sum([x[i, j, k]
        for i in range(num_customers)
        for j in range(num_products)
        ]) >= channelConstraints_df.loc[channels[k], 'n_min_adjusted'])

### Get the product constraints

In [18]:
# product minima
productConstraints_n_range = scenario_sheet.range('productConstraints_n').options(numbers=int).value
productConstraints_df = pd.DataFrame(productConstraints_n_range, index=products, columns=['n_min', 'n_max'])

Adjust the constraints for the sample size.

In [19]:
productConstraints_df['n_min_adjusted'] = productConstraints_df['n_min']/sample_size

### Set the channel constraints

In [20]:
# minima for product
for j in range(num_products):
    solver.Add(solver.Sum([x[i, j, k]
        for i in range(num_customers)
        for k in range(num_channels)
        ]) >= productConstraints_df.loc[products[j], 'n_min_adjusted'])

## Set the _objective function_

Set to maximise the revenue $R$. Here $x_{ijk}$ denotes whether customer $i$ receives an offer for product $j$ over channel $k$, $f_k$ denotes the channel adjustment factor, $v_j$ the product value and $p_{ij}$ the probability that customer $i$ takes up product $j$.

$ \max R = \sum_{ijk} x_{ijk} \times f_k \times v_j \times p_{ij}$


> At some point, need to be able to specify 
  1. What to optimize, and 
  2. Whether to maximise or minimise.  

> At the moment we maximise revenue, this could be profit, we could minimise budget, maximise profit or maximise ROI.

In [21]:
#    solver.Minimize(solver.Sum([cost[i][j] * x[i, j] for i in range(num_workers)
#                                                     for j in range(num_tasks)]))

solver.Maximize(solver.Sum([x[i, j, k]*factor[k]*productValue[j]*product_probs[products[j]].iloc[i]
                           for i in range(num_customers)
                           for j in range(num_products)
                           for k in range(num_channels)]))

### Invoke the solver

> Need a routine here to evaluate whether the solver is solving. That is, set the most iterations and a time limit.

In [22]:
# Invoke the solver
# t = time.process_time()
sol = solver.Solve()
# elapsed_time = time.process_time() - t

In [23]:
sol

0

Print out the solution. We can print out more information about the constraints. What happens in `xlwings` when the python routine prints – does it go to the logs?

In [36]:
report = [(channels[k], products[j], product_probs.name.iloc[i], x[i, j, k].solution_value()*cost[k],
          x[i, j, k].solution_value()*factor[k]*productValue[j]*product_probs[products[j]].iloc[i]) 
          for i in range(num_customers) 
          for j in range(num_products) 
          for k in range(num_channels)  if x[i, j, k].solution_value() > 0]

report_bd = pd.DataFrame(report, columns=['channel', 'product', 'customer', 'cost', 'revenue'])

print('Total revenue = %d' % (solver.Objective().Value()))
print('Total budget  = %d' % (report_bd['cost'].sum()) )

display(report_bd).head(10)

Total revenue = 3988
Total budget  = 1845


Unnamed: 0,channel,product,customer,cost,revenue
0,seminar,Pension,Brandon Trujillo,23.0,88.999706
1,seminar,Pension,Chelsea Carter,23.0,65.956422
2,seminar,Savings,Matthew Rodriguez,23.0,42.148905
3,seminar,Pension,Ann Schneider,23.0,55.401253
4,gift,Pension,Kevin Price,20.0,17.790143
5,seminar,Pension,Peter Bishop,23.0,87.090482
6,seminar,Pension,Roberto Barker,23.0,78.861178
7,seminar,Pension,Ronald Hall,23.0,51.553407
8,seminar,Mortgage,Megan Cook,23.0,60.967503
9,seminar,Mortgage,Miranda Merritt,23.0,37.626029


AttributeError: 'NoneType' object has no attribute 'head'

Channel counts.

In [37]:
report_bd.groupby(['channel', 'product']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,customer,cost,revenue
channel,product,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gift,Car loan,6,6,6
gift,Mortgage,1,1,1
gift,Pension,1,1,1
gift,Savings,2,2,2
newsletter,Car loan,10,10,10
seminar,Mortgage,15,15,15
seminar,Pension,36,36,36
seminar,Savings,14,14,14


The sample has given us the rough outline of the optimization. Using these figures, replicate using non-linear minimization.

In [59]:
n_obs_orig = num_customers
n_obs_new = product_probs_all.shape[0]

In [61]:
product_probs = product_probs_all

n_obs = product_probs.shape[0]

adjustment_factor = n_obs/n_obs_orig
availableBudget = availableBudget_total

product_probs.head()

Unnamed: 0_level_0,name,Car loan,Savings,Mortgage,Pension
customerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,Matthew Harvey,0.0,0.0,0.0,0.0
1.0,Joshua Wilcox,0.0,0.0,0.179932,0.0
2.0,Yolanda Vasquez,0.330731,0.580556,0.0,0.0
3.0,Jessica Alvarado,0.0,0.630242,0.509746,0.0
4.0,Gregory Martinez,0.0,0.320511,0.0,0.288832


In [62]:
num_customers = product_probs.shape[0]

In [63]:
offer_scale = int(n_obs_new/n_obs_orig)

# get the offers from the original optimization by product and channel
sample_counts = pandas.pivot_table(report_bd, index='channel', columns='product', values='customer', 
                                   aggfunc=len, fill_value=0)

offers = sample_counts.stack()*offer_scale

In [66]:
offers

channel     product 
gift        Car loan     600
            Mortgage     100
            Pension      100
            Savings      200
newsletter  Car loan    1000
            Mortgage       0
            Pension        0
            Savings        0
seminar     Car loan       0
            Mortgage    1500
            Pension     3600
            Savings     1400
dtype: int64

In [67]:
product_profit = product_probs[products]*productValue
product_profit_0 = product_profit*factor[0]
product_profit_1 = product_profit*factor[1]
product_profit_2 = product_profit*factor[2]
product_profit_0.columns = [pp + ' ' + channels[0] for pp in products]
product_profit_1.columns = [pp + ' ' + channels[1] for pp in products]
product_profit_2.columns = [pp + ' ' + channels[2] for pp in products]
product_profit = pandas.concat([product_profit_0, product_profit_1, product_profit_2], axis=1)

# The world of R

As of yet, the non-linear minimization in Python has not worked properly, but it _has_ with R and `nlm()`. Until I can get it to work, the workaround is to use `rpy2` to run R from Python.

> **To do:** Get the non-linear minimization right in Python.

Import the requisite libraries.

In [39]:
import rpy2.robjects as robjects

In [74]:
from rpy2.robjects.packages import importr
# import R's "base" package
base = importr('base')

# import R's "utils" package
utils = importr('utils')
stats = importr('stats')

In [41]:
# import rpy2's package module
import rpy2.robjects.packages as rpackages

# import R's utility package
utils = rpackages.importr('utils')

# select a mirror for R packages
utils.chooseCRANmirror(ind=1) # select the first mirror in the list

rpy2.rinterface.NULL

Install packages using R's `install.package`. (I should not have to do this again.)

In [51]:
# R package names
packnames = ('magrittr', 'dplyr', 'data.table', 'dtplyr', 'stringr')

# R vector of strings
from rpy2.robjects.vectors import StrVector

# Selectively install what needs to be install.
# We are fancy, just because we can.
for x in packnames:
    if not(rpackages.isinstalled(x)):
        utils.install_packages(StrVector(names_to_install))

All I need to run in R is the non-linear minimization, and whatever is needed to supply the appropiate data.

### The dual function (R)

```
dual <- function(u) {
  if (dim(product_profit)[2] != length(u)) {
    print(c(dim(product_profit)[2], length(u)))
    stop("Mismatched dimensions")
    }
  d <- sweep(product_profit, 2, u)
  v <- apply(d, 1, max) 
  v[v < 0] <- 0
  y <- offers%*%u + sum(v)
  y
}
```

### The opimisation (R)

```{r}
u_init <- offers*0
out <- nlm(dual, u_init, print.level = 1)
```

### Getting the solution (R)

```{r}
mindual <- out$minimum
u <- out$estimate
mindual
u
```

In [82]:
robjects.r('''
        # create a function `dual`
            dual <- function(u, pp, offers) {
              if (dim(pp)[2] != length(u)) {
                print(c(dim(pp)[2], length(u)))
                stop("Mismatched dimensions")
                }
              d <- sweep(pp, 2, u)
              v <- apply(d, 1, max) 
              v[v < 0] <- 0
              y <- offers%*%u + sum(v)
              y
            }
        ''')

R object with classes: ('function',) mapped to:
<SignatureTranslatedFunction - Python:0x12232b248 / R:0x7fcdb3a332e0>

### Test the new function

To do this, need to create the `product_profit` array in R.

In [52]:
from rpy2.robjects import r, pandas2ri
pandas2ri.activate()

In [71]:
r_product_profit = pandas2ri.py2ri(product_profit)

<class 'rpy2.robjects.vectors.DataFrame'>


In [86]:
u_test = robjects.FloatVector([11.2, 15, 6.02, 19.5, 0, 4.98, 2.23, 7.75, 50.2, 23.2, 9.09, 35.2])
r_offers = robjects.IntVector(offers)

c(600L, 100L, 100L, 200L, 1000L, 0L, 0L, 0L, 0L, 1500L, 3600L, 
1400L)


In [83]:
dual = robjects.r['dual']

In [84]:
print(dual.r_repr())

function (u, pp, offers) 
{
    if (dim(pp)[2] != length(u)) {
        print(c(dim(pp)[2], length(u)))
        stop("Mismatched dimensions")
    }
    d <- sweep(pp, 2, u)
    v <- apply(d, 1, max)
    v[v < 0] <- 0
    y <- offers %*% u + sum(v)
    y
}


In [87]:
dual(u_test, pp=r_product_profit, offers=r_offers)

0
392929.108463


In [91]:
u_init = robjects.FloatVector(offers*0)
r_out = stats.nlm(dual, p=u_init, pp=r_product_profit, offers=r_offers, print_level=1)

iteration = 0

Step:

 [1]
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0


Parameter:

 [1]
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0


Function Value

[1]
 444098.6


Gradient:

 [1]
   600.0000
   100.0000
   100.0000
   200.0000
  1000.0000
     0.0000


 [7]
     0.0000
     0.0000
  -311.0000
  -427.9999
   176.0000
 -1978.0000




iteration = 38

Parameter:

 [1]
 4.913197e+00
 1.940439e+01
 1.440843e+01
 3.464217e+01
 2.600312e-07


 [6]
 5.015150e+00
 6.741571e+00
 8.683146e+00
 1.502853e+01
 3.020966e+01


[11]
 2.216741e+01
 5.453634e+01


Function Value

[1]
 377409.3


Gradient:

 [1]
    0.00000000
    0.25487755
    0.21914910
    0.00000000
 -312.92123051


 [6]
    0.00000000
    0.00000000
    0.00000000
    0.00000000
   -0.35793251


[11]
   -0.01505956
   -0.26847066




Successive iterates within tolerance.

Current iterate is probably solution.





In [107]:
r_u = r_out.rx('estimate')[0]

In [110]:
u = [r_u[i] for i in range(len(r_u))] # ugly way to convert

In [111]:
d = product_profit.sub(u) 
v = d.max(axis = 1)
v[v<0] = 0
ndx = np.argsort(-v)

In [119]:
pd.melt(d, id_vars=).head()

Unnamed: 0,variable,value
0,Car loan gift,-4.913197
1,Car loan gift,-4.913197
2,Car loan gift,1.701431
3,Car loan gift,-4.913197
4,Car loan gift,-4.913197


In [None]:
d.

In [114]:
# ```{r}
# d_DT <- data.table(d)
# d_DT[, customerid := product_probs$customerid]
# d_DT[, v := v]

# d_DT_melt <- melt(d_DT, id.vars = c("customerid", "v"))
# d_DT_alloc <- d_DT_melt[order(customerid, -value)][, lapply(.SD, head, 1), by = .(customerid)][seq(sum(offers))]

# # check counts
# d_DT_alloc[, .N, by = .(variable)]
# ```

[4.913196720841552,
 19.40438530078488,
 14.408426026875894,
 34.64217001924421,
 2.6003117952399604e-07,
 5.015149745980467,
 6.741571315184431,
 8.683146105396691,
 15.028531556600194,
 30.209656694586542,
 22.167408318358905,
 54.53633509493715]

In [115]:
d.head(10)

Unnamed: 0_level_0,Car loan gift,Savings gift,Mortgage gift,Pension gift,Car loan newsletter,Savings newsletter,Mortgage newsletter,Pension newsletter,Car loan seminar,Savings seminar,Mortgage seminar,Pension seminar
customerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0.0,-4.913197,-19.404385,-14.408426,-34.64217,-2.600312e-07,-5.01515,-6.741571,-8.683146,-15.028532,-30.209657,-22.167408,-54.536335
1.0,-4.913197,-19.404385,-3.61253,-34.64217,-2.600312e-07,-5.01515,-4.042597,-8.683146,-15.028532,-30.209657,-5.973564,-54.536335
2.0,1.701431,3.817874,-14.408426,-34.64217,1.653657,0.790415,-6.741571,-8.683146,-5.106591,4.623732,-22.167408,-54.536335
3.0,-4.913197,5.805306,16.176307,-34.64217,-2.600312e-07,1.287273,0.904612,-8.683146,-15.028532,7.60488,23.709691,-54.536335
4.0,-4.913197,-6.583944,-14.408426,-11.535604,-2.600312e-07,-1.810039,-6.741571,-2.906505,-15.028532,-10.978995,-22.167408,-19.876486
5.0,-4.913197,-4.322061,-14.408426,-34.64217,-2.600312e-07,-1.244569,-6.741571,-8.683146,-15.028532,-7.58617,-22.167408,-54.536335
6.0,-4.913197,15.706449,-0.935321,12.860157,-2.600312e-07,3.762559,-3.373295,3.192436,-15.028532,22.456595,-1.95775,16.717155
7.0,-4.913197,3.472281,23.704466,-34.64217,-2.600312e-07,0.704017,2.786652,-8.683146,-15.028532,4.105342,35.00193,-54.536335
8.0,-4.913197,-19.404385,-14.408426,-34.64217,-2.600312e-07,-5.01515,-6.741571,-8.683146,-15.028532,-30.209657,-22.167408,-54.536335
9.0,5.851129,-19.404385,-14.408426,15.360953,2.691081,-5.01515,-6.741571,3.817635,1.117958,-30.209657,-22.167408,20.46835


In [116]:
product_profit.head(10)

Unnamed: 0_level_0,Car loan gift,Savings gift,Mortgage gift,Pension gift,Car loan newsletter,Savings newsletter,Mortgage newsletter,Pension newsletter,Car loan seminar,Savings seminar,Mortgage seminar,Pension seminar
customerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,0.0,0.0,10.795896,0.0,0.0,0.0,2.698974,0.0,0.0,0.0,16.193844,0.0
2.0,6.614627,23.222259,0.0,0.0,1.653657,5.805565,0.0,0.0,9.921941,34.833389,0.0,0.0
3.0,0.0,25.209691,30.584733,0.0,0.0,6.302423,7.646183,0.0,0.0,37.814537,45.877099,0.0
4.0,0.0,12.820441,0.0,23.106566,0.0,3.20511,0.0,5.776642,0.0,19.230662,0.0,34.659849
5.0,0.0,15.082324,0.0,0.0,0.0,3.770581,0.0,0.0,0.0,22.623487,0.0,0.0
6.0,0.0,35.110835,13.473105,47.502327,0.0,8.777709,3.368276,11.875582,0.0,52.666252,20.209658,71.25349
7.0,0.0,22.876666,38.112892,0.0,0.0,5.719166,9.528223,0.0,0.0,34.314999,57.169339,0.0
8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9.0,10.764326,0.0,0.0,50.003123,2.691082,0.0,0.0,12.500781,16.146489,0.0,0.0,75.004685


There are many **convenience functions** available.

In [None]:
# read_value = sht.range("D5").options(numbers=int).value
read_value = sht.range("D4").options(numbers=int).value
print(read_value*2)
sht.range("D6").value = read_value * 2

Read a named range called `test_value`.

In [None]:
test_value = sht.range("test_value").options(numbers=int).value
print(test_value)

Write to the named range.

In [None]:
sht.range("test_value").value = test_value + 1
sht.range("test_value").value

In [None]:
sht.range('A1').value = [['Foo 1', 'Foo 2', 'Foo 3'], [10.0, 20.0, 30.0]]
sht.range('A1').expand().value

**Powerful converters** handle most data types of interest, including Numpy arrays and Pandas DataFrames in both directions:

In [None]:
import pandas as pd
df = pd.DataFrame([[1,2], [3,4]], columns=['a', 'b'])
sht.range('A1').value = df
sht.range('A1').options(pd.DataFrame, expand='table').value

**Matplotlib figures** can be shown as pictures in Excel:

In [None]:
import matplotlib.pyplot as plt
fig = plt.figure()
plt.plot([1, 2, 3, 4, 5])
sht.pictures.add(fig, name='MyPlot', update=True)

Shortcut for the active sheet: `xw.Range`

If you want to quickly talk to the active sheet in the active workbook, you don’t need instantiate a workbook and sheet object, but can simply do:

In [None]:
xw.Range('A1').value = 'Foo'
xw.Range('A1').value
'Foo'

## 2. Macros: Call Python from Excel

