In [1]:
%config IPCompleter.greedy=True

In [2]:
%matplotlib inline

In [3]:
import sys, os, re
import locale
import math
import logging, inspect, pprint
import configparser
import functools

import numpy as np
import pandas as pd

In [4]:
import matplotlib.pyplot as plt

In [5]:
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

'en_US.UTF-8'

# input_data.py contains our sample data, including hull quantites and me values

In [6]:
import input_data

### what mineral types do we need to build these hulls (and what are their prices)

In [7]:
mineral_ids = set(sum([[y for y in input_data.ore_yield_dict[x].keys()] for x in input_data.ore_yield_dict.keys()], []))
mineral_prices = dict(zip(mineral_ids, [input_data.item_prices_dict[x] for x in mineral_ids]))

### how many of those types to build what we want

In [8]:
def calc_qty(count, quantity, me, fm):
    # print("count:{}, quantity:{}, me:{}, fm:{}".format(count, quantity, me, fm))
    return round(max(float(count),
        math.ceil(round(float(count)*(float(quantity)*(1.0-float(me)/100.0)*float(fm)), 2))))


# add up the minerals for each ship order
total_minerals = dict(zip(mineral_ids, [0] * len(mineral_ids)))
for ship_id in input_data.ship_build_dict.keys():
    ship_qty = input_data.ship_build_dict.get(ship_id, 0)
    ship_me = input_data.ship_me_dict.get(ship_id, 0)
    ship_minerals = dict(zip(mineral_ids,
        [calc_qty(ship_qty, input_data.ship_requirements_dict[ship_id].get(mineral_id, 0), ship_me, 1.0) for mineral_id in sorted(list(mineral_ids))])
        )
    print("{:12s} x {:3} ({:2}) = {}".format(input_data.item_info_dict[ship_id]['name'], ship_qty, ship_me, ship_minerals))
    total_minerals = dict(zip(mineral_ids, 
        [total_minerals[x] + ship_minerals[x] for x in mineral_ids]
    ))

print("{:12s}            = {}".format('Total', total_minerals))

required_minerals_df = pd.DataFrame.from_dict(dict(zip(total_minerals.keys(), [{'quantity':total_minerals.get(x, 0)} for x in total_minerals.keys()]))).astype(np.double)
required_minerals_series = required_minerals_df.loc['quantity',:]
required_minerals_df.head()

Thrasher     x  10 (10) = {34: 431163, 35: 103347, 36: 35793, 37: 15813, 38: 10, 39: 1620, 40: 234}
Vexor        x   1 (10) = {34: 560000, 35: 120000, 36: 37000, 37: 9100, 38: 2601, 39: 1181, 40: 321}
Total                   = {34: 991163, 35: 223347, 36: 72793, 37: 24913, 38: 2611, 39: 2801, 40: 555}


Unnamed: 0,34,35,36,37,38,39,40
quantity,991163.0,223347.0,72793.0,24913.0,2611.0,2801.0,555.0


### how much would it cost to just buy the minerals directly

In [9]:
minerals_cost = np.int(sum([total_minerals.get(x, 0) * mineral_prices.get(x, 0) for x in mineral_ids]))
print("minerals_cost: {:n}".format(minerals_cost))

minerals_cost: 28,219,556


### refining efficiency - this is the variable that makes the most difference

In [10]:
refining_efficiency = 0.50

### inputs (and their yield) - some experimentation here to reduce the number of inputs to make the optimization problem smaller (faster)

In [11]:
# start with everything
ore_yield_df = pd.DataFrame.from_dict(input_data.ore_yield_dict).fillna(0)
ore_yield_df = ore_yield_df.mul(refining_efficiency).apply(np.floor)

# only keep the compressed minerals
ore_yield_df = ore_yield_df[list(filter(lambda x: input_data.item_info_dict[x].get('name', '').split()[0] == 'Compressed', list(ore_yield_df.columns)))]

# cut this down even more - only keep the 'basic' compressed ore types "Compressed Foo", not "Compressed Sparkling Foo"
ore_yield_df = ore_yield_df[list(filter(lambda x: len(input_data.item_info_dict[x].get('name', '').split()) == 2, list(ore_yield_df.columns)))]

# include the minerals, they may be better than the ore - assume 1x mineral refines to .. 1x mineral
# mineral_yield_df = pd.DataFrame.from_dict(dict(zip(mineral_ids, [{x:1} for x in mineral_ids]))).fillna(0).apply(np.floor)
# inputs_yield_df = pd.concat([ore_yield_df, mineral_yield_df], axis=1).apply(np.floor)

inputs_yield_df = ore_yield_df
inputs_yield_df.head()

Unnamed: 0,28367,28388,28391,28397,28401,28403,28406,28410,28416,28420,28422,28424,28429,28432
34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,24000.0,87.0,0.0,75.0,200.0
36,600.0,600.0,1000.0,750.0,0.0,0.0,75.0,30.0,0.0,0.0,35.0,15.0,0.0,0.0
37,0.0,0.0,0.0,400.0,0.0,120.0,0.0,60.0,37.0,500.0,0.0,0.0,0.0,0.0
38,0.0,0.0,400.0,0.0,60.0,45.0,25.0,0.0,0.0,80.0,0.0,0.0,0.0,0.0
39,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0


### prices of all the things and also a subset for the inputs for convenience

In [12]:
all_packaged_volumes_df = pd.DataFrame.from_dict(dict(zip(input_data.item_info_dict.keys(), [{'volume':input_data.item_info_dict[x].get('packagedVolume', 0)} for x in input_data.item_info_dict.keys()])))
all_price_df = pd.DataFrame.from_dict(dict(zip(input_data.item_prices_dict.keys(), [{'price':input_data.item_prices_dict[x]} for x in input_data.item_prices_dict.keys()])))
inputs_price_df = all_price_df[inputs_yield_df.columns]
inputs_price_df.head() 

Unnamed: 0,28367,28388,28391,28397,28401,28403,28406,28410,28416,28420,28422,28424,28429,28432
price,368500.0,331800.0,953400.0,330700.0,249900.0,198900.0,109400.0,26500.0,15180.0,1217000.0,7890.0,5345.0,2367.0,1902.0


# yabox.algorithms.DE

In [13]:
import yabox.algorithms

## in order to use the multiprocessing options, we need to be able to pickle the loss function. using pandas makes this hard

In [14]:
def summarize_results(results_fitness, results_data):
    
    result_series = pd.Series(data=results_data, index=inputs_yield_df.columns).apply(np.ceil)
    result_cost = np.int(inputs_price_df[result_series.index].dot(result_series))
    result_volume = np.int(np.ceil(all_packaged_volumes_df[result_series.index].dot(result_series)))

    if results_fitness != None:
        print("fitness: {:n}".format(np.float(results_fitness)))

    print("result_cost: {:n}".format(result_cost))
    print("({:.2f}x the cost of the required minerals cost)".format(result_cost/minerals_cost))
    print("result_volume: {:n}".format(result_volume))

    print("")
    actual_minerals_series = inputs_yield_df.mul(result_series).sum(axis=1)
    excess_minerals_series = actual_minerals_series - required_minerals_series
    print("excess_minerals_series")
    print(pd.DataFrame(excess_minerals_series).T)
    if excess_minerals_series.min() < 0:
        print("MISSING MINERALS")
        print(pd.DataFrame(excess_minerals_series[excess_minerals_series.lt(0)]).T)
    
    result_mineral_cost = np.int(all_price_df[actual_minerals_series.index].dot(actual_minerals_series).apply(np.int))
    result_mineral_volume = np.int(np.ceil(all_packaged_volumes_df[actual_minerals_series.index].dot(actual_minerals_series)))

    print("")
    print("result_mineral_cost: {:n}".format(result_mineral_cost))
    print("({:.2f}x the cost of the required minerals cost)".format(result_mineral_cost/minerals_cost))
    print("result_mineral_volume: {:n}".format(result_mineral_volume))

In [15]:
def get_input_bounds(required_minerals_df, inputs_yield_df):
    required_minerals_series = pd.Series(data=required_minerals_df.T.quantity)
    inverted_inputs_yield_df = (1.0 / inputs_yield_df).replace(np.inf, 0).fillna(0)
    max_bound_series = pd.Series(data=inverted_inputs_yield_df.apply(lambda x: np.asarray(x) * np.asarray(required_minerals_series)).apply(np.ceil).max())
    min_bound_series = pd.Series(data=np.zeros(len(max_bound_series.index)), index=max_bound_series.index)
    return list(zip(min_bound_series, max_bound_series))


def get_maximum_cost(required_minerals_df, inputs_price_df, inputs_yield_df):
    required_minerals_series = pd.Series(data=required_minerals_df.T.quantity)
    inverted_inputs_yield_df = (1.0 / inputs_yield_df).replace(np.inf, 0).fillna(0)
    max_bound_series = pd.Series(inverted_inputs_yield_df.apply(lambda x: np.asarray(x) * np.asarray(required_minerals_series)).apply(np.ceil).max(), name='max')
    return np.double(inputs_price_df[max_bound_series.index].dot(max_bound_series))


def get_loss_function(all_price_df, required_minerals_df, inputs_yield_df, maximum_cost):

    yield_price_series = all_price_df[inputs_yield_df.columns].loc['price',:]
    mineral_price_series = all_price_df[required_minerals_df.columns].loc['price',:]    
    required_minerals_series = required_minerals_df.loc['quantity',:]

    def objective(candidate):
        candidate_cost = np.double(yield_price_series.dot(candidate))
        if candidate_cost < 0: 
            print("cost:{}. candidate:{}".format(candidate_cost, candidate))
            return maximum_cost
        
        # add an extra penalty if we are missing any minerals
        # this make the loss function *slow*
        actual_minerals_series = inputs_yield_df.mul(candidate).sum(axis=1)
        excess_minerals_series = (actual_minerals_series - required_minerals_series)
        if excess_minerals_series.min() < 0:
            #print("excess_minerals_series:{}".format(excess_minerals_series))
            # excess_minerals_series = excess_minerals_series[excess_minerals_series.lt(0)]
            missing_minerals_price = np.abs(np.int(mineral_price_series[excess_minerals_series.index].dot(excess_minerals_series)))
            candidate_cost += 1000000.0 * missing_minerals_price

        return candidate_cost - maximum_cost

    return objective


In [16]:
loss_function = get_loss_function(all_price_df, required_minerals_df, inputs_yield_df, get_maximum_cost(required_minerals_df, inputs_price_df, inputs_yield_df))
input_bounds = get_input_bounds(required_minerals_df, inputs_yield_df)

In [17]:
rval = yabox.algorithms.DE(loss_function, input_bounds, maxiters=1000).solve(show_progress=True)

Optimizing (DE):   0%|          | 0/1000 [00:00<?, ?it/s]

In [18]:
rval

(array([[9.44682165e+00, 3.54270622e+01, 6.08844172e+00, 1.98210584e+01,
         4.21248676e+00, 2.27141886e+00, 2.77996815e+00, 1.75013716e+02,
         1.49779114e+02, 1.45160131e+00, 3.84874174e+02, 4.29191704e+02,
         2.49481283e+03, 4.20890550e+03]]),
 -1883539776.1493366)

In [19]:
summarize_results(rval[1], rval[0][0])

fitness: -1.88354e+09
result_cost: 59,714,183
(2.12x the cost of the required minerals cost)
result_volume: 1,709

excess_minerals_series
         34       35      36     37     38     39    40
0  119257.0  12553.0  2237.0  557.0  859.0  159.0  85.0

result_mineral_cost: 30,782,512
(1.09x the cost of the required minerals cost)
result_mineral_volume: 14,539


# scipy.optimize.differential_evolution

In [20]:
import scipy.optimize

### someimtes the scipy optimizer violates (lower) bounds and I have no idea why.

In [21]:
scipy_input_bounds = scipy.optimize.Bounds(list(map(lambda x: x[0], input_bounds)), list(map(lambda x: x[1], input_bounds)))

In [22]:
rval = scipy.optimize.differential_evolution(loss_function, scipy_input_bounds)

In [23]:
rval

     fun: -1866689655.076872
 message: 'Optimization terminated successfully.'
    nfev: 17535
     nit: 81
 success: True
       x: array([9.53677125e+00, 4.49771951e+01, 7.01879133e+00, 2.33736094e+01,
       1.81385868e+01, 3.95491207e+00, 5.67489227e+01, 2.49061734e+02,
       1.16407080e+02, 2.83742763e+00, 1.70309108e+02, 8.17695020e+01,
       4.33487585e+03, 3.11503537e+03])

In [24]:
summarize_results(rval.fun, rval.x)

fitness: -1.86669e+09
result_cost: 75,986,617
(2.69x the cost of the required minerals cost)
result_volume: 1,910

excess_minerals_series
        34        35      36      37      38     39     40
0  44039.0  100158.0  5197.0  5996.0  3574.0  919.0  105.0

result_mineral_cost: 36,620,178
(1.30x the cost of the required minerals cost)
result_mineral_volume: 14,782


# PyMathProg (pymprog as used by https://github.com/sergey-koumirov/AesPublica/blob/master/app/services/optimize.py)

In [25]:
# import pymprog