In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import itertools
import heapq

In [3]:
prev_data = np.array([[10, 1, 0.00998], [80,6,0.18178], [37,3,0.05118], [17,1,0.07539], [90,10,0.11807], [31,2,0.06987], [50,4,0.08516], [20,2,0.01614], [73,4,0.24060], [89,8,0.15184], [100,8,0.049]])

x = prev_data[:,0:2]
y = prev_data[:,2]

model = LinearRegression()
model.fit(x, y)

def predict_percentage(inhabitants, multiplier):
    return model.predict([[inhabitants, multiplier]])[0]

In [4]:
def check_accuracy(test_data, predict_percentage):
    """
    Evaluates the accuracy of the predict_percentage model.

    Parameters:
    - test_data: List of tuples in the form [(inhabitants, multiplier, true_percentage), ...]
    - predict_percentage: A function that takes (inhabitants, multiplier) and returns predicted percentage

    Returns:
    - Dictionary with MAE and RMSE
    """
    y_true = []
    y_pred = []

    for inhabitants, multiplier, true_pct in test_data:
        pred = predict_percentage(inhabitants, multiplier)
        y_true.append(true_pct)
        y_pred.append(pred)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)

    return {
        'Mean Absolute Error': round(mae, 7),
        'Root Mean Squared Error': round(rmse, 7),
        'Accuracy': round(100 - mae, 7)  # crude accuracy: lower MAE means better
    }

check_accuracy(prev_data, predict_percentage) 

{'Mean Absolute Error': 0.0340498,
 'Root Mean Squared Error': 0.0460207,
 'Accuracy': 99.9659502}

its too good, probably because training and testing on same data. Below it is used on data from last year:

In [5]:
old_data = np.array([[(24,2), (70,4), (41,3), (21,2), (60,4)],
                [(47,3), (82,5), (87,5), (80,5), (35,3)],
                [(73,4), (89,5), (100,8), (90,7), (17,2)],
                [(77,5), (83,5), (85,5), (79,5), (55,4)],
                [(12,2), (27,3), (52,4), (15,2), (30,3)]]).reshape(25,2)
old_res = np.array([[0.015,0.082,0.019,0,0.037],
[0.03,0.062,0.098,0.041,0.012],
[0.113,0.108,0.049,0.034,0.006],
[0.046,0.054,0.065,0.054,0.026],
[0,0,0.019,0,0]]).reshape(25)

combined_old_data = np.hstack((old_data, old_res.reshape(-1, 1)))

check_accuracy(combined_old_data, predict_percentage) 



{'Mean Absolute Error': 0.0804439,
 'Root Mean Squared Error': 0.0898621,
 'Accuracy': 99.9195561}

seems like it is still pretty good, hence we can use it to predict the percentages in the next round

In [6]:
new_data = np.array([
    [80, 6], [50, 4], [83, 7], [31, 2], [60, 4],   # Row A
    [89, 8], [10, 1], [37, 3], [70, 4], [90, 10],  # Row B
    [17, 1], [40, 3], [73, 4], [100, 15], [20, 2], # Row C
    [41, 3], [79, 5], [23, 2], [47, 3], [30, 2]    # Row D
])

preds = np.array([predict_percentage(m,h) for m,h in new_data]).clip(0)
combined_new_data = np.hstack((new_data, preds.reshape(-1, 1)))

print(combined_new_data)

[[8.00000000e+01 6.00000000e+00 1.48831701e-01]
 [5.00000000e+01 4.00000000e+00 9.20237523e-02]
 [8.30000000e+01 7.00000000e+00 1.32119470e-01]
 [3.10000000e+01 2.00000000e+00 7.65723255e-02]
 [6.00000000e+01 4.00000000e+00 1.29620591e-01]
 [8.90000000e+01 8.00000000e+00 1.26686290e-01]
 [1.00000000e+01 1.00000000e+00 2.56102482e-02]
 [3.70000000e+01 3.00000000e+00 7.11391455e-02]
 [7.00000000e+01 4.00000000e+00 1.67217429e-01]
 [9.00000000e+01 1.00000000e+01 7.44634076e-02]
 [1.70000000e+01 1.00000000e+00 5.19280349e-02]
 [4.00000000e+01 3.00000000e+00 8.24181970e-02]
 [7.30000000e+01 4.00000000e+00 1.78496480e-01]
 [1.00000000e+02 1.50000000e+01 0.00000000e+00]
 [2.00000000e+01 2.00000000e+00 3.52158035e-02]
 [4.10000000e+01 3.00000000e+00 8.61778808e-02]
 [7.90000000e+01 5.00000000e+00 1.73063300e-01]
 [2.30000000e+01 2.00000000e+00 4.64948549e-02]
 [4.70000000e+01 3.00000000e+00 1.08735984e-01]
 [3.00000000e+01 2.00000000e+00 7.28126417e-02]]


Using this as priori as we did in round2, we can find out the best options to go with.

In [None]:
def fee(n):
    """Compute the fee for a total of n expeditions.

    Parameters
    ----------
    n : int
        Number of expeditions.
    
    Returns
    -------
    float
        Fee.
    """
    if n == 1:
        return 0
    if n == 2:
        return -50
    if n == 3:
        return -150

def payoff(mults, hunts, shares):
    """Compute the final profit after the expeditions.

    Parameters
    ----------
    mults : list of int
        Multipliers for each destination.
    hunts : list of int
        Hunters for each destination.
    shares : list of int
        Shares for each destination.
    
    Returns
    -------
    float
        Profit.
    """
    
    val = 10 * sum([mult/(hunt + 100*share) for (mult, hunt, share) in zip(mults, hunts, shares)]) + fee(len(mults))

    if 17 in mults and 30 in mults and 31 in mults:
        print(mults,hunts,shares)
        print(val)
        for (mult, hunt, share) in zip(mults, hunts, shares):
            print(mult, hunt, share)
            print(hunt+100*share)
            print(mult/(hunt+100*share))
            print(100*mult/(hunt + 100*share))
    return val

def maximize_prior_top(arr, shares, k):
    """Given the prior, compute solutions that yield top k profits.

    Parameters
    ----------
    shares : list of int
        Shares for each destination.
    k : int
        Number of solutions
    
    Returns
    -------
    list of tuple
        Top k profits and optimal expeditions.
    """
    datas = [(mult, hunt, share) for ([mult, hunt], share) in zip(arr, shares)]
    heap = []
    iterables = [itertools.combinations(datas, n_exp) for n_exp in range(1, 4)]
    for (i, data) in enumerate(itertools.chain.from_iterable(iterables)):
        mults = [tupl[0] for tupl in data]
        hunts = [tupl[1] for tupl in data]
        shares = [tupl[-1] for tupl in data]
        val = payoff(mults, hunts, shares)
        expeditions = list(zip(mults, hunts))
        if i < k:
            heapq.heappush(heap, (val, expeditions))
        elif val > heap[0][0]:
            heapq.heappop(heap)
            heapq.heappush(heap, (val, expeditions))
    return sorted(heap, reverse=True)

In [28]:
print(maximize_prior_top(new_data,preds, 1))

[(10, 1), (100, 15), (20, 2)]
for i in range(1,2):
    shares = preds**i
    res = maximize_prior_top(new_data,shares, 1)
    print("Exponent:", i, "Profit:", f"{res[0][0]:.2f}", "Optimal expeditions:", res[0][1])

[31, 17, 30] [2, 1, 2] [0.07657232551281415, 0.05192803492636933, 0.07281264168924886]
768.7469475989528
31 2 0.07657232551281415
9.657232551281414
3.2100293573117513
321.0029357311751
17 1 0.05192803492636933
6.1928034926369335
2.7451218208703883
274.51218208703887
30 2 0.07281264168924886
9.281264168924887
3.2323182978073888
323.2318297807389
[(1463.1384313069284, [(89, 8), (90, 10), (100, 15)])]
[31, 17, 30] [2, 1, 2] [0.07657232551281415, 0.05192803492636933, 0.07281264168924886]
768.7469475989528
31 2 0.07657232551281415
9.657232551281414
3.2100293573117513
321.0029357311751
17 1 0.05192803492636933
6.1928034926369335
2.7451218208703883
274.51218208703887
30 2 0.07281264168924886
9.281264168924887
3.2323182978073888
323.2318297807389
Exponent: 1 Profit: 1463.14 Optimal expeditions: [(89, 8), (90, 10), (100, 15)]


In [17]:
for [a,b],c in zip(new_data, preds):
    print(f"{a}\t{b}\t{c}")

80	6	0.1488317010936363
50	4	0.09202375227361613
83	7	0.1321194696208629
31	2	0.07657232551281415
60	4	0.1296205905092691
89	8	0.12668628961878536
10	1	0.025610248161412268
37	3	0.07113914551073659
70	4	0.16721742874492207
90	10	0.07446340755541205
17	1	0.05192803492636933
40	3	0.08241819698143248
73	4	0.17849648021561793
100	15	0.0
20	2	0.0352158034535959
41	3	0.08617788080499779
79	5	0.17306330021354038
23	2	0.0464948549242918
47	3	0.10873598374638956
30	2	0.07281264168924886
