In [11]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

In [None]:
prev_data = np.array([[10, 1, 0.00998], [80,6,0.18178], [37,3,0.05118], [17,1,0.07539], [90,10,0.11807], [31,2,0.06987], [50,4,0.08516], [20,2,0.01614], [73,4,0.24060], [89,8,0.15184]])

x = prev_data[:,0:2]
y = prev_data[:,2]

model = LinearRegression()
model.fit(x, y)

def predict_percentage(inhabitants, multiplier):
    return model.predict([[inhabitants, multiplier]])[0]

0.006672627689203542

In [None]:
def check_accuracy(test_data, predict_percentage):
    """
    Evaluates the accuracy of the predict_percentage model.

    Parameters:
    - test_data: List of tuples in the form [(inhabitants, multiplier, true_percentage), ...]
    - predict_percentage: A function that takes (inhabitants, multiplier) and returns predicted percentage

    Returns:
    - Dictionary with MAE and RMSE
    """
    y_true = []
    y_pred = []

    for inhabitants, multiplier, true_pct in test_data:
        pred = predict_percentage(inhabitants, multiplier)
        y_true.append(true_pct)
        y_pred.append(pred)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)

    return {
        'Mean Absolute Error': round(mae, 7),
        'Root Mean Squared Error': round(rmse, 7),
        'Accuracy': round(100 - mae, 7)  # crude accuracy: lower MAE means better
    }

check_accuracy(prev_data, predict_percentage) 

{'Mean Absolute Error': 0.0149438,
 'Root Mean Squared Error': 0.0174193,
 'Accuracy': 99.9850562}

its too good, probably because training and testing on same data. Below it is used on data from last year:

In [28]:
old_data = np.array([[(24,2), (70,4), (41,3), (21,2), (60,4)],
                [(47,3), (82,5), (87,5), (80,5), (35,3)],
                [(73,4), (89,5), (100,8), (90,7), (17,2)],
                [(77,5), (83,5), (85,5), (79,5), (55,4)],
                [(12,2), (27,3), (52,4), (15,2), (30,3)]]).reshape(25,2)
old_res = np.array([[0.015,0.082,0.019,0,0.037],
[0.03,0.062,0.098,0.041,0.012],
[0.113,0.108,0.049,0.034,0.006],
[0.046,0.054,0.065,0.054,0.026],
[0,0,0.019,0,0]]).reshape(25)

combined_old_data = np.hstack((old_data, old_res.reshape(-1, 1)))

check_accuracy(combined_old_data, predict_percentage) 



{'Mean Absolute Error': 0.1045648,
 'Root Mean Squared Error': 0.1221223,
 'Accuracy': 99.8954352}

seems like it is still pretty good, hence we can use it to predict the percentages in the next round

In [29]:
new_data = np.array([
    [80, 6], [50, 4], [83, 7], [31, 2], [60, 4],   # Row A
    [89, 8], [10, 1], [37, 3], [70, 4], [90, 10],  # Row B
    [17, 1], [40, 3], [73, 4], [100, 15], [20, 2], # Row C
    [41, 3], [79, 5], [23, 2], [47, 3], [30, 2]    # Row D
])

preds = np.array([predict_percentage(m,h) for m,h in new_data]).clip(0)
combined_new_data = np.hstack((new_data, preds.reshape(-1, 1)))

print(combined_new_data)

[[8.00000000e+01 6.00000000e+00 1.87452281e-01]
 [5.00000000e+01 4.00000000e+00 1.05002575e-01]
 [8.30000000e+01 7.00000000e+00 1.67850066e-01]
 [3.10000000e+01 2.00000000e+00 7.83110152e-02]
 [6.00000000e+01 4.00000000e+00 1.55691799e-01]
 [8.90000000e+01 8.00000000e+00 1.63454617e-01]
 [1.00000000e+01 1.00000000e+00 6.67262769e-03]
 [3.70000000e+01 3.00000000e+00 7.39155667e-02]
 [7.00000000e+01 4.00000000e+00 2.06381023e-01]
 [9.00000000e+01 1.00000000e+01 9.89055735e-02]
 [1.70000000e+01 1.00000000e+00 4.21550845e-02]
 [4.00000000e+01 3.00000000e+00 8.91223339e-02]
 [7.30000000e+01 4.00000000e+00 2.21587790e-01]
 [1.00000000e+02 1.50000000e+01 0.00000000e+00]
 [2.00000000e+01 2.00000000e+00 2.25528688e-02]
 [4.10000000e+01 3.00000000e+00 9.41912563e-02]
 [7.90000000e+01 5.00000000e+00 2.17192342e-01]
 [2.30000000e+01 2.00000000e+00 3.77596360e-02]
 [4.70000000e+01 3.00000000e+00 1.24604791e-01]
 [3.00000000e+01 2.00000000e+00 7.32420928e-02]]
