In [10]:
import numpy as np
import pandas as pd

In [4]:
def TRS(initial_weights):
    # Desired total constraint
    desired_total = sum(initial_weights)

    # Step 1: Truncate the weights to integers
    truncated_weights = np.floor(initial_weights).astype(int)

    # Step 2: Calculate the discrepancy
    total_truncated = np.sum(truncated_weights)
    discrepancy = desired_total - total_truncated

    # Step 3: Replicate individuals to match the constraint
    if discrepancy > 0:
        # Calculate fractional parts
        fractional_parts = initial_weights - truncated_weights
        
        # Replicate individuals in proportion to their fractional parts
        replication_probs = fractional_parts / np.sum(fractional_parts)
        num_replications = np.random.multinomial(discrepancy, replication_probs)
        truncated_weights += num_replications

    # Step 4: Sample individuals if there is an excess
    if discrepancy < 0:
        excess_indices = np.where(truncated_weights > 0)[0]
        excess_weights = truncated_weights[excess_indices]
        
        # Calculate sampling probabilities based on truncated weights
        sampling_probs = excess_weights / np.sum(excess_weights)
        
        # Randomly sample individuals to reduce excess
        num_samples = np.random.multinomial(abs(discrepancy), sampling_probs)
        truncated_weights[excess_indices] -= num_samples

    # Step 5: Your final truncated and rounded integer weights
    return truncated_weights

In [25]:
df_evs = pd.read_csv("Data\pred_evs_sa1")
array = df_evs["pred_evs"]
df_evs["int_evs"] = TRS(array)
print(f"Total pred: {sum(df_evs['pred_evs'])}, Total int: {sum(df_evs['int_evs'])}")
df_evs

Total pred: 6441.814414298115, Total int: 6441


Unnamed: 0,SA1_CODE21,pred_evs,int_evs
0,20301103401,0.293712,0
1,20301103402,0.081866,0
2,20301103403,0.541426,0
3,20301103404,0.313661,0
4,20301103407,0.642970,1
...,...,...,...
11942,21402159219,0.899134,2
11943,21402159220,1.320183,1
11944,21402159221,0.605463,1
11945,21402159222,0.930433,1


In [28]:
df_evs.to_csv("Data\pred_evs_sa1_int.csv", index=False)