In [16]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

# Notebook-wide logger used by every check below.
# NOTE(review): get_logger is not imported by name; presumably it arrives via
# `from core.utils import *` — confirm.
log = get_logger()

# Show floats with two decimals in all DataFrame renderings.
pd.options.display.float_format = "{:.2f}".format

## RPGF 3 Data Check and Cleanup

In [17]:
# Load the ballot export that the rest of the notebook works from.
ballots_csv_path = "data/dummy_data_rpgf3.csv"
df = pd.read_csv(ballots_csv_path)

In [18]:
# Eyeball ten random ballots (unseeded, so the rows differ between runs).
display(df.sample(10))

# Headline counts, all derived from the 'Has published' flag.
published_flags = df["Has published"]
ballot_count = published_flags.count()
submission_count = published_flags.sum()

print(f"Check - Num Ballots: {ballot_count}")
print(f"Check - Num Submissions: {submission_count}")
# NOTE(review): this figure is "ballots not published" (count - sum of
# 'Has published'); it does not read the 'Has voted' column despite the label.
print(f"Check - Num Not Voted: {ballot_count - submission_count}")

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
839,evzEB3HJMl,False,False,,2023-09-10 16:53:11,2023-11-24 20:37:13,0,[]
850,RvZatqpcBm,True,True,2023-11-14 14:52:09,2023-10-22 12:38:54,2023-11-22 15:54:54,19,"[{'amount': '967479', 'projectId': 'proj342'},..."
502,t2bUhngVp0,True,False,,2023-10-27 00:57:20,2023-10-28 03:28:44,0,[]
49,sox9fumVak,True,True,2023-11-26 07:05:46,2023-11-25 08:53:35,2023-11-28 07:31:48,4,"[{'amount': '165316', 'projectId': 'proj397'},..."
885,asH4YTzpuD,True,True,2023-11-13 17:26:47,2023-11-12 23:59:31,2023-11-16 08:01:11,13,"[{'amount': '674078', 'projectId': 'proj255'},..."
625,wrlDO0r7Cb,False,False,,2023-10-28 18:01:05,2023-11-07 00:09:48,0,[]
364,LAS0mxiqAS,False,True,2023-10-24 03:02:19,2023-10-23 12:59:56,2023-10-25 18:31:58,9,"[{'amount': '976556', 'projectId': 'proj459'},..."
903,zOfuavBPyL,False,True,2023-10-13 10:13:20,2023-10-01 00:50:07,2023-11-29 04:30:22,7,"[{'amount': '908718', 'projectId': 'proj98'}, ..."
287,xg9fPuP9v9,True,True,2023-10-14 11:36:11,2023-10-10 00:05:41,2023-10-27 21:08:54,12,"[{'amount': '364577', 'projectId': 'proj20'}, ..."
890,vImNMp5xHs,True,True,2023-11-11 08:59:24,2023-11-06 02:25:29,2023-11-16 01:07:20,9,"[{'amount': '579584', 'projectId': 'proj299'},..."


Check - Num Ballots: 1000
Check - Num Submissions: 493
Check - Num Not Voted: 507


In [19]:
# --- Address uniqueness -----------------------------------------------------
# One row per address is expected; report how many rows are surplus otherwise.
duplicate_rows = df.shape[0] - df["Address"].nunique()
if duplicate_rows == 0:
    log.info("Address is unique.")
else:
    log.info(f"Address is not unique. There are {duplicate_rows} duplicates.")


def _log_flag_coverage(flag_column, action):
    """Log how many rows of ``df`` have ``flag_column`` equal to False.

    ``action`` is the past participle used in the log message
    (e.g. "voted", "published").
    """
    # Explicit comparison with False (rather than `~`) keeps the exact
    # matching semantics of the original check for any column dtype.
    missing = (df[flag_column] == False).sum()
    if missing > 0:
        voters = df["Address"].nunique()
        log.info(f"{missing} voters out of {voters} have not {action}.")
    else:
        log.info(f"All voters have {action}.")


# --- Flag coverage ----------------------------------------------------------
_log_flag_coverage("Has voted", "voted")
_log_flag_coverage("Has published", "published")

2023-12-01 16:32:28 INFO | Address is unique.
2023-12-01 16:32:28 INFO | 507 voters out of 1000 have not voted.
2023-12-01 16:32:28 INFO | 507 voters out of 1000 have not published.


In [20]:
# Apply the function and concatenate results
expanded_list = [
    expand_json(safe_json_loads(row), idx) for idx, row in df["Votes"].items()
]
expanded_df = pd.concat(expanded_list, ignore_index=True)

result_df = expanded_df.set_index("original_index").join(df.set_index(df.index))

In [21]:
# Spot-check the expansion for a single, hand-picked voter.
testing_address = 'PJHMETRidb'
is_test_voter = result_df['Address'] == testing_address
print_df = result_df.loc[is_test_voter]

projects_voted = print_df['projectId'].count()
print(f"Num Projects Voted : {projects_voted}")
display(print_df.head(10))

Num Projects Voted : 18


Unnamed: 0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
913,403018,proj478,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,302656,proj73,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,415318,proj160,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,118968,proj346,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,225967,proj399,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,637492,proj440,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,737502,proj493,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,475858,proj359,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,665986,proj313,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,815862,proj274,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."


In [22]:
# Normalise the joined frame: snake_case column names, drop the raw serialized
# votes, and move the per-vote columns to the end.
#
# Renaming is done by NAME rather than by position, so an unexpected column
# order (or an extra column) can never silently mislabel data. Every step
# tolerates already-processed input, so re-running this cell is a no-op instead
# of a length-mismatch error.
column_renames = {
    "Address": "voter_address",
    "Has voted": "has_voted",
    "Has published": "has_published",
    "Published at": "published_at",
    "Created at": "created_at",
    "Updated at": "updated_at",
    "Projects in ballot": "projects_in_ballot",
    "Votes": "votes",
    "projectId": "project_id",
}
vote_columns = ["amount", "project_id"]

result_df = (
    result_df
    .rename(columns=column_renames)           # unknown/missing keys are ignored
    .drop(columns="votes", errors="ignore")   # raw serialized votes no longer needed
)

# Ballot-level columns first (original order kept), per-vote columns last.
columns = [col for col in result_df.columns if col not in vote_columns]
columns += vote_columns
result_df = result_df[columns]

In [23]:
# Show the cleaned rows for the sample voter (at most the first 70).
result_df.loc[result_df["voter_address"] == testing_address].head(70)

Unnamed: 0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,403018,proj478
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,302656,proj73
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,415318,proj160
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,118968,proj346
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,225967,proj399
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,637492,proj440
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,737502,proj493
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,475858,proj359
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,665986,proj313
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,815862,proj274


## Calculate Voting Results

In [24]:
# Build the allocator from the values held in the shared constants module
# (total amount to distribute, per-project minimum, and quorum).
# NOTE(review): ProjectAllocator is not imported by name; presumably it arrives
# via `from core.utils import *` — confirm.
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)

In [25]:
# Aggregate the long-format votes into one row per project. The rendered sample
# below shows the result indexed by project_id with columns votes_count,
# median_amount and is_eligible.
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [26]:
# Preview ten random projects (unseeded sample, so rows vary between runs).
display(initial_allocation.sample(10))

Unnamed: 0_level_0,votes_count,median_amount,is_eligible
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
proj107,8,461543.5,False
proj242,11,721534.0,False
proj58,11,427628.0,False
proj317,15,416528.0,False
proj79,7,612465.0,False
proj339,14,514770.5,False
proj370,8,519426.0,False
proj466,10,432331.0,False
proj45,15,395928.0,False
proj67,9,580981.0,False


In [27]:
# Scale the eligible projects' amounts until they sum to constants.TOTAL_AMOUNT.
# NOTE(review): each pass is delegated to allocator.scale_allocations_oneby,
# which (per its printed output) reports a scale factor and whether any project
# fell below the minimum — confirm the exact filtering rule in core.utils.

# Start from the eligible projects only, seeded with their median amount.
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]

# Cap the number of passes so a non-converging scale cannot loop forever.
max_iterations = 10
current_iteration = 0

# The total is a sum of scaled floats, so an exact `!=` test can stay true
# forever because of rounding (e.g. 29999999.999999996). Compare within a small
# ABSOLUTE tolerance instead; rtol=0 keeps the default relative tolerance
# (which would allow hundreds of OP of drift at this magnitude) out of play.
amount_tolerance = 1e-3

while (
    not np.isclose(
        allocation_iter["scaled_amount"].sum(),
        constants.TOTAL_AMOUNT,
        rtol=0,
        atol=amount_tolerance,
    )
    and current_iteration < max_iterations
):
    allocation_iter = allocator.scale_allocations_oneby(allocation_iter)
    current_iteration += 1
    print('current_iteration: ' + str(current_iteration))

amount eligible: 9420860.0
scale factor: 3.1844226535581677
No projects below minimum OP
current_iteration: 1


In [28]:
# Report how the scaling loop ended.
#
# final_total is computed unconditionally: later cells read it regardless of
# the outcome, and assigning it only on the success path would turn a
# non-converged run into a NameError further down.
final_total = allocation_iter["scaled_amount"].sum()

# Float-safe comparison against the target (absolute tolerance, no relative part).
target_reached = np.isclose(final_total, constants.TOTAL_AMOUNT, rtol=0, atol=1e-3)

if current_iteration == max_iterations and not target_reached:
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

2023-12-01 16:32:29 INFO | Condition met with 30000000.0 OP allocated through 1 iteration(s).


In [29]:
# Attach the scaled amounts to every project from the initial allocation;
# projects that were not scaled (no matching row) get 0.
final_allocation = initial_allocation.merge(
    allocation_iter["scaled_amount"],
    how="left",
    on="project_id",
).fillna({"scaled_amount": 0})

# Check 1: every project that received a vote is present in the final table.
if final_allocation.index.nunique() == result_df["project_id"].nunique():
    log.info("Final allocation table has included all the projects.")
else:
    log.info(
        "Final allocation table has missing projects. Printing out the missing projects below."
    )
    # result_df has one row per vote, so de-duplicate to list each missing
    # project once instead of once per vote cast for it.
    missing_projects = result_df.loc[
        ~result_df["project_id"].isin(final_allocation.index), "project_id"
    ].drop_duplicates()
    log.info(missing_projects)

# Check 2: the merge did not change the allocated total.
# The two sums run over arrays of different length (zeros were added), so their
# floating-point results may differ in the last bits; compare within a small
# absolute tolerance rather than with `==`.
allocated_total = final_allocation["scaled_amount"].sum()
if np.isclose(allocated_total, final_total, rtol=0, atol=1e-3):
    log.info("Final allocation table sums to the right amount of OP: " + str(final_total))
else:
    log.info(
        "Final allocation table does not sum to the total OP. Printing out the missing amount below."
    )
    log.info(
        str(final_total - allocated_total) + " OP"
    )

2023-12-01 16:32:29 INFO | Final allocation table has included all the projects.
2023-12-01 16:32:29 INFO | Final allocation table sums to the total OP.


In [30]:
# Write the final per-project allocation to disk and log the destination.
output_path = "data/rpgf3_allocation_final.csv"
final_allocation.to_csv(output_path)

log.info(f"Results saved in {output_path}.")

2023-12-01 16:32:29 INFO | Results saved in data/rpgf3_allocation_final.csv.
