In [1]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

# Notebook-wide logger; get_logger is presumably supplied by the
# `from core.utils import *` star import above (not visible here).
log = get_logger()

# Show floats with two decimal places in every DataFrame rendered below.
pd.options.display.float_format = "{:.2f}".format

## RPGF 3 Data Check and Cleanup

In [2]:
# Read the ballot data from CSV into `df`, the raw frame used by the checks below.
ballots_csv_path = "data/dummy_data_rpgf3.csv"
df = pd.read_csv(ballots_csv_path)

In [3]:
# Show ten random rows, then print headline counts derived from "Has published".
display(df.sample(n=10))

published_flags = df['Has published']
num_ballots = published_flags.count()      # non-null entries in the column
num_submissions = published_flags.sum()    # each True contributes 1

print(f"Check - Num Ballots: {num_ballots}")
print(f"Check - Num Submissions: {num_submissions}")
print(f"Check - Num Not Voted: {num_ballots - num_submissions}")

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
416,y1Sg2ioFFV,False,False,,2023-11-01 09:42:49,2023-11-26 02:42:30,0,[]
615,WEjbErADrG,False,False,,2023-10-02 07:24:11,2023-10-19 21:05:15,0,[]
730,6ZWC2VQ54w,False,True,2023-09-17 16:07:40,2023-09-16 10:45:36,2023-10-31 20:02:41,12,"[{'amount': '860257', 'projectId': 'proj456'},..."
780,gHSnPScjC3,False,True,2023-10-15 16:59:12,2023-10-10 21:45:04,2023-10-21 09:54:32,20,"[{'amount': '160107', 'projectId': 'proj305'},..."
169,KgKnpUwr2u,True,False,,2023-10-30 07:25:38,2023-11-13 21:41:13,0,[]
376,e7WXR5MbLU,True,False,,2023-10-04 07:12:29,2023-10-12 04:32:03,0,[]
394,aNp7nGFTmi,True,False,,2023-11-01 14:32:39,2023-11-09 13:52:05,0,[]
366,ej92hOFu17,True,False,,2023-10-30 08:48:08,2023-11-08 12:06:14,0,[]
58,AmViwOjQf0,False,False,,2023-10-09 15:38:50,2023-11-11 05:13:43,0,[]
546,Q3IK1EJVUp,True,True,2023-10-19 19:04:41,2023-09-18 19:51:45,2023-10-28 06:03:04,15,"[{'amount': '269096', 'projectId': 'proj366'},..."


Check - Num Ballots: 1000
Check - Num Submissions: 493
Check - Num Not Voted: 507


In [4]:
# Sanity checks on the raw ballots; every finding is reported through the logger.
row_count = df.shape[0]
voter_total = df["Address"].nunique()

# 1) Every row should carry a distinct address.
duplicate_count = row_count - voter_total
if duplicate_count == 0:
    log.info("Address is unique.")
else:
    log.info(f"Address is not unique. There are {duplicate_count} duplicates.")

# 2) How many rows have "Has voted" set to False?
unvoted_rows = (df["Has voted"] == False).sum()
if unvoted_rows > 0:
    log.info(f"{unvoted_rows} voters out of {voter_total} have not voted.")
else:
    log.info("All voters have voted.")

# 3) How many rows have "Has published" set to False?
unpublished_rows = (df["Has published"] == False).sum()
if unpublished_rows > 0:
    log.info(f"{unpublished_rows} voters out of {voter_total} have not published.")
else:
    log.info("All voters have published.")

2023-12-01 16:30:44 INFO | Address is unique.
2023-12-01 16:30:44 INFO | 507 voters out of 1000 have not voted.
2023-12-01 16:30:44 INFO | 507 voters out of 1000 have not published.


In [5]:
# Turn every ballot's "Votes" cell into its own frame. safe_json_loads and
# expand_json are project helpers (presumably from core.utils, not shown here);
# each call receives the ballot's row label so its rows can be traced back to `df`.
expanded_list = []
for row_label, raw_votes in df["Votes"].items():
    parsed_votes = safe_json_loads(raw_votes)
    expanded_list.append(expand_json(parsed_votes, row_label))

# Stack the per-ballot frames into one long frame.
expanded_df = pd.concat(expanded_list, ignore_index=True)

# Index the vote rows by "original_index" and join the ballot-level columns
# from `df` (re-indexed on its own index, as in the original) onto them.
votes_by_ballot = expanded_df.set_index("original_index")
ballots_by_label = df.set_index(df.index)
result_df = votes_by_ballot.join(ballots_by_label)

In [6]:
# Spot-check one voter: count their expanded vote rows and preview the first ten.
testing_address = 'PJHMETRidb'
is_testing_voter = result_df['Address'] == testing_address
print_df = result_df[is_testing_voter]

print(f"Num Projects Voted : {print_df['projectId'].count()}")
display(print_df.head(10))

Num Projects Voted : 18


Unnamed: 0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
913,403018,proj478,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,302656,proj73,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,415318,proj160,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,118968,proj346,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,225967,proj399,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,637492,proj440,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,737502,proj493,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,475858,proj359,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,665986,proj313,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."
913,815862,proj274,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,"[{'amount': '403018', 'projectId': 'proj478'},..."


In [7]:
# Map every raw column to its snake_case analysis name, listed in the final
# column order (ballot-level columns first, the two vote columns last).
# Renaming by name instead of by position means a change in the CSV column
# order can no longer silently attach the wrong label to a column.
column_renames = {
    "Address": "voter_address",
    "Has voted": "has_voted",
    "Has published": "has_published",
    "Published at": "published_at",
    "Created at": "created_at",
    "Updated at": "updated_at",
    "Projects in ballot": "projects_in_ballot",
    "amount": "amount",
    "projectId": "project_id",
}

# Fail loudly if the joined frame carries a column this cell does not know
# about, rather than dropping it without notice. The raw "Votes" column is
# expected: it has already been expanded into amount/projectId rows.
unexpected_columns = result_df.columns.difference([*column_renames, "Votes"])
if len(unexpected_columns) > 0:
    raise ValueError(f"Unexpected columns in result_df: {list(unexpected_columns)}")

# Select (which also drops "Votes") and rename without mutating in place;
# a missing expected column raises KeyError on the selection.
result_df = result_df[list(column_renames)].rename(columns=column_renames)

In [8]:
# Preview the cleaned rows for the spot-check voter (capped at 70 rows).
result_df.loc[result_df['voter_address'] == testing_address].head(70)

Unnamed: 0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,403018,proj478
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,302656,proj73
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,415318,proj160
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,118968,proj346
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,225967,proj399
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,637492,proj440
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,737502,proj493
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,475858,proj359
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,665986,proj313
913,PJHMETRidb,False,True,2023-10-24 14:14:42,2023-09-04 21:40:19,2023-11-17 13:35:19,18,815862,proj274


## Calculate Voting Results

In [9]:
# Allocation parameters all come from core.constants; gather them in one
# place before constructing the allocator.
allocator_settings = {
    "total_amount": constants.TOTAL_AMOUNT,
    "min_amount": constants.MIN_AMOUNT,
    "quorum": constants.QUORUM,
}
allocator = ProjectAllocator(**allocator_settings)

In [10]:
# Compute the starting per-project allocation from the cleaned vote rows.
# The sample displayed in the next cell shows the result indexed by project_id
# with votes_count, median_amount and is_eligible columns.
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [11]:
# Inspect ten randomly chosen projects from the initial allocation.
display(initial_allocation.sample(n=10))

Unnamed: 0_level_0,votes_count,median_amount,is_eligible
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
proj121,15,590856.0,False
proj182,7,347224.0,False
proj232,11,407502.0,False
proj331,7,395148.0,False
proj497,7,548346.0,False
proj304,10,472295.0,False
proj475,9,562337.0,False
proj242,11,721534.0,False
proj429,13,733937.0,False
proj10,6,262550.0,False


In [12]:
# Scale the eligible projects' amounts until they sum to the total budget
# (constants.TOTAL_AMOUNT). Each pass is delegated to
# allocator.scale_allocations_oneby; judging by the recorded cell output it
# reports the scale factor and whether any project fell below the minimum
# (details live in the allocator, not in this notebook).
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()

# Start from the median amounts; the loop below rescales this column.
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]

# Hard cap on rescaling passes so the loop always terminates.
max_iterations = 10
current_iteration = 0

# Compare against the target with a relative tolerance instead of `!=`:
# a sum of scaled floats can miss the target by rounding error alone, and an
# exact comparison would then keep rescaling until max_iterations is hit.
while (
    not np.isclose(
        allocation_iter["scaled_amount"].sum(),
        constants.TOTAL_AMOUNT,
        rtol=1e-9,
        atol=0.0,
    )
    and current_iteration < max_iterations
):
    allocation_iter = allocator.scale_allocations_oneby(allocation_iter)
    current_iteration += 1
    print('current_iteration: ' + str(current_iteration))

amount eligible: 9420860.0
scale factor: 3.1844226535581677
No projects below minimum OP
current_iteration: 1


In [13]:
# Report how the scaling loop ended. The total is compared to the target with
# a relative tolerance rather than `!=`, so a float sum that differs from the
# target only by rounding error is not misreported as a failure.
final_total = allocation_iter["scaled_amount"].sum()
target_met = np.isclose(final_total, constants.TOTAL_AMOUNT, rtol=1e-9, atol=0.0)

if current_iteration == max_iterations and not target_met:
    # The loop ran out of passes while the total was still off target.
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

2023-12-01 16:30:44 INFO | Condition met with 30000000.0 OP allocated through 1 iteration(s).


In [14]:
# Attach the scaled amounts to the full project table with a left merge on
# project_id; projects with no scaled amount (NaN after the merge) get 0.
scaled_amounts = allocation_iter["scaled_amount"]
final_allocation = initial_allocation.merge(
    scaled_amounts, how="left", on="project_id"
)
final_allocation = final_allocation.fillna({"scaled_amount": 0})

# Completeness check: the final table should hold as many distinct projects
# as appear in the cleaned vote rows.
voted_project_ids = result_df["project_id"]
if final_allocation.index.nunique() != voted_project_ids.nunique():
    log.info(
        "Final allocation table has missing projects. Printing out the missing projects below."
    )
    is_allocated = voted_project_ids.isin(final_allocation.index)
    log.info(voted_project_ids[~is_allocated])
else:
    log.info("Final allocation table has included all the projects.")

2023-12-01 16:30:44 INFO | Final allocation table has included all the projects.


In [15]:
# Write the final allocation table to CSV and log where it was saved.
output_csv_path = "data/rpgf3_allocation_final.csv"
final_allocation.to_csv(output_csv_path)

log.info(f"Results saved in {output_csv_path}.")

2023-12-01 16:30:44 INFO | Results saved in data/rpgf3_allocation_final.csv.
