In [35]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

log = get_logger()

pd.set_option("display.float_format", "{:.2f}".format)

## RPGF 3 Data Check and Cleanup

In [36]:
df = pd.read_csv("data/dummy_data_rpgf3.csv")

In [37]:
display( df.sample(10) )
print('Check - Num Ballots: ' + str(df['Has published'].count()))
print('Check - Num Submissions: ' + str(df['Has published'].sum()))
print('Check - Num Not Voted: ' + str(df['Has published'].count()-df['Has published'].sum()))

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
772,aYSKacGuVJ,True,False,,2023-11-16 12:27:34,2023-11-30 06:28:35,,[]
692,HLzcxxK0Jq,True,True,2023-10-25 18:16:22,2023-10-24 22:41:24,2023-11-05 07:42:14,166.0,"[{'amount': '491756', 'projectId': 'proj327'},..."
377,GZDvV2sfCw,True,True,2023-11-02 15:41:48,2023-09-07 18:24:10,2023-11-10 17:21:22,418.0,"[{'amount': '451796', 'projectId': 'proj246'},..."
680,Mo6EKrMkVl,True,False,,2023-11-16 09:51:07,2023-11-27 00:09:40,,[]
639,AGz9Gk912P,True,True,2023-11-19 05:11:27,2023-11-17 18:39:15,2023-11-23 13:29:46,94.0,"[{'amount': '134175', 'projectId': 'proj325'},..."
816,KuPqDhpcC1,True,False,,2023-11-22 11:08:36,2023-11-29 12:50:43,,[]
742,GvsV3pyScP,False,False,,2023-09-26 05:03:51,2023-11-11 10:02:42,,[]
136,eK7XQqtAtZ,True,False,,2023-10-10 08:30:20,2023-10-11 03:56:44,,[]
554,sRUZ1lJkLX,True,True,2023-11-26 00:42:58,2023-11-25 00:06:24,2023-11-27 03:52:48,487.0,"[{'amount': '85121', 'projectId': 'proj87'}, {..."
455,3CC9zDF0YK,False,False,,2023-09-23 19:04:45,2023-11-25 21:47:30,,[]


Check - Num Ballots: 1000
Check - Num Submissions: 486
Check - Num Not Voted: 514


In [38]:
# Check if voter_address is unique
if df["Address"].nunique() == df.shape[0]:
    log.info("Address is unique.")
else:
    diff = df.shape[0] - df["Address"].nunique()
    log.info(f"Address is not unique. There are {diff} duplicates.")

# Check if all voters have voted
if df[df["Has voted"] == False].shape[0] > 0:
    not_voted = df[df["Has voted"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"{not_voted} voters out of {total} have not voted.")
else:
    log.info("All voters have voted.")

# Check if all voters have published
if df[df["Has published"] == False].shape[0] > 0:
    not_voted = df[df["Has published"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"{not_voted} voters out of {total} have not published.")
else:
    log.info("All voters have published.")

2023-12-01 16:05:58 INFO | Address is unique.
2023-12-01 16:05:58 INFO | 518 voters out of 1000 have not voted.
2023-12-01 16:05:58 INFO | 514 voters out of 1000 have not published.


In [39]:
# Apply the function and concatenate results
expanded_list = [
    expand_json(safe_json_loads(row), idx) for idx, row in df["Votes"].items()
]
expanded_df = pd.concat(expanded_list, ignore_index=True)

result_df = expanded_df.set_index("original_index").join(df.set_index(df.index))

In [42]:
testing_address = 'upS4HKnaym'
print_df = result_df[result_df['Address'] == testing_address]
print('Num Projects Voted : ' + str(print_df['projectId'].count()))
display(print_df.head(10))

Num Projects Voted : 18


Unnamed: 0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
188,846539,proj87,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,544633,proj370,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,66023,proj149,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,96559,proj167,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,54627,proj175,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,936331,proj472,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,833315,proj252,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,290359,proj159,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,993632,proj333,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."
188,368979,proj367,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,"[{'amount': '846539', 'projectId': 'proj87'}, ..."


In [29]:
columns = [col for col in result_df.columns if col not in ["amount", "projectId"]]
columns += ["amount", "projectId"]  # Add the columns to the end of the list
result_df = result_df[columns]

# Update df columns names
result_df.columns = [
    "voter_address",
    "has_voted",
    "has_published",
    "published_at",
    "created_at",
    "updated_at",
    "projects_in_ballot",
    "votes",
    "amount",
    "project_id",
]

result_df.drop(columns="votes", inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_df.drop(columns="votes", inplace=True)


In [33]:
# result_df.head()
result_df[result_df['voter_address'] == testing_address].head(70)

Unnamed: 0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,846539,proj87
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,544633,proj370
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,66023,proj149
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,96559,proj167
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,54627,proj175
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,936331,proj472
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,833315,proj252
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,290359,proj159
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,993632,proj333
188,upS4HKnaym,True,True,2023-10-05 15:44:33,2023-09-17 14:19:37,2023-10-29 08:40:52,67.0,368979,proj367


## Calculate Voting Results

In [None]:
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)

In [None]:
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [None]:
# Scaling the total to 30M OP by project and filter out those with < 1500 OP
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]

# Set a maximum number of iterations to prevent infinite loop
max_iterations = 10
current_iteration = 0

while (
    allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
    and current_iteration < max_iterations
):
    allocation_iter = allocator.scale_allocations(allocation_iter)
    current_iteration += 1

In [None]:
# Check if the loop exited due to reaching max iterations
if (
    current_iteration == max_iterations
    and allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
):
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    final_total = allocation_iter["scaled_amount"].sum()
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

In [None]:
# join the initial allocation with the final allocation, if scaled_amount is null then make it 0
final_allocation = initial_allocation.merge(
    allocation_iter["scaled_amount"],
    how="left",
    on="project_id",
).fillna({"scaled_amount": 0})

# check if the final allocation table still contains all projects.
if final_allocation.index.nunique() == result_df["project_id"].nunique():
    log.info("Final allocation table has included all the projects.")
else:
    log.info(
        "Final allocation table has missing projects. Printing out the missing projects below."
    )
    log.info(
        result_df[~result_df["project_id"].isin(final_allocation.index)]["project_id"]
    )

In [None]:
# export csv
# allocation_iter.drop(columns="median_amount", inplace=True)
final_allocation.to_csv("data/rpgf3_allocation_final.csv")

log.info(f"Results saved in data/rpgf3_allocation_final.csv.")