In [1]:
# Standard Imports
import math

import numpy as np
import pandas as pd
import os
import random
from typing import Dict

import matplotlib.pyplot as plt
import sys
sys.path.append('..')  # Add this line to include the directory above

# Import Data Processing Utilities
from utils.processing import (remove_blocklist_addresses,
                              validate_dataframe_june_19,
                              preprocess_data,
                              add_tef_graduate_column,
                              calculate_nft_weighted_sum,
                              create_dict_for_equal_cweights)


In [2]:
# Load the raw 2024-06-19 NFT balance snapshot with pandas' default parsing.
# NOTE(review): the next cell reads this same file again into
# `original_user_data`, and `real_df` is not referenced in the visible part of
# the notebook - confirm whether this extra copy is still needed.
real_df = pd.read_csv(filepath_or_buffer="../data/2024-06-19_nft_balances.csv")

In [3]:
# Location of the raw NFT balance snapshot, relative to the notebook directory.
file_name = "../data/2024-06-19_nft_balances.csv"

# Fail fast with an actionable message when the snapshot is missing. Previously
# the result of the existence check was computed and then discarded (a bare
# expression in the middle of a cell is never displayed), so it had no effect.
file_exists = os.path.exists(file_name)
if not file_exists:
    raise FileNotFoundError(
        f"NFT balances file not found: {file_name!r} "
        f"(resolved from working directory {os.getcwd()!r})"
    )

# Read the CSV file and index the rows by the 'Id' column. set_index returns a
# new frame here instead of mutating in place, so the cell is a single
# expression from file to indexed DataFrame and is safe to re-run.
original_user_data = pd.read_csv(file_name).set_index('Id')


In [4]:
# Addresses that must be excluded from the analysis.
# NOTE(review): the addresses are written in mixed case; confirm that
# remove_blocklist_addresses matches them against the data in the same case.
BLOCKLIST_ADDRESSES = [
    "0xa55078f87ceDa4aC72380C639229014acD3D1F75",
    "0xc6837f9d06D95Fa90CF91A6Dd6bB8cb51bfcfc59",
    "0x4CF57d42B8aB8D7Bfa9Be1cdC35Ed84429cD2168",
]

# Processing pipeline, evaluated from the innermost call outwards:
#   1. validate_dataframe_june_19 - check that everything is valid, clean if not
#   2. remove_blocklist_addresses - drop the blocklisted addresses
#   3. preprocess_data            - preprocess the remaining rows
#   4. add_tef_graduate_column    - add the tef_graduate column
user_data = add_tef_graduate_column(
    preprocess_data(
        df=remove_blocklist_addresses(
            df=validate_dataframe_june_19(original_user_data),
            blocklist=BLOCKLIST_ADDRESSES,
        )
    )
)

Column tokenId 2 contains 1 values other than 0 or 1.
Setting these values to be in (0, 1)


In [30]:
# Persist the processed balances to the data directory so later steps can
# start from the cleaned table (the index is written too, pandas' default).
user_data.to_csv(path_or_buf="../data/processed_2024-06-19_nft_balances.csv")

In [6]:
# Show the column labels of the processed frame; these are the attribute names
# that the weights dictionary defined further down is keyed by.
user_data.columns

Index(['tokenId 1', 'tokenId 2', 'tokenId 3', 'tokenId 4', 'tokenId 5',
       'tokenId 6', 'tokenId 7', 'tokenId 8', 'tokenId 9', 'tokenId 10',
       'tokenId 11', 'tokenId 12', 'tokenId 13', 'tokenId 14', 'tokenId 15',
       'tokenId 16', 'tokenId 17', 'tokenId 18', 'tokenId 19', 'tokenId 20',
       'tokenId 21', 'tokenId 22', 'tokenId 23', 'tokenId 24', 'tokenId 25',
       'tokenId 26', 'tokenId 27', 'tokenId 28', 'tokenId 29', 'tokenId 30',
       'tokenId 31', 'tokenId 32', 'tokenId 33', 'tokenId 34', 'tokenId 35',
       'tokenId 36', 'tokenId 37', 'tokenId 38', 'tokenId 39', 'tokenId 40',
       'tokenId 41', 'tokenId 42', 'tokenId 43', 'tokenId 44', 'tokenId 45',
       'tef_graduate'],
      dtype='object')

## Creating An Initial Weights Dictionary

The dictionary below gives initial default weights for all attributes that a wallet could have.

**Note:** Some attributes are given a weight of 0 as a safeguard, so that attributes no user should hold after processing are not accidentally counted.

In [7]:
# Baseline weight for every attribute column, keyed by column name.
# Each pair below is (token number, weight) and becomes the entry
# "tokenId <number>": weight, in ascending token order.
# A weight of 0 is intentional: it keeps that attribute from being counted.
default_weights_dict = {
    f"tokenId {token_number}": weight
    for token_number, weight in (
        (1, 7), (2, 0), (3, 7), (4, 0), (5, 7),
        (6, 0), (7, 7), (8, 0), (9, 7), (10, 0),
        (11, 20), (12, 10), (13, 10), (14, 10), (15, 16),
        (16, 1), (17, 16), (18, 5), (19, 1), (20, 3),
        (21, 3), (22, 1), (23, 10), (24, 10), (25, 10),
        (26, 10), (27, 10), (28, 10), (29, 10), (30, 10),
        (31, 15), (32, 15), (33, 15), (34, 15), (35, 15),
        (36, 15), (37, 15), (38, 15), (39, 18), (40, 1),
        (41, 4), (42, 18), (43, 16), (44, 1), (45, 10),
    )
}

# Placeholder only: the real tef_graduate weight is filled in by a later step.
default_weights_dict["tef_graduate"] = 0

In [8]:
# Column names grouped by tier. Contiguous runs of token numbers are written
# as ranges (the upper bound of range() is exclusive).
# NOTE(review): tokenId 44 is not listed in any of the three groups below -
# confirm whether that is intentional.
graduate_tokenIds = [f"tokenId {token_number}" for token_number in range(1, 11)]
expert_tokenIds = [
    f"tokenId {token_number}"
    for token_number in (11, 15, 17, *range(31, 44), 45)
]
student_tokenIds = [
    f"tokenId {token_number}"
    for token_number in (12, 13, 14, 16, *range(18, 31))
]

In [9]:
# Derive the adjusted weights from the defaults; the helper prints a summary
# of the original and final cweights as it runs.
# NOTE(review): only the expert and graduate lists are passed - presumably the
# helper works out the student columns itself, which would leave
# `student_tokenIds` unused here. Confirm against utils.processing.
modified_weights_dict = create_dict_for_equal_cweights(
    df=user_data,
    original_weights_dict=default_weights_dict,
    expert_tokenIds_list=expert_tokenIds,
    graduate_tokenIds_list=graduate_tokenIds,
)

The original total cweight is: 9382.
The original expert cweight is: 708.
The original graduate cweight is: 3850.
The total TEF tokenIDs weight (for passing all five TEF modules): is 35.
There are 110 TEF graduates.
The original student cweight is: 4824.
Original student cweight is greater than original graduate cweight.
Scaling graduate cweight.
The TEF graduate boost to make the two cweights equal is: 8.854545454545452.
After calculation, final student cweight should be: 4824.
After calculation, final graduate cweight should be: 4824.0.
The columns of this new TEF graduate df are: 

tokenId 1
tokenId 2
tokenId 3
tokenId 4
tokenId 5
tokenId 6
tokenId 7
tokenId 8
tokenId 9
tokenId 10
tef_graduate
There are 110 graduates in this DataFrame.
The weight of TEF graduate is: 8.854545454545452

 Final Check: 

Actual final total cweight is 14472.000000000002.
Actual final expert cweight is 4824.0.
Actual final graduate cweight is 4824.0.
Actual final student cweight is: 4824.000000000002.


In [26]:
# pandas is already imported as `pd` in the notebook's first cell, so the
# duplicate mid-notebook import that used to sit here has been dropped.

# Convert modified_weights_dict to a one-row DataFrame: the dict is first
# loaded as a single 'Weight' column (one row per attribute name) and then
# transposed, giving one column per attribute and a single row labelled
# 'Weight'. The construction is kept exactly as before so the table, and the
# CSV written from it, does not change.
modified_weights_df = pd.DataFrame.from_dict(
    modified_weights_dict, orient='index', columns=['Weight']
).T

In [27]:
# Display the one-row weights table for a visual check.
modified_weights_df

Unnamed: 0,tokenId 1,tokenId 2,tokenId 3,tokenId 4,tokenId 5,tokenId 6,tokenId 7,tokenId 8,tokenId 9,tokenId 10,...,tokenId 37,tokenId 38,tokenId 39,tokenId 40,tokenId 41,tokenId 42,tokenId 43,tokenId 44,tokenId 45,tef_graduate
Weight,7.0,0.0,7.0,0.0,7.0,0.0,7.0,0.0,7.0,0.0,...,102.20339,102.20339,122.644068,6.813559,27.254237,122.644068,109.016949,1.0,68.135593,8.854545


In [28]:
# Save the adjusted weights table to the data directory
# (the 'Weight' row label is written as the index column, pandas' default).
modified_weights_df.to_csv(path_or_buf="../data/2024-06-19_modified_weights_dict.csv")