# 04 Derive our initial project metrics over 90-day and 180-day windows

Metrics:
- active_addresses_180D
- active_addresses_90D
- daily_active_addresses_180D
- daily_active_addresses_90D
- farcaster_users_180D
- farcaster_users_90D
- transactions_180D
- transactions_90D

## Part 1: Load projects and process the event data

In [1]:
import pandas as pd

In [2]:
# Labeled project contracts; the first CSV column is used as the index
# (the uuid, per the preview below).
DF_PROJECTS = pd.read_csv(
    'data/apps/project_apps_labeled.csv',
    index_col=0,
)

# uuid -> recipient address lookup built from the indexed table.
UUID_RECIPIENT_MAPPING = DF_PROJECTS.loc[:, 'recipient'].to_dict()

# Preview the first row.
DF_PROJECTS.head(n=1)

Unnamed: 0_level_0,recipient,project_type,category,address,chain,contract_type
uuid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
89eb0a1c-36f6-4455-af98-c822064425bb,0xE4EE538019673501F4B75de5aF5CC073Ec0A1487,App,DEX,0xdef1c0ded9bec7f1a1670819833240f027b25eff,Base,dapp_contract


In [3]:
# Reviewed applications table.
DF_APPS = pd.read_csv('data/apps/applications_reviewed.csv')

# Two uuid-keyed lookups: uuid -> project name and uuid -> application id.
UUID_NAME_MAPPING, UUID_APP_MAPPING = (
    DF_APPS.set_index('uuid')[column].to_dict()
    for column in ('name', 'id')
)

# Preview the first row.
DF_APPS.head(n=1)

Unnamed: 0,uuid,charmverseId,agoraProjectRefUID,id,recipient,time,name,status,profile_name,profile_url,...,chain_id,chain,flag_multiple_projects_same_profile,flag_creator_no_address,flag_app_missing_contract,flag_channel_no_channel,flag_charmverse_in_name,flag_creator_address_conflict,count_flags,has_flag
0,6b39038b-34f3-43dd-a2e4-3fe29471e71a,6b39038b-34f3-43dd-a2e4-3fe29471e71a,,0xd774437712806b09d3eb413ee6851ac790fdce47cb1a...,0x4407AfAc54edC2215dB4252995cCCF7946Ef211B,1734789197,0x1bit,pending,trendy,https://warpcast.com/trendy,...,,All Superchain,False,False,True,False,False,False,1,1


In [4]:
# Events dated on or after this day make up the 90-day window.
PARTITION_DATE = '2024-06-01'

DF_EVENTS = pd.read_parquet('data/raw_metric_data/project_events.parquet')

# Keep only events for known projects and drop factory deployments.
# The explicit .copy() yields an independent frame, so the column assignment
# below is a real write rather than a chained in-place update on a slice
# (which warns today and is a silent no-op under pandas Copy-on-Write).
DF_EVENTS = DF_EVENTS[
    DF_EVENTS['uuid'].isin(DF_PROJECTS.index)
    & (DF_EVENTS['data_source'] != 'factory_deploys')
].copy()

# groupby drops rows whose key is NaN by default, so events without a
# Farcaster account get the placeholder id 0 to survive the grouping below.
DF_EVENTS['farcaster_id'] = DF_EVENTS['farcaster_id'].fillna(0)

ID_COLS = ['recipient', 'name']
EVENT_COLS = ['uuid', 'date', 'chain', 'address', 'user_address', 'farcaster_id']

# Collapse duplicate event rows (same key across data sources) by taking the
# largest reported transaction count for each key.
DF_180D = DF_EVENTS.groupby(EVENT_COLS)['count_transactions'].max().reset_index()
DF_180D['recipient'] = DF_180D['uuid'].map(UUID_RECIPIENT_MAPPING)
DF_180D['name'] = DF_180D['uuid'].map(UUID_NAME_MAPPING)

# NOTE(review): DF_180D is not date-filtered here; it presumably relies on the
# parquet extract already covering exactly 180 days -- confirm upstream.
DF_90D = DF_180D[DF_180D['date'] >= PARTITION_DATE]
DF_90D.tail(1)

Unnamed: 0,uuid,date,chain,address,user_address,farcaster_id,count_transactions,recipient,name
21869577,ff7d4b80-83b7-4c59-93da-5b2524195dc9,2024-08-29,Base,0xb676f87a6e701f0de8de5ab91b56b66109766db1,0x977f82a600a1414e583f7f13623f1ac5d58b1c0b,0.0,1.0,0x460464c72E010A805043195c56690347f67BE746,BLOCKLORDS


In [5]:
# Full column layout: identifier columns, then event keys, then the count.
COLS = [*ID_COLS, *EVENT_COLS, 'count_transactions']

## Part 2: Get metrics by UUID

In [6]:
# Metric column names, grouped per metric with the 90D window listed
# before the 180D window.
METRIC_COLS = [
    f'{metric}_{window}'
    for metric in (
        'transactions',
        'active_addresses',
        'farcaster_users',
        'daily_active_addresses',
    )
    for window in ('90D', '180D')
]

In [7]:
def calculate_project_metrics_by_uuid(uuid):
    """Compute the 90-day and 180-day activity metrics for one project uuid.

    Reads the module-level event tables ``DF_180D`` and ``DF_90D``.

    Args:
        uuid: Project identifier matched against the ``uuid`` column.

    Returns:
        dict with the ``uuid`` plus, for each window (``90D`` / ``180D``):
        ``transactions_*`` (sum of ``count_transactions``),
        ``active_addresses_*`` (distinct ``user_address`` values),
        ``farcaster_users_*`` (distinct real ``farcaster_id`` values) and
        ``daily_active_addresses_*`` (per-day distinct addresses summed over
        the window, divided by the window length and truncated to int).
    """
    dff_180D = DF_180D[DF_180D.uuid == uuid]
    dff_90D = DF_90D[DF_90D.uuid == uuid]

    # Average over the full window length, so days without activity count as 0.
    daas_90 = dff_90D.groupby('date')['user_address'].nunique().sum() / 90
    daas_180 = dff_180D.groupby('date')['user_address'].nunique().sum() / 180

    # farcaster_id 0 is the placeholder filled in for events with no linked
    # Farcaster account; counting it would add a phantom user to every project
    # that has at least one non-Farcaster address.
    fids_90 = dff_90D['farcaster_id']
    fids_180 = dff_180D['farcaster_id']

    return {
        'uuid': uuid,
        'transactions_90D': dff_90D['count_transactions'].sum(),
        'transactions_180D': dff_180D['count_transactions'].sum(),
        'active_addresses_90D': dff_90D['user_address'].nunique(),
        'active_addresses_180D': dff_180D['user_address'].nunique(),
        'farcaster_users_90D': fids_90[fids_90 != 0].nunique(),
        'farcaster_users_180D': fids_180[fids_180 != 0].nunique(),
        'daily_active_addresses_90D': int(daas_90),
        'daily_active_addresses_180D': int(daas_180),
    }

In [8]:
# Only uuids with at least one event in the 90-day window get a metrics row.
valid_uuids = DF_90D['uuid'].unique()

df_metrics_by_uuid = pd.DataFrame(
    list(map(calculate_project_metrics_by_uuid, valid_uuids))
)

# Attach the human-readable name and the application id for each uuid.
df_metrics_by_uuid = df_metrics_by_uuid.assign(
    name=df_metrics_by_uuid['uuid'].map(UUID_NAME_MAPPING),
    application_id=df_metrics_by_uuid['uuid'].map(UUID_APP_MAPPING),
)

In [9]:
id_cols = ['application_id', 'name']

# Write one row per uuid: identifier columns first, then the metric columns.
df_metrics_by_uuid.set_index('uuid').loc[:, [*id_cols, *METRIC_COLS]].to_csv(
    'data/clean_metric_data/metrics_projects_by_uuid.csv'
)

In [10]:
# Base URL of the address page on each chain's block explorer.
explorers = {
    'Base': "https://basescan.org/address/",
    'Optimism': "https://optimistic.etherscan.io/address/",
    'Zora': "https://zora.thesuperscan.io/address/",
    'Mode': "https://explorer.mode.network/address/",
    'Mint': "https://explorer.mintchain.io/address/",
    'SwanChain': "https://mainnet-explorer.swanchain.io/address/",
    'Kroma': "https://kromascan.com/address/",
    'Cyber': "https://cyberscan.co/address/",
    'Orderly': "https://explorer.orderly.network/address/",
    'Polynomial': "https://polynomialscan.io/address/",
    'Lisk': "https://blockscout.lisk.com/address/",
    'Redstone': "https://explorer.redstone.xyz/address/",
}


def _block_explorer_url(row):
    """Return the explorer link for a project row, or None if it cannot be built."""
    base_url = explorers.get(row['chain'])
    address = row['address']
    # An unlisted chain or a missing address would otherwise raise a TypeError
    # (None + str / str + NaN) and abort the whole export.
    if base_url is None or not isinstance(address, str):
        return None
    return base_url + address


# Left-join the metrics onto the per-contract project table (indexed by uuid).
df_metrics_by_uuid_detailed = DF_PROJECTS.join(df_metrics_by_uuid.set_index('uuid')[METRIC_COLS])
df_metrics_by_uuid_detailed['block_explorer'] = df_metrics_by_uuid_detailed.apply(
    _block_explorer_url, axis=1
)

# Capture the column order before adding 'name' so it can be written first.
other_cols = list(df_metrics_by_uuid_detailed.columns)
df_metrics_by_uuid_detailed['name'] = df_metrics_by_uuid_detailed.index.map(UUID_NAME_MAPPING)

(
    df_metrics_by_uuid_detailed
    [['name'] + other_cols]
    .to_csv('data/clean_metric_data/metrics_projects_by_uuid_detailed.csv')
)

## Part 3: Get metrics by recipient ID

In [11]:
def calculate_project_metrics_by_recipient(recipient, name):
    """Compute the 90-day and 180-day activity metrics for one (recipient, name) pair.

    Reads the module-level ``DF_180D`` table and ``PARTITION_DATE``; the 90-day
    window is the subset of matching rows dated on or after ``PARTITION_DATE``.

    Args:
        recipient: Recipient address matched against the ``recipient`` column.
        name: Project name matched against the ``name`` column.

    Returns:
        dict with ``recipient``, ``name``, and for each window (``90D`` /
        ``180D``): ``transactions_*`` (sum of ``count_transactions``),
        ``active_addresses_*`` (distinct ``user_address`` values),
        ``farcaster_users_*`` (distinct real ``farcaster_id`` values) and
        ``daily_active_addresses_*`` (per-day distinct addresses summed over
        the window, divided by the window length and truncated to int); plus
        ``uuid_list``, the distinct uuids found in the 180-day rows.
    """
    dff_180D = DF_180D[(DF_180D.recipient == recipient) & (DF_180D.name == name)]
    dff_90D = dff_180D[dff_180D.date >= PARTITION_DATE]

    # Average over the full window length, so days without activity count as 0.
    daas_90 = dff_90D.groupby('date')['user_address'].nunique().sum() / 90
    daas_180 = dff_180D.groupby('date')['user_address'].nunique().sum() / 180

    # farcaster_id 0 is the placeholder filled in for events with no linked
    # Farcaster account; counting it would add a phantom user to every project
    # that has at least one non-Farcaster address.
    fids_90 = dff_90D['farcaster_id']
    fids_180 = dff_180D['farcaster_id']

    return {
        'recipient': recipient,
        'name': name,
        'transactions_90D': dff_90D['count_transactions'].sum(),
        'transactions_180D': dff_180D['count_transactions'].sum(),
        'active_addresses_90D': dff_90D['user_address'].nunique(),
        'active_addresses_180D': dff_180D['user_address'].nunique(),
        'farcaster_users_90D': fids_90[fids_90 != 0].nunique(),
        'farcaster_users_180D': fids_180[fids_180 != 0].nunique(),
        'daily_active_addresses_90D': int(daas_90),
        'daily_active_addresses_180D': int(daas_180),
        'uuid_list': list(dff_180D['uuid'].unique()),
    }

In [12]:
# Distinct (recipient, name) pairs across all uuids; a uuid missing either
# its recipient or its name is dropped before grouping.
project_identifiers = (
    pd.concat(
        [
            pd.Series(UUID_RECIPIENT_MAPPING, name='recipient'),
            pd.Series(UUID_NAME_MAPPING, name='name'),
        ],
        axis=1,
    )
    .dropna()
    .reset_index()
    .groupby(['recipient', 'name'])['index']
    .agg(set)
    .index
    .tolist()
)

# One metrics row per (recipient, name) pair.
df_metrics_by_recipient = pd.DataFrame([
    calculate_project_metrics_by_recipient(*identifier)
    for identifier in project_identifiers
])

# Translate each row's uuids into application ids (None when a uuid is unknown).
df_metrics_by_recipient['application_id_list'] = (
    df_metrics_by_recipient['uuid_list']
    .map(lambda uuids: [UUID_APP_MAPPING.get(project_uuid) for project_uuid in uuids])
)

In [13]:
df_metrics_by_recipient = df_metrics_by_recipient.set_index(['recipient', 'name'])

# Export only the pairs whose uuid_list is non-empty.
df_metrics_by_recipient.loc[
    df_metrics_by_recipient['uuid_list'].map(len).gt(0)
].to_csv('data/clean_metric_data/metrics_projects_by_recipient.csv')