In [1]:
import os
import sys
import pandas as pd
import yaml 
from matplotlib import pyplot as plt

# Read the local configuration file; it supplies RAW_DATA_PATH used below.
with open("../../config.yaml.local", "r") as local_config_file:
    LOCAL_CONFIG = yaml.safe_load(local_config_file)
# Loading of the shared config is currently disabled:
#with open("../../config.yaml", "r") as f:
#    CONFIG = yaml.safe_load(f)

# Extend the import path *before* importing `globals`
# (presumably the module lives in ../python -- TODO confirm).
sys.path.append("../python")
import globals  # NOTE: this module name shadows the builtin globals()

# Directory that holds the raw parquet files.
RAW_DATA_PATH = LOCAL_CONFIG["RAW_DATA_PATH"]


In [2]:
def _read_raw(filename):
    """Read one parquet file located under RAW_DATA_PATH into a DataFrame."""
    return pd.read_parquet(os.path.join(RAW_DATA_PATH, filename))


# One DataFrame per raw parquet file.
users_df = _read_raw("users.parquet")
item_df = _read_raw("item.parquet")
itemact_df = _read_raw("itemact.parquet")
invoice_df = _read_raw("invoice.parquet")
withdrawal_df = _read_raw("withdrawal.parquet")
invoiceforward_df = _read_raw("invoiceforward.parquet")
wallet_df = _read_raw("wallet.parquet")
walletprotocol_df = _read_raw("walletprotocol.parquet")


In [3]:
# Columns that exist on both the FEE and the TIP side and therefore need a
# side-specific prefix before the two frames are joined.
_SIDE_COLUMNS = ['id', 'updated_at', 'msats', 'act', 'invoiceActionState', 'invoiceId']


def _acts_with_prefix(act, prefix):
    """Return the itemact rows whose 'act' equals `act`, with the
    side-specific columns renamed to '<prefix>_<column>'."""
    rows = itemact_df[itemact_df['act'] == act]
    renames = {column: f"{prefix}_{column}" for column in _SIDE_COLUMNS}
    return rows.rename(columns=renames)


fees_df = _acts_with_prefix('FEE', 'fee')
tips_df = _acts_with_prefix('TIP', 'tip')

# Outer join: a row carries fee columns, tip columns, or both, depending on
# whether a FEE and a TIP share the same (created_at, itemId, userId).
fees_tips_df = fees_df.merge(
    tips_df,
    how='outer',
    on=['created_at', 'itemId', 'userId'],
)


In [4]:
# Which side(s) of the FEE/TIP outer join are present on each row.
has_tip = fees_tips_df['tip_id'].notna()
has_fee = fees_tips_df['fee_id'].notna()

# A row with a tip is a zap.
fees_tips_df['is_zap'] = has_tip
# Fee that was matched to a tip (same created_at / itemId / userId).
fees_tips_df['sybil_fee'] = has_tip & has_fee
# Fee with no matching tip.
fees_tips_df['post_fee'] = has_fee & ~has_tip
# Fee plus tip amount, treating the missing side as 0.
fees_tips_df['total_msats'] = (
    fees_tips_df['fee_msats'].fillna(0).add(fees_tips_df['tip_msats'].fillna(0))
)

In [5]:
# Sanity check: the earliest sybil fee in the data must fall on the date
# stored in globals.sf10_date.
sybil_created_at = fees_tips_df['created_at'][fees_tips_df['sybil_fee']]
first_sybil_date = sybil_created_at.min()
print(f"First sybil fee date: {first_sybil_date.strftime('%Y-%m-%d')}")
assert globals.sf10_date.date() == first_sybil_date.date()

First sybil fee date: 2022-11-23


In [6]:
# Sanity check: the first zap whose sybil fee is exactly 30% of the total
# must fall on the date stored in globals.sf30_date.
is_sybil_fee = fees_tips_df['sybil_fee']

# Fee share of the total, defined only on sybil-fee rows. Using .where()
# keeps the column float64 with NaN elsewhere, instead of the object-dtype
# column that initialising with None would create.
fees_tips_df['sybil_fee_pct'] = (
    fees_tips_df['fee_msats'] / fees_tips_df['total_msats']
).where(is_sybil_fee)

# NaN never compares equal, so non-sybil rows are excluded automatically.
is_sf30 = fees_tips_df['sybil_fee_pct'] == 0.3
first_sf30_date = fees_tips_df.loc[is_sf30, 'created_at'].min()

# Fail with a readable message rather than an error from strftime on NaT.
assert pd.notna(first_sf30_date), "no row with a sybil fee of exactly 30% found"
print(f"First 30% sybil fee date: {first_sf30_date.strftime('%Y-%m-%d')}")
assert first_sf30_date.date() == globals.sf30_date.date(), (
    f"first 30% sybil fee on {first_sf30_date.date()}, "
    f"expected {globals.sf30_date.date()}"
)

First 30% sybil fee date: 2024-09-19


In [11]:
# Sanity check: a post fee is expected to be the first itemact on its item.
# Rank each row within its item by created_at; method='first' breaks ties
# by order of appearance.
fees_tips_df['order'] = (
    fees_tips_df
    .groupby('itemId')['created_at']
    .rank(method='first')
)
# Post-fee rows that are not ranked first contradict the expectation;
# show a few of them for inspection.
bad = fees_tips_df['post_fee'] & fees_tips_df['order'].ne(1)
fees_tips_df[bad].head(3)

Unnamed: 0,fee_id,created_at,fee_updated_at,fee_msats,fee_act,itemId,userId,fee_invoiceActionState,fee_invoiceId,tip_id,...,tip_msats,tip_act,tip_invoiceActionState,tip_invoiceId,is_zap,sybil_fee,post_fee,total_msats,sybil_fee_pct,order
51,59.0,2021-06-16 22:58:06.439,2021-06-16 22:58:06.439,1000.0,FEE,47,631,,,,...,,,,,False,False,True,1000.0,,2.0
254,651.0,2021-07-01 15:48:18.059,2021-07-01 15:48:18.059,1000.0,FEE,156,658,,,,...,,,,,False,False,True,1000.0,,2.0
400,931.0,2021-07-09 17:39:29.180,2021-07-09 17:39:29.180,1000.0,FEE,243,704,,,,...,,,,,False,False,True,1000.0,,2.0


In [12]:
# Show every fee/tip row recorded for item 47.
fees_tips_df[fees_tips_df['itemId'].eq(47)]

Unnamed: 0,fee_id,created_at,fee_updated_at,fee_msats,fee_act,itemId,userId,fee_invoiceActionState,fee_invoiceId,tip_id,...,tip_msats,tip_act,tip_invoiceActionState,tip_invoiceId,is_zap,sybil_fee,post_fee,total_msats,sybil_fee_pct,order
49,,2021-06-16 22:52:24.755,NaT,,,47,616,,,57.0,...,1000.0,TIP,,,True,False,False,1000.0,,1.0
51,59.0,2021-06-16 22:58:06.439,2021-06-16 22:58:06.439,1000.0,FEE,47,631,,,,...,,,,,False,False,True,1000.0,,2.0
77,,2021-06-18 13:59:22.646,NaT,,,47,632,,,87.0,...,1000.0,TIP,,,True,False,False,1000.0,,3.0
