In [25]:
import pandas as pd 
import ast 
import json

In [26]:
test_address = 'AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU'
WRAPPED_SOL = "So11111111111111111111111111111111111111112"
NATIVE_SOL  = "So11111111111111111111111111111111111111111"

In [27]:
test_wallet_timeseries = pd.read_csv('../data/processed/test_acc_timeseries_labeled.csv')

test_wallet_timeseries['timestamp'] = pd.to_datetime(test_wallet_timeseries['timestamp'])
received_tx = test_wallet_timeseries[test_wallet_timeseries['receiver']==test_address]
sent_tx = test_wallet_timeseries[test_wallet_timeseries['sender']==test_address]

In [28]:
#Total recognized transactions 

test_wallet_timeseries

total_tx = test_wallet_timeseries['signature'].nunique()
total_tx

493

In [29]:
#Total SOL sent volume

sol_sent_vol = sent_tx[sent_tx['token_address']==NATIVE_SOL]['token_amount'].sum()
sol_sent_vol

np.float64(47.911934371)

In [30]:
#Total SOL received volume

sol_received_vol = received_tx[received_tx['token_address']==NATIVE_SOL]['token_amount'].sum()
sol_received_vol

np.float64(47.641967637)

In [31]:
# Total received volume by token

token_received_df = received_tx[received_tx['token_address']!=NATIVE_SOL]
tokens_recieved_dict = token_received_df.groupby('SYMBOL')[['token_amount']].sum().to_dict()
tokens_recieved_dict

{'token_amount': {'$WEN': 643652.0,
  'BLZE': 195.106114674,
  'BSOL': 5.571652628,
  'JITOSOL': 1.130972666,
  'KMNO': 84.916367,
  'MSOL': 0.340143184,
  'NEON': 4.850918013,
  'PENGU': 3490.0,
  'PYUSD': 15865.709742,
  'SOL': 1.58567275,
  'USDC': 2468.463001,
  'USDT': 6693.240062}}

In [32]:
# Total sent volume by token

token_sent_df = sent_tx[sent_tx['token_address']!=NATIVE_SOL]
tokens_sent_dict = token_sent_df.groupby('SYMBOL')[['token_amount']].sum().to_dict()
tokens_sent_dict

{'token_amount': {'$WEN': 1287304.0,
  'BLZE': 1298.4623413030001,
  'BSOL': 5.571652628,
  'JITOSOL': 1.12680855,
  'KMNO': 95.038552,
  'MSOL': 0.340143184,
  'PENGU': 3490.0,
  'PYUSD': 15865.709742000001,
  'SOL': 0.560430994,
  'USDC': 2468.463001,
  'USDT': 6693.240062}}

In [33]:
pd.set_option('display.float_format', '{:.8f}'.format)
# Allow wider display for signature or other long strings
pd.set_option('display.max_colwidth', None)  # or use a large number like 200
pd.set_option('display.max_columns', None)   # ensures all columns are shown
pd.set_option('display.width', 200)    

In [34]:
# First tx time

first_tx = test_wallet_timeseries['timestamp'].min()

In [35]:
# Latest tx time

last_tx = test_wallet_timeseries['timestamp'].max()

In [36]:
test_wallet_timeseries = test_wallet_timeseries.sort_values('timestamp')

In [37]:
# Avg tx interval

avg_tx_interval = test_wallet_timeseries['timestamp'].diff().mean()
print(f'avg_tx_interval: {avg_tx_interval}')

avg_seconds = avg_tx_interval.total_seconds()
avg_minutes = avg_seconds / 60
avg_hours = avg_seconds / 3600

avg_tx_interval: 0 days 12:24:41.231707317


In [38]:
# Number of unique receivers

unique_receivers = len(test_wallet_timeseries['receiver'].unique())

In [39]:
# Number of unique senders

unique_senders = len(test_wallet_timeseries['sender'].unique())

In [40]:
wallet_analysis_dict = {
    'wallet_address':test_address,
    'entity_label': test_wallet_timeseries['wallet_entity_label'].iloc[0],
    'num_transactions': total_tx,
    'total_sol_volume_sent': sol_sent_vol,
    'total_sol_volume_received': sol_received_vol,
    'total_token_volume_sent':tokens_sent_dict,
    'total_token_volume_recieved':tokens_recieved_dict,
    'first_tx_time':first_tx,
    'last_tx_time':last_tx,
    'avg_tx_interval (seconds)':avg_seconds,
    'num_unique_senders':unique_senders,
    'num_unique_receivers':unique_receivers
}

wallet_analysis_dict

{'wallet_address': 'AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU',
 'entity_label': 'Unknown Entity',
 'num_transactions': 493,
 'total_sol_volume_sent': np.float64(47.911934371),
 'total_sol_volume_received': np.float64(47.641967637),
 'total_token_volume_sent': {'token_amount': {'$WEN': 1287304.0,
   'BLZE': 1298.4623413030001,
   'BSOL': 5.571652628,
   'JITOSOL': 1.12680855,
   'KMNO': 95.038552,
   'MSOL': 0.340143184,
   'PENGU': 3490.0,
   'PYUSD': 15865.709742000001,
   'SOL': 0.560430994,
   'USDC': 2468.463001,
   'USDT': 6693.240062}},
 'total_token_volume_recieved': {'token_amount': {'$WEN': 643652.0,
   'BLZE': 195.106114674,
   'BSOL': 5.571652628,
   'JITOSOL': 1.130972666,
   'KMNO': 84.916367,
   'MSOL': 0.340143184,
   'NEON': 4.850918013,
   'PENGU': 3490.0,
   'PYUSD': 15865.709742,
   'SOL': 1.58567275,
   'USDC': 2468.463001,
   'USDT': 6693.240062}},
 'first_tx_time': Timestamp('2023-11-14 15:20:07'),
 'last_tx_time': Timestamp('2025-04-06 12:12:19'),
 'avg_tx_in

In [41]:
wallet_analysis_df = pd.DataFrame([wallet_analysis_dict]) # I think token vol sent/received should be tracked as its own DF
wallet_analysis_df.head()
%store wallet_analysis_df

Stored 'wallet_analysis_df' (DataFrame)


In [42]:
wallet_analysis_df.head()

Unnamed: 0,wallet_address,entity_label,num_transactions,total_sol_volume_sent,total_sol_volume_received,total_token_volume_sent,total_token_volume_recieved,first_tx_time,last_tx_time,avg_tx_interval (seconds),num_unique_senders,num_unique_receivers
0,AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU,Unknown Entity,493,47.91193437,47.64196764,"{'token_amount': {'$WEN': 1287304.0, 'BLZE': 1298.4623413030001, 'BSOL': 5.571652628, 'JITOSOL': 1.12680855, 'KMNO': 95.038552, 'MSOL': 0.340143184, 'PENGU': 3490.0, 'PYUSD': 15865.709742000001, 'SOL': 0.560430994, 'USDC': 2468.463001, 'USDT': 6693.240062}}","{'token_amount': {'$WEN': 643652.0, 'BLZE': 195.106114674, 'BSOL': 5.571652628, 'JITOSOL': 1.130972666, 'KMNO': 84.916367, 'MSOL': 0.340143184, 'NEON': 4.850918013, 'PENGU': 3490.0, 'PYUSD': 15865.709742, 'SOL': 1.58567275, 'USDC': 2468.463001, 'USDT': 6693.240062}}",2023-11-14 15:20:07,2025-04-06 12:12:19,44681.231707,105,176


# Track funding sources and flow of assets

In [43]:

# Wallet Analysis Module
class WalletAnalysis:
    def __init__(self, df):
        self.df = df
    
    def track_funding_sources_and_flow(self, wallet_address):
        wallet_data = self.df[self.df['wallet_address'] == wallet_address]
        if wallet_data.empty:
            return {"error": "Wallet address not found"}

        # Copy relevant subset
        funding_sources = self.df[[
            'wallet_address',
            'entity_label',
            'num_unique_senders',
            'num_unique_receivers',
            'total_sol_volume_sent',
            'total_sol_volume_received',
            'total_token_volume_sent',
            'total_token_volume_recieved'
        ]].copy()

        # Parse and extract numeric totals from token dicts
        funding_sources['Token Sent (Total)'] = funding_sources['total_token_volume_sent'].apply(
            lambda x: self.extract_token_volumes(self.safe_parse(x))
        )
        funding_sources['Token Received (Total)'] = funding_sources['total_token_volume_recieved'].apply(
            lambda x: self.extract_token_volumes(self.safe_parse(x))
        )

        # Rename other fields
        funding_sources.rename(columns={
            'wallet_address': 'Wallet Address',
            'entity_label': 'Entity Label',
            'num_unique_senders': 'Unique Senders',
            'num_unique_receivers': 'Unique Receivers',
            'total_sol_volume_sent': 'SOL Sent',
            'total_sol_volume_received': 'SOL Received',
        }, inplace=True)

        return funding_sources


    @staticmethod
    def safe_parse(raw):
        if isinstance(raw, dict):
            return raw
        try:
            return json.loads(raw.replace("'", "\""))
        except Exception:
            return {}

    @staticmethod
    def extract_token_volumes(token_dict):
        if isinstance(token_dict, dict) and 'token_amount' in token_dict:
            return sum(token_dict['token_amount'].values())
        return 0


    
    
    def transaction_history(self, wallet_address):
        # Filter data for the specified wallet
        wallet_data = self.df[self.df['wallet_address'] == wallet_address]
        
        if wallet_data.empty:
            return {"error": "Wallet address not found"}

        # Parse token volumes safely
        try:
            token_sent = json.loads(wallet_data['total_token_volume_sent'].iloc[0].replace("'", "\""))
        except (json.JSONDecodeError, TypeError, AttributeError):
            token_sent = {}

        try:
            token_received = json.loads(wallet_data['total_token_volume_recieved'].iloc[0].replace("'", "\""))
        except (json.JSONDecodeError, TypeError, AttributeError):
            token_received = {}

        # Construct transaction history summary
        history = {
            "wallet_address": wallet_address,
            "num_transactions": wallet_data['num_transactions'].iloc[0],
            "total_sol_volume_sent": wallet_data['total_sol_volume_sent'].iloc[0],
            "total_sol_volume_received": wallet_data['total_sol_volume_received'].iloc[0],
            "total_token_volume_sent": token_sent,
            "total_token_volume_received": token_received,
            "first_transaction": wallet_data['first_tx_time'].iloc[0],
            "last_transaction": wallet_data['last_tx_time'].iloc[0],
            "avg_tx_interval_seconds": wallet_data['avg_tx_interval (seconds)'].iloc[0]
        }

        return history

    
    def key_activity_patterns_and_risk_factors(self, wallet_address):
        # Identifying key activity patterns: Average transaction interval, number of unique senders/receivers
        wallet_data = self.df[self.df['wallet_address'] == wallet_address]
        if wallet_data.empty:
            return {"error": "Wallet address not found"}
        
        # Calculate activity metrics
        first_tx = pd.to_datetime(wallet_data['first_tx_time'].iloc[0])
        last_tx = pd.to_datetime(wallet_data['last_tx_time'].iloc[0])
        active_period_days = (last_tx - first_tx).days
        
        patterns = {
            "wallet_address": wallet_address,
            "active_period_days": active_period_days,
            "avg_tx_per_day": wallet_data['num_transactions'].iloc[0] / max(active_period_days, 1),
            "sender_to_receiver_ratio": wallet_data['num_unique_senders'].iloc[0] / max(wallet_data['num_unique_receivers'].iloc[0], 1),
            "sol_net_flow": wallet_data['total_sol_volume_received'].iloc[0] - wallet_data['total_sol_volume_sent'].iloc[0]
        }
        return patterns
    
    def _identify_risk_based_on_activity(self, wallet_address, sol_volume_threshold=100, tx_frequency_threshold=10, unknown_entity=True):
        # A simple threshold logic to flag risky behavior based on high activity
        wallet_data = self.df[self.df['wallet_address'] == wallet_address]
        if wallet_data.empty:
            return {"error": "Wallet address not found"}
        
        # Get activity patterns
        patterns = self.identify_activity_patterns(wallet_address)
        
        # Initialize risk flags
        risks = {
            "wallet_address": wallet_address,
            "high_volume_risk": False,
            "high_frequency_risk": False,
            "unknown_entity_risk": False,
            "risk_summary": []
        }
        
        # Check for high SOL volume
        total_sol_volume = (wallet_data['total_sol_volume_sent'].iloc[0] + 
                           wallet_data['total_sol_volume_received'].iloc[0])
        if total_sol_volume > sol_volume_threshold:
            risks["high_volume_risk"] = True
            risks["risk_summary"].append(f"High SOL volume: {total_sol_volume:.2f} SOL")
        
        # Check for high transaction frequency
        if patterns["avg_tx_per_day"] > tx_frequency_threshold:
            risks["high_frequency_risk"] = True
            risks["risk_summary"].append(f"High tx frequency: {patterns['avg_tx_per_day']:.2f} tx/day")
        
        # Check for unknown entity
        if unknown_entity and wallet_data['entity_label'].iloc[0] == "Unknown Entity":
            risks["unknown_entity_risk"] = True
            risks["risk_summary"].append("Unknown entity label")
        
        return risks


In [44]:

# # Wallet Analysis Module
# class WalletAnalysis:
#     def __init__(self, df):
#         self.df = df
    
#     def track_funding_sources_and_flow(self, wallet_address):
#         # Extract token volume totals
#         # self.df['token_sent_total'] = self.df['total_token_volume_sent'].apply(self.extract_token_volumes)
#         # self.df['token_received_total'] = self.df['total_token_volume_recieved'].apply(self.extract_token_volumes)

#         wallet_data = self.df[self.df['wallet_address'] == wallet_address]
#         if wallet_data.empty:
#             return {"error": "Wallet address not found"}

#         # Select and rename relevant fields
#         funding_sources = self.df[[
#             'wallet_address',
#             'entity_label',
#             'num_unique_senders',
#             'num_unique_receivers',
#             'total_sol_volume_sent',
#             'total_sol_volume_received',
#             'total_token_volume_sent',
#             'total_token_volume_recieved'
#         ]].copy()

#         # Optional: rename columns for clarity
#         funding_sources.columns = [
#             'Wallet Address',
#             'Entity Label',
#             'Unique Senders',
#             'Unique Receivers',
#             'SOL Sent',
#             'SOL Received',
#             'Token Sent (Total)',
#             'Token Received (Total)'
#         ]

#         return funding_sources


#     @staticmethod
#     def extract_token_volumes(token_dict):
#         if isinstance(token_dict, dict) and 'token_amount' in token_dict:
#             return sum(token_dict['token_amount'].values())
#         return 0

    
    
#     def transaction_history(self, wallet_address):
#         # Filter data for the specified wallet
#         wallet_data = self.df[self.df['wallet_address'] == wallet_address]
        
#         if wallet_data.empty:
#             return {"error": "Wallet address not found"}

#         # Parse token volumes safely
#         try:
#             token_sent = json.loads(wallet_data['total_token_volume_sent'].iloc[0].replace("'", "\""))
#         except (json.JSONDecodeError, TypeError, AttributeError):
#             token_sent = {}

#         try:
#             token_received = json.loads(wallet_data['total_token_volume_recieved'].iloc[0].replace("'", "\""))
#         except (json.JSONDecodeError, TypeError, AttributeError):
#             token_received = {}

#         # Construct transaction history summary
#         history = {
#             "wallet_address": wallet_address,
#             "num_transactions": wallet_data['num_transactions'].iloc[0],
#             "total_sol_volume_sent": wallet_data['total_sol_volume_sent'].iloc[0],
#             "total_sol_volume_received": wallet_data['total_sol_volume_received'].iloc[0],
#             "total_token_volume_sent": token_sent,
#             "total_token_volume_received": token_received,
#             "first_transaction": wallet_data['first_tx_time'].iloc[0],
#             "last_transaction": wallet_data['last_tx_time'].iloc[0],
#             "avg_tx_interval_seconds": wallet_data['avg_tx_interval (seconds)'].iloc[0]
#         }

#         return history

    
#     def key_activity_patterns_and_risk_factors(self, wallet_address):
#         # Identifying key activity patterns: Average transaction interval, number of unique senders/receivers
#         wallet_data = self.df[self.df['wallet_address'] == wallet_address]
#         if wallet_data.empty:
#             return {"error": "Wallet address not found"}
        
#         # Calculate activity metrics
#         first_tx = pd.to_datetime(wallet_data['first_tx_time'].iloc[0])
#         last_tx = pd.to_datetime(wallet_data['last_tx_time'].iloc[0])
#         active_period_days = (last_tx - first_tx).days
        
#         patterns = {
#             "wallet_address": wallet_address,
#             "active_period_days": active_period_days,
#             "avg_tx_per_day": wallet_data['num_transactions'].iloc[0] / max(active_period_days, 1),
#             "sender_to_receiver_ratio": wallet_data['num_unique_senders'].iloc[0] / max(wallet_data['num_unique_receivers'].iloc[0], 1),
#             "sol_net_flow": wallet_data['total_sol_volume_received'].iloc[0] - wallet_data['total_sol_volume_sent'].iloc[0]
#         }
#         return patterns
    
#     def _identify_risk_based_on_activity(self, wallet_address, sol_volume_threshold=100, tx_frequency_threshold=10, unknown_entity=True):
#         # A simple threshold logic to flag risky behavior based on high activity
#         wallet_data = self.df[self.df['wallet_address'] == wallet_address]
#         if wallet_data.empty:
#             return {"error": "Wallet address not found"}
        
#         # Get activity patterns
#         patterns = self.identify_activity_patterns(wallet_address)
        
#         # Initialize risk flags
#         risks = {
#             "wallet_address": wallet_address,
#             "high_volume_risk": False,
#             "high_frequency_risk": False,
#             "unknown_entity_risk": False,
#             "risk_summary": []
#         }
        
#         # Check for high SOL volume
#         total_sol_volume = (wallet_data['total_sol_volume_sent'].iloc[0] + 
#                            wallet_data['total_sol_volume_received'].iloc[0])
#         if total_sol_volume > sol_volume_threshold:
#             risks["high_volume_risk"] = True
#             risks["risk_summary"].append(f"High SOL volume: {total_sol_volume:.2f} SOL")
        
#         # Check for high transaction frequency
#         if patterns["avg_tx_per_day"] > tx_frequency_threshold:
#             risks["high_frequency_risk"] = True
#             risks["risk_summary"].append(f"High tx frequency: {patterns['avg_tx_per_day']:.2f} tx/day")
        
#         # Check for unknown entity
#         if unknown_entity and wallet_data['entity_label'].iloc[0] == "Unknown Entity":
#             risks["unknown_entity_risk"] = True
#             risks["risk_summary"].append("Unknown entity label")
        
#         return risks


In [45]:
# Initialize Wallet Analysis Module
wallet_analysis = WalletAnalysis(wallet_analysis_df)

wallet_address = "AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU"


In [46]:
# Track funding sources and asset flow
funding_sources = wallet_analysis.track_funding_sources_and_flow(wallet_address)
print("Funding Sources and Asset Flow:")
print(funding_sources)

Funding Sources and Asset Flow:
                                 Wallet Address    Entity Label  Unique Senders  Unique Receivers    SOL Sent  SOL Received  \
0  AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU  Unknown Entity             105               176 47.91193437   47.64196764   

                                                                                                                                                                                                                                             total_token_volume_sent  \
0  {'token_amount': {'$WEN': 1287304.0, 'BLZE': 1298.4623413030001, 'BSOL': 5.571652628, 'JITOSOL': 1.12680855, 'KMNO': 95.038552, 'MSOL': 0.340143184, 'PENGU': 3490.0, 'PYUSD': 15865.709742000001, 'SOL': 0.560430994, 'USDC': 2468.463001, 'USDT': 6693.240062}}   

                                                                                                                                                                                        

In [47]:
# Transaction History
transaction_history = wallet_analysis.transaction_history(wallet_address)
print("\nTransaction History:")
print(transaction_history)


Transaction History:
{'wallet_address': 'AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU', 'num_transactions': np.int64(493), 'total_sol_volume_sent': np.float64(47.911934371), 'total_sol_volume_received': np.float64(47.641967637), 'total_token_volume_sent': {}, 'total_token_volume_received': {}, 'first_transaction': Timestamp('2023-11-14 15:20:07'), 'last_transaction': Timestamp('2025-04-06 12:12:19'), 'avg_tx_interval_seconds': np.float64(44681.231707)}


In [48]:
# Key Activity Patterns and Risk Factors
activity_patterns = wallet_analysis.key_activity_patterns_and_risk_factors(wallet_address)
print("\nActivity Patterns and Risk Factors:")
print(activity_patterns)


Activity Patterns and Risk Factors:
{'wallet_address': 'AGPZnBZUxmhAtcp8XjT4n8bCia9dEYhhm16M2sfFvmTU', 'active_period_days': 508, 'avg_tx_per_day': np.float64(0.9704724409448819), 'sender_to_receiver_ratio': np.float64(0.5965909090909091), 'sol_net_flow': np.float64(-0.2699667340000005)}
